@exulu/backend 1.48.1 → 1.49.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/index.cjs +353 -42
- package/dist/index.d.cts +422 -327
- package/dist/index.d.ts +422 -327
- package/dist/index.js +341 -38
- package/ee/{markdown.ts → chunking/markdown.ts} +2 -2
- package/ee/python/README.md +295 -0
- package/ee/python/documents/processing/README.md +155 -0
- package/ee/{documents → python/documents}/processing/doc_processor.ts +25 -17
- package/ee/{documents/processing/pdf_to_markdown.py → python/documents/processing/document_to_markdown.py} +3 -10
- package/ee/python/setup.sh +180 -0
- package/package.json +17 -5
- package/scripts/postinstall.cjs +149 -0
- package/.agents/skills/mintlify/SKILL.md +0 -347
- package/.editorconfig +0 -15
- package/.eslintrc.json +0 -52
- package/.github/workflows/release-backend.yml +0 -38
- package/.husky/commit-msg +0 -1
- package/.jscpd.json +0 -18
- package/.mcp.json +0 -18
- package/.nvmrc +0 -1
- package/.prettierignore +0 -5
- package/.prettierrc.json +0 -12
- package/CHANGELOG.md +0 -6
- package/SECURITY.md +0 -5
- package/commitlint.config.js +0 -4
- package/devops/documentation/patch-older-releases.md +0 -42
- package/ee/documents/processing/build_pdf_processor.sh +0 -35
- package/ee/documents/processing/chunk_markdown.py +0 -263
- package/ee/documents/processing/pdf_processor.spec +0 -115
- package/eslint.config.js +0 -88
- package/jest.config.ts +0 -25
- package/mintlify-docs/.mintignore +0 -7
- package/mintlify-docs/AGENTS.md +0 -33
- package/mintlify-docs/CLAUDE.MD +0 -50
- package/mintlify-docs/CONTRIBUTING.md +0 -32
- package/mintlify-docs/LICENSE +0 -21
- package/mintlify-docs/README.md +0 -55
- package/mintlify-docs/ai-tools/claude-code.mdx +0 -43
- package/mintlify-docs/ai-tools/cursor.mdx +0 -39
- package/mintlify-docs/ai-tools/windsurf.mdx +0 -39
- package/mintlify-docs/api-reference/core-types/agent-types.mdx +0 -110
- package/mintlify-docs/api-reference/core-types/analytics-types.mdx +0 -95
- package/mintlify-docs/api-reference/core-types/configuration-types.mdx +0 -83
- package/mintlify-docs/api-reference/core-types/evaluation-types.mdx +0 -106
- package/mintlify-docs/api-reference/core-types/job-types.mdx +0 -135
- package/mintlify-docs/api-reference/core-types/overview.mdx +0 -73
- package/mintlify-docs/api-reference/core-types/prompt-types.mdx +0 -102
- package/mintlify-docs/api-reference/core-types/rbac-types.mdx +0 -163
- package/mintlify-docs/api-reference/core-types/session-types.mdx +0 -77
- package/mintlify-docs/api-reference/core-types/user-management.mdx +0 -112
- package/mintlify-docs/api-reference/core-types/workflow-types.mdx +0 -88
- package/mintlify-docs/api-reference/core-types.mdx +0 -585
- package/mintlify-docs/api-reference/dynamic-types.mdx +0 -851
- package/mintlify-docs/api-reference/endpoint/create.mdx +0 -4
- package/mintlify-docs/api-reference/endpoint/delete.mdx +0 -4
- package/mintlify-docs/api-reference/endpoint/get.mdx +0 -4
- package/mintlify-docs/api-reference/endpoint/webhook.mdx +0 -4
- package/mintlify-docs/api-reference/introduction.mdx +0 -661
- package/mintlify-docs/api-reference/mutations.mdx +0 -1012
- package/mintlify-docs/api-reference/openapi.json +0 -217
- package/mintlify-docs/api-reference/queries.mdx +0 -1154
- package/mintlify-docs/backend/introduction.mdx +0 -218
- package/mintlify-docs/changelog.mdx +0 -387
- package/mintlify-docs/community-edition.mdx +0 -304
- package/mintlify-docs/core/exulu-agent/api-reference.mdx +0 -894
- package/mintlify-docs/core/exulu-agent/configuration.mdx +0 -690
- package/mintlify-docs/core/exulu-agent/introduction.mdx +0 -552
- package/mintlify-docs/core/exulu-app/api-reference.mdx +0 -481
- package/mintlify-docs/core/exulu-app/configuration.mdx +0 -319
- package/mintlify-docs/core/exulu-app/introduction.mdx +0 -117
- package/mintlify-docs/core/exulu-authentication.mdx +0 -810
- package/mintlify-docs/core/exulu-chunkers/api-reference.mdx +0 -1011
- package/mintlify-docs/core/exulu-chunkers/configuration.mdx +0 -596
- package/mintlify-docs/core/exulu-chunkers/introduction.mdx +0 -403
- package/mintlify-docs/core/exulu-context/api-reference.mdx +0 -911
- package/mintlify-docs/core/exulu-context/configuration.mdx +0 -648
- package/mintlify-docs/core/exulu-context/introduction.mdx +0 -394
- package/mintlify-docs/core/exulu-database.mdx +0 -811
- package/mintlify-docs/core/exulu-default-agents.mdx +0 -545
- package/mintlify-docs/core/exulu-eval/api-reference.mdx +0 -772
- package/mintlify-docs/core/exulu-eval/configuration.mdx +0 -680
- package/mintlify-docs/core/exulu-eval/introduction.mdx +0 -459
- package/mintlify-docs/core/exulu-logging.mdx +0 -464
- package/mintlify-docs/core/exulu-otel.mdx +0 -670
- package/mintlify-docs/core/exulu-queues/api-reference.mdx +0 -648
- package/mintlify-docs/core/exulu-queues/configuration.mdx +0 -650
- package/mintlify-docs/core/exulu-queues/introduction.mdx +0 -474
- package/mintlify-docs/core/exulu-reranker/api-reference.mdx +0 -630
- package/mintlify-docs/core/exulu-reranker/configuration.mdx +0 -663
- package/mintlify-docs/core/exulu-reranker/introduction.mdx +0 -516
- package/mintlify-docs/core/exulu-tool/api-reference.mdx +0 -723
- package/mintlify-docs/core/exulu-tool/configuration.mdx +0 -805
- package/mintlify-docs/core/exulu-tool/introduction.mdx +0 -539
- package/mintlify-docs/core/exulu-variables/api-reference.mdx +0 -699
- package/mintlify-docs/core/exulu-variables/configuration.mdx +0 -736
- package/mintlify-docs/core/exulu-variables/introduction.mdx +0 -511
- package/mintlify-docs/development.mdx +0 -94
- package/mintlify-docs/docs.json +0 -248
- package/mintlify-docs/enterprise-edition.mdx +0 -538
- package/mintlify-docs/essentials/code.mdx +0 -35
- package/mintlify-docs/essentials/images.mdx +0 -59
- package/mintlify-docs/essentials/markdown.mdx +0 -88
- package/mintlify-docs/essentials/navigation.mdx +0 -87
- package/mintlify-docs/essentials/reusable-snippets.mdx +0 -110
- package/mintlify-docs/essentials/settings.mdx +0 -318
- package/mintlify-docs/favicon.svg +0 -3
- package/mintlify-docs/frontend/introduction.mdx +0 -39
- package/mintlify-docs/getting-started.mdx +0 -267
- package/mintlify-docs/guides/custom-agent.mdx +0 -608
- package/mintlify-docs/guides/first-agent.mdx +0 -315
- package/mintlify-docs/images/admin_ui.png +0 -0
- package/mintlify-docs/images/contexts.png +0 -0
- package/mintlify-docs/images/create_agents.png +0 -0
- package/mintlify-docs/images/evals.png +0 -0
- package/mintlify-docs/images/graphql.png +0 -0
- package/mintlify-docs/images/graphql_api.png +0 -0
- package/mintlify-docs/images/hero-dark.png +0 -0
- package/mintlify-docs/images/hero-light.png +0 -0
- package/mintlify-docs/images/hero.png +0 -0
- package/mintlify-docs/images/knowledge_sources.png +0 -0
- package/mintlify-docs/images/mcp.png +0 -0
- package/mintlify-docs/images/scaling.png +0 -0
- package/mintlify-docs/index.mdx +0 -411
- package/mintlify-docs/logo/dark.svg +0 -9
- package/mintlify-docs/logo/light.svg +0 -9
- package/mintlify-docs/partners.mdx +0 -558
- package/mintlify-docs/products.mdx +0 -77
- package/mintlify-docs/snippets/snippet-intro.mdx +0 -4
- package/mintlify-docs/styles.css +0 -207
- package/ngrok.bash +0 -1
- package/ngrok.md +0 -6
- package/ngrok.yml +0 -10
- package/release.config.cjs +0 -15
- package/skills-lock.json +0 -10
- package/types/context-processor.ts +0 -45
- package/types/enums/eval-types.ts +0 -5
- package/types/enums/field-types.ts +0 -1
- package/types/enums/jobs.ts +0 -11
- package/types/enums/statistics.ts +0 -13
- package/types/exulu-table-definition.ts +0 -79
- package/types/file-types.ts +0 -18
- package/types/models/agent-session.ts +0 -27
- package/types/models/agent.ts +0 -68
- package/types/models/context.ts +0 -53
- package/types/models/embedding.ts +0 -17
- package/types/models/eval-run.ts +0 -40
- package/types/models/exulu-agent-tool-config.ts +0 -11
- package/types/models/item.ts +0 -21
- package/types/models/job.ts +0 -8
- package/types/models/project.ts +0 -16
- package/types/models/rate-limiter-rules.ts +0 -7
- package/types/models/test-case.ts +0 -25
- package/types/models/tool.ts +0 -9
- package/types/models/user-role.ts +0 -12
- package/types/models/user.ts +0 -20
- package/types/models/variable.ts +0 -8
- package/types/models/vector-methods.ts +0 -7
- package/types/provider-config.ts +0 -21
- package/types/queue-config.ts +0 -16
- package/types/rbac-rights-modes.ts +0 -1
- package/types/statistics.ts +0 -20
- package/types/workflow.ts +0 -31
- /package/ee/{documents → python/documents}/THIRD_PARTY_LICENSES/docling.txt +0 -0
- /package/ee/{documents/processing → python}/requirements.txt +0 -0
package/README.md
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
**Exulu IMP** (Intelligence Management Platform) is an open-source TypeScript framework for building production-ready AI agent applications. It provides the backend infrastructure for managing agents, semantic search contexts, background job processing, and tool integrations.
|
|
2
|
+
|
|
1
3
|
<a href="https://exulu.com/"><img width="100%" src="https://mintlify.s3.us-west-1.amazonaws.com/exulu/images/create_agents.png" alt="Exulu IMP - Create, deploy and manage AI agents" /></a>
|
|
2
4
|
|
|
3
5
|
<br />
|
|
@@ -26,8 +28,6 @@
|
|
|
26
28
|
> [!IMPORTANT]
|
|
27
29
|
> ⭐ Star this repo to follow development and updates
|
|
28
30
|
|
|
29
|
-
**Exulu IMP** (Intelligence Management Platform) is an open-source TypeScript framework for building production-ready AI agent applications. It provides the backend infrastructure for managing agents, semantic search contexts, background job processing, and tool integrations.
|
|
30
|
-
|
|
31
31
|
<h3>✨ Why Exulu IMP?</h3>
|
|
32
32
|
|
|
33
33
|
<ul>
|
package/dist/index.cjs
CHANGED
|
@@ -43,12 +43,20 @@ __export(index_exports, {
|
|
|
43
43
|
ExuluEval: () => ExuluEval,
|
|
44
44
|
ExuluJobs: () => ExuluJobs,
|
|
45
45
|
ExuluOtel: () => ExuluOtel,
|
|
46
|
+
ExuluProvider: () => ExuluProvider,
|
|
47
|
+
ExuluPython: () => ExuluPython,
|
|
46
48
|
ExuluQueues: () => queues,
|
|
47
49
|
ExuluReranker: () => ExuluReranker,
|
|
48
50
|
ExuluTool: () => ExuluTool,
|
|
49
51
|
ExuluVariables: () => ExuluVariables
|
|
50
52
|
});
|
|
51
53
|
module.exports = __toCommonJS(index_exports);
|
|
54
|
+
|
|
55
|
+
// node_modules/tsup/assets/cjs_shims.js
|
|
56
|
+
var getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${__filename}`).href : document.currentScript && document.currentScript.src || new URL("main.js", document.baseURI).href;
|
|
57
|
+
var importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
|
|
58
|
+
|
|
59
|
+
// src/index.ts
|
|
52
60
|
var import_config = require("dotenv/config");
|
|
53
61
|
|
|
54
62
|
// src/redis/client.ts
|
|
@@ -910,7 +918,7 @@ var uploadFile = async (file, fileName, config, options = {}, user, customBucket
|
|
|
910
918
|
if (error.name === "SignatureDoesNotMatch" || error.name === "InvalidAccessKeyId" || error.name === "AccessDenied") {
|
|
911
919
|
if (attempt < maxRetries) {
|
|
912
920
|
const backoffMs = Math.pow(2, attempt) * 1e3;
|
|
913
|
-
await new Promise((
|
|
921
|
+
await new Promise((resolve3) => setTimeout(resolve3, backoffMs));
|
|
914
922
|
s3Client = void 0;
|
|
915
923
|
getS3Client(config);
|
|
916
924
|
continue;
|
|
@@ -4640,7 +4648,7 @@ async function withRetry(generateFn, maxRetries = 3) {
|
|
|
4640
4648
|
if (attempt === maxRetries) {
|
|
4641
4649
|
throw error;
|
|
4642
4650
|
}
|
|
4643
|
-
await new Promise((
|
|
4651
|
+
await new Promise((resolve3) => setTimeout(resolve3, Math.pow(2, attempt) * 1e3));
|
|
4644
4652
|
}
|
|
4645
4653
|
}
|
|
4646
4654
|
throw lastError;
|
|
@@ -5296,8 +5304,8 @@ async function* executeAgenticRetrieval({
|
|
|
5296
5304
|
try {
|
|
5297
5305
|
let finishResolver;
|
|
5298
5306
|
let finishRejector;
|
|
5299
|
-
const finishPromise = new Promise((
|
|
5300
|
-
finishResolver =
|
|
5307
|
+
const finishPromise = new Promise((resolve3, reject) => {
|
|
5308
|
+
finishResolver = resolve3;
|
|
5301
5309
|
finishRejector = reject;
|
|
5302
5310
|
});
|
|
5303
5311
|
const timeoutId = setTimeout(() => {
|
|
@@ -7376,7 +7384,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7376
7384
|
} = await validateWorkflowPayload(data, providers);
|
|
7377
7385
|
const retries = 3;
|
|
7378
7386
|
let attempts = 0;
|
|
7379
|
-
const promise = new Promise(async (
|
|
7387
|
+
const promise = new Promise(async (resolve3, reject) => {
|
|
7380
7388
|
while (attempts < retries) {
|
|
7381
7389
|
try {
|
|
7382
7390
|
const messages2 = await processUiMessagesFlow({
|
|
@@ -7391,7 +7399,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7391
7399
|
config,
|
|
7392
7400
|
variables: data.inputs
|
|
7393
7401
|
});
|
|
7394
|
-
|
|
7402
|
+
resolve3(messages2);
|
|
7395
7403
|
break;
|
|
7396
7404
|
} catch (error) {
|
|
7397
7405
|
console.error(
|
|
@@ -7402,7 +7410,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7402
7410
|
if (attempts >= retries) {
|
|
7403
7411
|
reject(new Error(error instanceof Error ? error.message : String(error)));
|
|
7404
7412
|
}
|
|
7405
|
-
await new Promise((
|
|
7413
|
+
await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
|
|
7406
7414
|
}
|
|
7407
7415
|
}
|
|
7408
7416
|
});
|
|
@@ -7452,7 +7460,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7452
7460
|
} = await validateEvalPayload(data, providers);
|
|
7453
7461
|
const retries = 3;
|
|
7454
7462
|
let attempts = 0;
|
|
7455
|
-
const promise = new Promise(async (
|
|
7463
|
+
const promise = new Promise(async (resolve3, reject) => {
|
|
7456
7464
|
while (attempts < retries) {
|
|
7457
7465
|
try {
|
|
7458
7466
|
const messages2 = await processUiMessagesFlow({
|
|
@@ -7466,7 +7474,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7466
7474
|
tools,
|
|
7467
7475
|
config
|
|
7468
7476
|
});
|
|
7469
|
-
|
|
7477
|
+
resolve3(messages2);
|
|
7470
7478
|
break;
|
|
7471
7479
|
} catch (error) {
|
|
7472
7480
|
console.error(
|
|
@@ -7477,7 +7485,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7477
7485
|
if (attempts >= retries) {
|
|
7478
7486
|
reject(new Error(error instanceof Error ? error.message : String(error)));
|
|
7479
7487
|
}
|
|
7480
|
-
await new Promise((
|
|
7488
|
+
await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
|
|
7481
7489
|
}
|
|
7482
7490
|
}
|
|
7483
7491
|
});
|
|
@@ -7892,7 +7900,7 @@ var pollJobResult = async ({
|
|
|
7892
7900
|
attempts++;
|
|
7893
7901
|
const job = await import_bullmq3.Job.fromId(queue.queue, jobId);
|
|
7894
7902
|
if (!job) {
|
|
7895
|
-
await new Promise((
|
|
7903
|
+
await new Promise((resolve3) => setTimeout((resolve4) => resolve4(true), 2e3));
|
|
7896
7904
|
continue;
|
|
7897
7905
|
}
|
|
7898
7906
|
const elapsedTime = Date.now() - startTime;
|
|
@@ -7922,7 +7930,7 @@ var pollJobResult = async ({
|
|
|
7922
7930
|
console.log(`[EXULU] eval function ${job.id} result: ${result}`);
|
|
7923
7931
|
break;
|
|
7924
7932
|
}
|
|
7925
|
-
await new Promise((
|
|
7933
|
+
await new Promise((resolve3) => setTimeout((resolve4) => resolve4(true), 2e3));
|
|
7926
7934
|
}
|
|
7927
7935
|
return result;
|
|
7928
7936
|
};
|
|
@@ -8030,7 +8038,7 @@ var processUiMessagesFlow = async ({
|
|
|
8030
8038
|
label: agent.name,
|
|
8031
8039
|
trigger: "agent"
|
|
8032
8040
|
};
|
|
8033
|
-
messageHistory = await new Promise(async (
|
|
8041
|
+
messageHistory = await new Promise(async (resolve3, reject) => {
|
|
8034
8042
|
const startTime = Date.now();
|
|
8035
8043
|
try {
|
|
8036
8044
|
const result = await provider.generateStream({
|
|
@@ -8107,7 +8115,7 @@ var processUiMessagesFlow = async ({
|
|
|
8107
8115
|
})
|
|
8108
8116
|
] : []
|
|
8109
8117
|
]);
|
|
8110
|
-
|
|
8118
|
+
resolve3({
|
|
8111
8119
|
messages,
|
|
8112
8120
|
metadata: {
|
|
8113
8121
|
tokens: {
|
|
@@ -8855,7 +8863,7 @@ type PageInfo {
|
|
|
8855
8863
|
} = await validateWorkflowPayload(jobData, providers);
|
|
8856
8864
|
const retries = 3;
|
|
8857
8865
|
let attempts = 0;
|
|
8858
|
-
const promise = new Promise(async (
|
|
8866
|
+
const promise = new Promise(async (resolve3, reject) => {
|
|
8859
8867
|
while (attempts < retries) {
|
|
8860
8868
|
try {
|
|
8861
8869
|
const messages2 = await processUiMessagesFlow({
|
|
@@ -8870,7 +8878,7 @@ type PageInfo {
|
|
|
8870
8878
|
config,
|
|
8871
8879
|
variables: args.variables
|
|
8872
8880
|
});
|
|
8873
|
-
|
|
8881
|
+
resolve3(messages2);
|
|
8874
8882
|
break;
|
|
8875
8883
|
} catch (error) {
|
|
8876
8884
|
console.error(
|
|
@@ -8884,7 +8892,7 @@ type PageInfo {
|
|
|
8884
8892
|
if (attempts >= retries) {
|
|
8885
8893
|
reject(error instanceof Error ? error : new Error(String(error)));
|
|
8886
8894
|
}
|
|
8887
|
-
await new Promise((
|
|
8895
|
+
await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
|
|
8888
8896
|
}
|
|
8889
8897
|
}
|
|
8890
8898
|
});
|
|
@@ -9137,10 +9145,10 @@ type PageInfo {
|
|
|
9137
9145
|
contexts.map(async (context2) => {
|
|
9138
9146
|
let processor = null;
|
|
9139
9147
|
if (context2.processor) {
|
|
9140
|
-
processor = await new Promise(async (
|
|
9148
|
+
processor = await new Promise(async (resolve3, reject) => {
|
|
9141
9149
|
const config2 = context2.processor?.config;
|
|
9142
9150
|
const queue = await config2?.queue;
|
|
9143
|
-
|
|
9151
|
+
resolve3({
|
|
9144
9152
|
name: context2.processor.name,
|
|
9145
9153
|
description: context2.processor.description,
|
|
9146
9154
|
queue: queue?.queue?.name || void 0,
|
|
@@ -9221,10 +9229,10 @@ type PageInfo {
|
|
|
9221
9229
|
}
|
|
9222
9230
|
let processor = null;
|
|
9223
9231
|
if (data.processor) {
|
|
9224
|
-
processor = await new Promise(async (
|
|
9232
|
+
processor = await new Promise(async (resolve3, reject) => {
|
|
9225
9233
|
const config2 = data.processor?.config;
|
|
9226
9234
|
const queue = await config2?.queue;
|
|
9227
|
-
|
|
9235
|
+
resolve3({
|
|
9228
9236
|
name: data.processor.name,
|
|
9229
9237
|
description: data.processor.description,
|
|
9230
9238
|
queue: queue?.queue?.name || void 0,
|
|
@@ -13112,7 +13120,7 @@ var internetSearchTool = new ExuluTool({
|
|
|
13112
13120
|
} catch (error) {
|
|
13113
13121
|
if (error instanceof import_perplexity_ai.default.RateLimitError && attempt < maxRetries - 1) {
|
|
13114
13122
|
const delay = Math.pow(2, attempt) * 1e3 + Math.random() * 1e3;
|
|
13115
|
-
await new Promise((
|
|
13123
|
+
await new Promise((resolve3) => setTimeout(resolve3, delay));
|
|
13116
13124
|
continue;
|
|
13117
13125
|
}
|
|
13118
13126
|
throw error;
|
|
@@ -15309,7 +15317,7 @@ var create = ({
|
|
|
15309
15317
|
// src/index.ts
|
|
15310
15318
|
var import_crypto_js9 = __toESM(require("crypto-js"), 1);
|
|
15311
15319
|
|
|
15312
|
-
// ee/markdown.ts
|
|
15320
|
+
// ee/chunking/markdown.ts
|
|
15313
15321
|
var extractPageTag = (text) => {
|
|
15314
15322
|
let match;
|
|
15315
15323
|
let lastPageNumber = void 0;
|
|
@@ -15736,9 +15744,172 @@ var MarkdownChunker = class {
|
|
|
15736
15744
|
}
|
|
15737
15745
|
};
|
|
15738
15746
|
|
|
15739
|
-
//
|
|
15747
|
+
// src/utils/python-setup.ts
|
|
15740
15748
|
var import_child_process = require("child_process");
|
|
15741
15749
|
var import_util2 = require("util");
|
|
15750
|
+
var import_path = require("path");
|
|
15751
|
+
var import_fs2 = require("fs");
|
|
15752
|
+
var import_url = require("url");
|
|
15753
|
+
var execAsync = (0, import_util2.promisify)(import_child_process.exec);
|
|
15754
|
+
/**
 * Locate the installation directory of the `@exulu/backend` package.
 *
 * Starting from the directory containing this bundle, walks up the directory
 * tree (inspecting at most 10 directories) looking for a `package.json` whose
 * `name` field is `"@exulu/backend"`.
 *
 * @returns {string} The directory holding the matching `package.json`, or,
 *   when none is found, the directory two levels above this bundle's directory.
 */
function getPackageRoot() {
  const bundleDir = import_path.dirname(import_url.fileURLToPath(importMetaUrl));
  const maxLevels = 10;
  let candidate = bundleDir;
  for (let level = 0; level < maxLevels; level += 1) {
    const manifestPath = import_path.join(candidate, "package.json");
    if (import_fs2.existsSync(manifestPath)) {
      try {
        const manifest = JSON.parse(import_fs2.readFileSync(manifestPath, "utf-8"));
        if (manifest.name === "@exulu/backend") {
          return candidate;
        }
      } catch {
        // An unreadable or malformed package.json is ignored on purpose;
        // the search simply continues with the parent directory.
      }
    }
    const parent = import_path.resolve(candidate, "..");
    if (parent === candidate) {
      // Reached the filesystem root: nothing further up to inspect.
      break;
    }
    candidate = parent;
  }
  return import_path.resolve(bundleDir, "../..");
}
|
|
15780
|
+
/**
 * Build the absolute path of the Python setup script inside the package.
 *
 * @param {string} packageRoot - Root directory of the package.
 * @returns {string} Absolute path to `ee/python/setup.sh` under `packageRoot`.
 */
function getSetupScriptPath(packageRoot) {
  const { resolve: toAbsolute } = import_path;
  return toAbsolute(packageRoot, "ee/python/setup.sh");
}
|
|
15783
|
+
/**
 * Build the absolute path of the package's Python virtual environment.
 *
 * @param {string} packageRoot - Root directory of the package.
 * @returns {string} Absolute path to `ee/python/.venv` under `packageRoot`.
 */
function getVenvPath(packageRoot) {
  const { resolve: toAbsolute } = import_path;
  return toAbsolute(packageRoot, "ee/python/.venv");
}
|
|
15786
|
+
/**
 * Report whether the package's Python virtual environment appears to exist.
 *
 * The check is purely file-based: both the venv directory and its
 * `bin/python` entry must be present. The interpreter is not executed.
 *
 * @param {string} [packageRoot] - Package root; located via `getPackageRoot()`
 *   when omitted (null/undefined).
 * @returns {boolean} `true` when both paths exist, otherwise `false`.
 */
function isPythonEnvironmentSetup(packageRoot) {
  const venvDir = getVenvPath(packageRoot ?? getPackageRoot());
  if (!import_fs2.existsSync(venvDir)) {
    return false;
  }
  const interpreter = import_path.join(venvDir, "bin", "python");
  return import_fs2.existsSync(interpreter);
}
|
|
15792
|
+
/**
 * Create the package's Python environment by running `ee/python/setup.sh`
 * through bash.
 *
 * This function never rejects for setup problems; failures are reported via
 * the returned object.
 *
 * @param {object} [options]
 * @param {string} [options.packageRoot] - Package root; defaults to `getPackageRoot()`.
 * @param {boolean} [options.force=false] - When true, the "already set up"
 *   short-circuit is skipped and the setup script is run regardless.
 * @param {boolean} [options.verbose=false] - Log progress and script output to the console.
 * @param {number} [options.timeout=600000] - Timeout for the setup script in ms (10 minutes).
 * @returns {Promise<{success: boolean, message: string, alreadyExists: boolean,
 *   pythonVersion?: string, output?: string}>} Outcome of the setup attempt.
 *   `pythonVersion` is taken from the first `Python X.Y.Z` occurrence in the
 *   script's combined stdout/stderr, if any.
 */
async function setupPythonEnvironment(options = {}) {
  const {
    packageRoot = getPackageRoot(),
    force = false,
    verbose = false,
    timeout = 6e5
    // 10 minutes
  } = options;
  if (!force && isPythonEnvironmentSetup(packageRoot)) {
    if (verbose) {
      console.log("\u2713 Python environment already set up");
    }
    return {
      success: true,
      message: "Python environment already exists",
      alreadyExists: true
    };
  }
  const setupScriptPath = getSetupScriptPath(packageRoot);
  if (!(0, import_fs2.existsSync)(setupScriptPath)) {
    return {
      success: false,
      message: `Setup script not found at: ${setupScriptPath}`,
      alreadyExists: false
    };
  }
  try {
    if (verbose) {
      console.log("Setting up Python environment...");
    }
    const { stdout, stderr } = await execAsync(`bash "${setupScriptPath}"`, {
      cwd: packageRoot,
      timeout,
      env: {
        ...process.env,
        // Keep Python from writing .pyc files while the script runs
        PYTHONDONTWRITEBYTECODE: "1"
      },
      maxBuffer: 10 * 1024 * 1024
      // 10MB buffer
    });
    const output = stdout + stderr;
    const versionMatch = output.match(/Python (\d+\.\d+\.\d+)/);
    const pythonVersion = versionMatch ? versionMatch[1] : void 0;
    if (verbose) {
      console.log(output);
    }
    return {
      success: true,
      message: "Python environment set up successfully",
      alreadyExists: false,
      pythonVersion,
      output
    };
  } catch (error) {
    // A rejection does not always carry captured output (for example when an
    // option is rejected before the child process starts). Default each
    // stream to "" so `output` is always a string instead of NaN or
    // "undefinedundefined".
    const errorOutput = `${error?.stdout ?? ""}${error?.stderr ?? ""}`;
    const reason = error instanceof Error ? error.message : String(error);
    return {
      success: false,
      message: `Setup failed: ${reason}`,
      alreadyExists: false,
      output: errorOutput
    };
  }
}
|
|
15856
|
+
/**
 * Produce the human-readable help text shown when the Python environment is
 * missing: the available setup options, the requirements, and per-platform
 * hints for installing Python.
 *
 * @returns {string} Multi-line instructions with no leading or trailing whitespace.
 */
function getPythonSetupInstructions() {
  const lines = [
    "Python environment not set up. Please run one of the following:",
    "",
    "Option 1 (Automatic):",
    "import { setupPythonEnvironment } from '@exulu/backend';",
    "await setupPythonEnvironment();",
    "",
    "Option 2 (Manual - for package consumers):",
    "npx @exulu/backend setup-python",
    "",
    "Option 3 (Manual - for contributors):",
    "npm run python:setup",
    "",
    "Requirements:",
    "- Python 3.10 or higher must be installed",
    "- pip must be available",
    "",
    "Installing Python:",
    "- macOS: brew install python@3.12",
    "- Ubuntu/Debian: sudo apt-get install python3.12",
    "- Windows: Download from https://www.python.org/downloads/"
  ];
  return lines.join("\n");
}
|
|
15880
|
+
/**
 * Check that the package's Python virtual environment exists and that its
 * interpreter can be started.
 *
 * Three checks run in order: the venv directory exists, `bin/python` exists
 * inside it, and `python --version` runs without error.
 *
 * @param {string} [packageRoot] - Package root; located via `getPackageRoot()`
 *   when omitted (null/undefined).
 * @returns {Promise<{valid: boolean, message: string}>} `valid` is true only
 *   when all three checks pass; `message` explains the outcome.
 */
async function validatePythonEnvironment(packageRoot) {
  const root = packageRoot ?? getPackageRoot();
  const venvDir = getVenvPath(root);
  const interpreter = import_path.join(venvDir, "bin", "python");

  if (!import_fs2.existsSync(venvDir)) {
    // No venv at all: point the user at the full setup instructions.
    return { valid: false, message: getPythonSetupInstructions() };
  }

  if (!import_fs2.existsSync(interpreter)) {
    return {
      valid: false,
      message: "Python virtual environment is corrupted. Please run:\n await setupPythonEnvironment({ force: true })"
    };
  }

  try {
    await execAsync(`"${interpreter}" --version`, { cwd: root });
  } catch {
    // The interpreter file exists but could not be executed successfully.
    return {
      valid: false,
      message: "Python executable is not working. Please run:\n await setupPythonEnvironment({ force: true })"
    };
  }

  return { valid: true, message: "Python environment is valid" };
}
|
|
15909
|
+
|
|
15910
|
+
// ee/python/documents/processing/doc_processor.ts
|
|
15911
|
+
var import_child_process3 = require("child_process");
|
|
15912
|
+
var import_util4 = require("util");
|
|
15742
15913
|
var fs2 = __toESM(require("fs"), 1);
|
|
15743
15914
|
var path = __toESM(require("path"), 1);
|
|
15744
15915
|
var import_ai7 = require("ai");
|
|
@@ -15749,7 +15920,131 @@ var mammoth = __toESM(require("mammoth"), 1);
|
|
|
15749
15920
|
var import_turndown = __toESM(require("turndown"), 1);
|
|
15750
15921
|
var import_word_extractor = __toESM(require("word-extractor"), 1);
|
|
15751
15922
|
var import_officeparser2 = require("officeparser");
|
|
15752
|
-
|
|
15923
|
+
|
|
15924
|
+
// src/utils/python-executor.ts
|
|
15925
|
+
var import_child_process2 = require("child_process");
|
|
15926
|
+
var import_util3 = require("util");
|
|
15927
|
+
var import_path2 = require("path");
|
|
15928
|
+
var import_fs3 = require("fs");
|
|
15929
|
+
var import_url2 = require("url");
|
|
15930
|
+
var execAsync2 = (0, import_util3.promisify)(import_child_process2.exec);
|
|
15931
|
+
/**
 * Locate the installation directory of the `@exulu/backend` package.
 *
 * Climbs from this bundle's directory towards the filesystem root, checking
 * at most 10 directories for a `package.json` named `"@exulu/backend"`.
 *
 * @returns {string} The matching directory, or the directory two levels above
 *   this bundle's directory when no match is found.
 */
function getPackageRoot2() {
  const originDir = import_path2.dirname(import_url2.fileURLToPath(importMetaUrl));
  let dir = originDir;
  let remaining = 10;
  while (remaining > 0) {
    const manifestFile = import_path2.join(dir, "package.json");
    if (import_fs3.existsSync(manifestFile)) {
      try {
        const { name } = JSON.parse(import_fs3.readFileSync(manifestFile, "utf-8"));
        if (name === "@exulu/backend") {
          return dir;
        }
      } catch {
        // Unreadable or invalid package.json: skip it and keep climbing.
      }
    }
    const above = import_path2.resolve(dir, "..");
    if (above === dir) {
      // Filesystem root reached.
      break;
    }
    dir = above;
    remaining -= 1;
  }
  return import_path2.resolve(originDir, "../..");
}
|
|
15956
|
+
/**
 * Error raised when the Python environment fails validation.
 * Instances carry `name === "PythonEnvironmentError"`.
 */
var PythonEnvironmentError = class extends Error {
  /**
   * @param {string} message - Description of what is wrong with the environment.
   */
  constructor(message) {
    super(message);
    Object.assign(this, { name: "PythonEnvironmentError" });
  }
};
|
|
15962
|
+
/**
 * Error describing a failed Python script run. Besides the message it keeps
 * the captured standard output, standard error and exit code.
 * Instances carry `name === "PythonExecutionError"`.
 */
var PythonExecutionError = class extends Error {
  stdout;
  stderr;
  exitCode;
  /**
   * @param {string} message - Description of the failure.
   * @param {string} stdout - Captured standard output.
   * @param {string} stderr - Captured standard error.
   * @param {*} exitCode - Exit code reported for the run.
   */
  constructor(message, stdout, stderr, exitCode) {
    super(message);
    Object.assign(this, {
      name: "PythonExecutionError",
      stdout,
      stderr,
      exitCode
    });
  }
};
|
|
15974
|
+
/**
 * Build the absolute path of the package's Python virtual environment.
 *
 * @param {string} packageRoot - Root directory of the package.
 * @returns {string} Absolute path to `ee/python/.venv` under `packageRoot`.
 */
function getVenvPath2(packageRoot) {
  const venvRelativePath = "ee/python/.venv";
  return import_path2.resolve(packageRoot, venvRelativePath);
}
|
|
15977
|
+
/**
 * Build the path of the Python interpreter inside the package's virtual
 * environment (`<venv>/bin/python`). The path is not checked for existence.
 *
 * @param {string} packageRoot - Root directory of the package.
 * @returns {string} Path to the venv's `bin/python`.
 */
function getPythonExecutable(packageRoot) {
  return import_path2.join(getVenvPath2(packageRoot), "bin", "python");
}
|
|
15981
|
+
/**
 * Ensure the Python environment is usable before running a script.
 *
 * @param {string} [packageRoot] - Package root forwarded to `validatePythonEnvironment`.
 * @returns {Promise<void>} Resolves when the environment is valid.
 * @throws {PythonEnvironmentError} When validation reports the environment as
 *   invalid; the error message is the validation message.
 */
async function validatePythonEnvironmentForExecution(packageRoot) {
  const { valid, message } = await validatePythonEnvironment(packageRoot);
  if (valid) {
    return;
  }
  throw new PythonEnvironmentError(message);
}
|
|
15987
|
+
/**
 * Run a Python script with the interpreter from the package's virtual
 * environment and capture its output.
 *
 * The interpreter is started directly (no shell), so every entry of `args` is
 * passed to the script verbatim as one argument. Spaces, quotes, `$`,
 * backticks, `;` and similar characters are never interpreted by a shell.
 *
 * @param {object} config
 * @param {string} config.scriptPath - Script path, resolved against `packageRoot`.
 * @param {string[]} [config.args=[]] - Arguments passed to the script.
 * @param {string} [config.packageRoot] - Package root; defaults to `getPackageRoot2()`.
 * @param {string} [config.cwd] - Working directory; defaults to `process.cwd()`.
 * @param {number} [config.timeout=300000] - Timeout in ms (5 minutes).
 * @param {object} [config.env={}] - Extra environment variables, merged over `process.env`.
 * @param {boolean} [config.validateEnvironment=true] - Validate the Python
 *   environment before running.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number, success: boolean}>}
 *   Trimmed output of a successful run (`exitCode` 0, `success` true).
 * @throws {PythonEnvironmentError} When validation is enabled and fails.
 * @throws {PythonExecutionError} When the script file does not exist or the
 *   run fails; carries the captured stdout, stderr and exit code.
 */
async function executePythonScript(config) {
  const {
    scriptPath,
    args = [],
    packageRoot = getPackageRoot2(),
    cwd = process.cwd(),
    timeout = 3e5,
    // 5 minutes default
    env = {},
    validateEnvironment = true
  } = config;
  if (validateEnvironment) {
    await validatePythonEnvironmentForExecution(packageRoot);
  }
  const resolvedScriptPath = (0, import_path2.resolve)(packageRoot, scriptPath);
  if (!(0, import_fs3.existsSync)(resolvedScriptPath)) {
    throw new PythonExecutionError(
      `Python script not found: ${resolvedScriptPath}`,
      "",
      "",
      1
    );
  }
  const pythonExecutable = getPythonExecutable(packageRoot);
  // execFile starts the interpreter without a shell and takes the arguments
  // as an array. This removes the need for manual quoting, which previously
  // left the interpreter path unquoted and only quoted arguments containing
  // a space.
  const execFileAsync = (0, import_util3.promisify)(import_child_process2.execFile);
  try {
    const { stdout, stderr } = await execFileAsync(
      pythonExecutable,
      [resolvedScriptPath, ...args],
      {
        cwd,
        timeout,
        env: {
          ...process.env,
          ...env,
          // Ensure Python doesn't write bytecode files
          PYTHONDONTWRITEBYTECODE: "1"
        },
        maxBuffer: 10 * 1024 * 1024
        // 10MB buffer for output
      }
    );
    return {
      stdout: stdout.trim(),
      stderr: stderr.trim(),
      exitCode: 0,
      success: true
    };
  } catch (error) {
    const stdout = error.stdout?.toString() ?? "";
    const stderr = error.stderr?.toString() ?? "";
    // `code` is absent when the process was killed (e.g. on timeout); report 1 then.
    const exitCode = error.code ?? 1;
    throw new PythonExecutionError(
      `Python script execution failed: ${error.message}`,
      stdout,
      stderr,
      exitCode
    );
  }
}
|
|
16046
|
+
|
|
16047
|
+
// ee/python/documents/processing/doc_processor.ts
|
|
15753
16048
|
async function processDocx(file) {
|
|
15754
16049
|
const html = await mammoth.convertToHtml({ buffer: file });
|
|
15755
16050
|
const turndownService = new import_turndown.default();
|
|
@@ -15894,15 +16189,15 @@ Specific notes and guidelines:
|
|
|
15894
16189
|
};
|
|
15895
16190
|
return validation;
|
|
15896
16191
|
}
|
|
15897
|
-
async function validateWithVLM(
|
|
15898
|
-
console.log(`[EXULU] Starting VLM validation for docling output, ${
|
|
16192
|
+
async function validateWithVLM(document2, model, verbose = false, concurrency = 10) {
|
|
16193
|
+
console.log(`[EXULU] Starting VLM validation for docling output, ${document2.length} pages...`);
|
|
15899
16194
|
console.log(
|
|
15900
16195
|
`[EXULU] Concurrency limit: ${concurrency}`
|
|
15901
16196
|
);
|
|
15902
16197
|
let validatedCount = 0;
|
|
15903
16198
|
let correctedCount = 0;
|
|
15904
16199
|
const limit = (0, import_p_limit.default)(concurrency);
|
|
15905
|
-
const validationTasks =
|
|
16200
|
+
const validationTasks = document2.map(
|
|
15906
16201
|
(page) => limit(async () => {
|
|
15907
16202
|
const imagePath = page.image;
|
|
15908
16203
|
if (!imagePath) {
|
|
@@ -15947,7 +16242,7 @@ async function validateWithVLM(document, model, verbose = false, concurrency = 1
|
|
|
15947
16242
|
console.log(`[EXULU] VLM validation complete:`);
|
|
15948
16243
|
console.log(`[EXULU] Validated: ${validatedCount} chunks`);
|
|
15949
16244
|
console.log(`[EXULU] Corrected: ${correctedCount} chunks`);
|
|
15950
|
-
return
|
|
16245
|
+
return document2;
|
|
15951
16246
|
}
|
|
15952
16247
|
async function processDocument(filePath, fileType, buffer, tempDir, config, verbose = false) {
|
|
15953
16248
|
console.log("Starting document processing...");
|
|
@@ -15999,16 +16294,24 @@ async function processPdf(buffer, paths, config, verbose = false) {
|
|
|
15999
16294
|
try {
|
|
16000
16295
|
let json;
|
|
16001
16296
|
if (config?.docling) {
|
|
16002
|
-
|
|
16003
|
-
|
|
16004
|
-
|
|
16005
|
-
|
|
16006
|
-
|
|
16007
|
-
|
|
16008
|
-
|
|
16009
|
-
|
|
16010
|
-
|
|
16011
|
-
|
|
16297
|
+
console.log(`[EXULU] Processing document with document_to_markdown.py`);
|
|
16298
|
+
const result = await executePythonScript({
|
|
16299
|
+
scriptPath: "ee/python/documents/processing/document_to_markdown.py",
|
|
16300
|
+
args: [
|
|
16301
|
+
paths.source,
|
|
16302
|
+
"-o",
|
|
16303
|
+
paths.json,
|
|
16304
|
+
"--images-dir",
|
|
16305
|
+
paths.images
|
|
16306
|
+
],
|
|
16307
|
+
timeout: 30 * 60 * 1e3
|
|
16308
|
+
// 30 minutes for large documents
|
|
16309
|
+
});
|
|
16310
|
+
if (result.stderr) {
|
|
16311
|
+
console.log("Processing info:", result.stderr.trim());
|
|
16312
|
+
}
|
|
16313
|
+
if (!result.success) {
|
|
16314
|
+
throw new Error(`Document processing failed: ${result.stderr}`);
|
|
16012
16315
|
}
|
|
16013
16316
|
const jsonContent = await fs2.promises.readFile(paths.json, "utf-8");
|
|
16014
16317
|
json = JSON.parse(jsonContent);
|
|
@@ -16023,10 +16326,10 @@ async function processPdf(buffer, paths, config, verbose = false) {
|
|
|
16023
16326
|
headings: []
|
|
16024
16327
|
}];
|
|
16025
16328
|
}
|
|
16026
|
-
console.log(`
|
|
16329
|
+
console.log(`[EXULU]
|
|
16027
16330
|
\u2713 Document processing completed successfully`);
|
|
16028
|
-
console.log(`
|
|
16029
|
-
console.log(`
|
|
16331
|
+
console.log(`[EXULU] Total pages: ${json.length}`);
|
|
16332
|
+
console.log(`[EXULU] Output file: ${paths.json}`);
|
|
16030
16333
|
if (!config?.docling && config?.vlm?.model) {
|
|
16031
16334
|
console.error("[EXULU] VLM validation is only supported when docling is enabled, skipping validation.");
|
|
16032
16335
|
}
|
|
@@ -16239,6 +16542,12 @@ var ExuluChunkers = {
|
|
|
16239
16542
|
rules: RecursiveRules
|
|
16240
16543
|
}
|
|
16241
16544
|
};
|
|
16545
|
+
// Namespace object that groups the Python environment helpers under short,
// task-oriented property names. Properties are attached in a fixed order so
// key enumeration stays stable.
var ExuluPython = {};
ExuluPython.setup = setupPythonEnvironment;
ExuluPython.check = isPythonEnvironmentSetup;
ExuluPython.validate = validatePythonEnvironment;
ExuluPython.instructions = getPythonSetupInstructions;
|
|
16242
16551
|
// Annotate the CommonJS export names for ESM import in node:
|
|
16243
16552
|
0 && (module.exports = {
|
|
16244
16553
|
EXULU_JOB_STATUS_ENUM,
|
|
@@ -16254,6 +16563,8 @@ var ExuluChunkers = {
|
|
|
16254
16563
|
ExuluEval,
|
|
16255
16564
|
ExuluJobs,
|
|
16256
16565
|
ExuluOtel,
|
|
16566
|
+
ExuluProvider,
|
|
16567
|
+
ExuluPython,
|
|
16257
16568
|
ExuluQueues,
|
|
16258
16569
|
ExuluReranker,
|
|
16259
16570
|
ExuluTool,
|