@exulu/backend 1.48.2 → 1.49.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +351 -42
- package/dist/index.d.cts +96 -1
- package/dist/index.d.ts +96 -1
- package/dist/index.js +340 -38
- package/ee/{markdown.ts → chunking/markdown.ts} +2 -2
- package/ee/python/README.md +295 -0
- package/ee/python/documents/processing/README.md +155 -0
- package/ee/{documents → python/documents}/processing/doc_processor.ts +25 -17
- package/ee/{documents/processing/pdf_to_markdown.py → python/documents/processing/document_to_markdown.py} +3 -10
- package/ee/python/setup.sh +180 -0
- package/package.json +14 -3
- package/scripts/postinstall.cjs +149 -0
- package/.agents/skills/mintlify/SKILL.md +0 -347
- package/.editorconfig +0 -15
- package/.eslintrc.json +0 -52
- package/.github/workflows/release-backend.yml +0 -38
- package/.husky/commit-msg +0 -1
- package/.jscpd.json +0 -18
- package/.mcp.json +0 -25
- package/.nvmrc +0 -1
- package/.prettierignore +0 -5
- package/.prettierrc.json +0 -12
- package/CHANGELOG.md +0 -8
- package/SECURITY.md +0 -5
- package/commitlint.config.js +0 -4
- package/devops/documentation/patch-older-releases.md +0 -42
- package/ee/documents/processing/build_pdf_processor.sh +0 -35
- package/ee/documents/processing/chunk_markdown.py +0 -263
- package/ee/documents/processing/pdf_processor.spec +0 -115
- package/eslint.config.js +0 -88
- package/jest.config.ts +0 -25
- package/mintlify-docs/.mintignore +0 -7
- package/mintlify-docs/AGENTS.md +0 -33
- package/mintlify-docs/CLAUDE.MD +0 -50
- package/mintlify-docs/CONTRIBUTING.md +0 -32
- package/mintlify-docs/LICENSE +0 -21
- package/mintlify-docs/README.md +0 -55
- package/mintlify-docs/ai-tools/claude-code.mdx +0 -43
- package/mintlify-docs/ai-tools/cursor.mdx +0 -39
- package/mintlify-docs/ai-tools/windsurf.mdx +0 -39
- package/mintlify-docs/api-reference/core-types/agent-types.mdx +0 -110
- package/mintlify-docs/api-reference/core-types/analytics-types.mdx +0 -95
- package/mintlify-docs/api-reference/core-types/configuration-types.mdx +0 -83
- package/mintlify-docs/api-reference/core-types/evaluation-types.mdx +0 -106
- package/mintlify-docs/api-reference/core-types/job-types.mdx +0 -135
- package/mintlify-docs/api-reference/core-types/overview.mdx +0 -73
- package/mintlify-docs/api-reference/core-types/prompt-types.mdx +0 -102
- package/mintlify-docs/api-reference/core-types/rbac-types.mdx +0 -163
- package/mintlify-docs/api-reference/core-types/session-types.mdx +0 -77
- package/mintlify-docs/api-reference/core-types/user-management.mdx +0 -112
- package/mintlify-docs/api-reference/core-types/workflow-types.mdx +0 -88
- package/mintlify-docs/api-reference/core-types.mdx +0 -585
- package/mintlify-docs/api-reference/dynamic-types.mdx +0 -851
- package/mintlify-docs/api-reference/endpoint/create.mdx +0 -4
- package/mintlify-docs/api-reference/endpoint/delete.mdx +0 -4
- package/mintlify-docs/api-reference/endpoint/get.mdx +0 -4
- package/mintlify-docs/api-reference/endpoint/webhook.mdx +0 -4
- package/mintlify-docs/api-reference/introduction.mdx +0 -661
- package/mintlify-docs/api-reference/mutations.mdx +0 -1012
- package/mintlify-docs/api-reference/openapi.json +0 -217
- package/mintlify-docs/api-reference/queries.mdx +0 -1154
- package/mintlify-docs/backend/introduction.mdx +0 -218
- package/mintlify-docs/changelog.mdx +0 -387
- package/mintlify-docs/community-edition.mdx +0 -304
- package/mintlify-docs/core/exulu-agent/api-reference.mdx +0 -894
- package/mintlify-docs/core/exulu-agent/configuration.mdx +0 -690
- package/mintlify-docs/core/exulu-agent/introduction.mdx +0 -552
- package/mintlify-docs/core/exulu-app/api-reference.mdx +0 -481
- package/mintlify-docs/core/exulu-app/configuration.mdx +0 -319
- package/mintlify-docs/core/exulu-app/introduction.mdx +0 -117
- package/mintlify-docs/core/exulu-authentication.mdx +0 -810
- package/mintlify-docs/core/exulu-chunkers/api-reference.mdx +0 -1011
- package/mintlify-docs/core/exulu-chunkers/configuration.mdx +0 -596
- package/mintlify-docs/core/exulu-chunkers/introduction.mdx +0 -403
- package/mintlify-docs/core/exulu-context/api-reference.mdx +0 -911
- package/mintlify-docs/core/exulu-context/configuration.mdx +0 -648
- package/mintlify-docs/core/exulu-context/introduction.mdx +0 -394
- package/mintlify-docs/core/exulu-database.mdx +0 -811
- package/mintlify-docs/core/exulu-default-agents.mdx +0 -545
- package/mintlify-docs/core/exulu-eval/api-reference.mdx +0 -772
- package/mintlify-docs/core/exulu-eval/configuration.mdx +0 -680
- package/mintlify-docs/core/exulu-eval/introduction.mdx +0 -459
- package/mintlify-docs/core/exulu-logging.mdx +0 -464
- package/mintlify-docs/core/exulu-otel.mdx +0 -670
- package/mintlify-docs/core/exulu-queues/api-reference.mdx +0 -648
- package/mintlify-docs/core/exulu-queues/configuration.mdx +0 -650
- package/mintlify-docs/core/exulu-queues/introduction.mdx +0 -474
- package/mintlify-docs/core/exulu-reranker/api-reference.mdx +0 -630
- package/mintlify-docs/core/exulu-reranker/configuration.mdx +0 -663
- package/mintlify-docs/core/exulu-reranker/introduction.mdx +0 -516
- package/mintlify-docs/core/exulu-tool/api-reference.mdx +0 -723
- package/mintlify-docs/core/exulu-tool/configuration.mdx +0 -805
- package/mintlify-docs/core/exulu-tool/introduction.mdx +0 -539
- package/mintlify-docs/core/exulu-variables/api-reference.mdx +0 -699
- package/mintlify-docs/core/exulu-variables/configuration.mdx +0 -736
- package/mintlify-docs/core/exulu-variables/introduction.mdx +0 -511
- package/mintlify-docs/development.mdx +0 -94
- package/mintlify-docs/docs.json +0 -248
- package/mintlify-docs/enterprise-edition.mdx +0 -538
- package/mintlify-docs/essentials/code.mdx +0 -35
- package/mintlify-docs/essentials/images.mdx +0 -59
- package/mintlify-docs/essentials/markdown.mdx +0 -88
- package/mintlify-docs/essentials/navigation.mdx +0 -87
- package/mintlify-docs/essentials/reusable-snippets.mdx +0 -110
- package/mintlify-docs/essentials/settings.mdx +0 -318
- package/mintlify-docs/favicon.svg +0 -3
- package/mintlify-docs/frontend/introduction.mdx +0 -39
- package/mintlify-docs/getting-started.mdx +0 -267
- package/mintlify-docs/guides/custom-agent.mdx +0 -608
- package/mintlify-docs/guides/first-agent.mdx +0 -315
- package/mintlify-docs/images/admin_ui.png +0 -0
- package/mintlify-docs/images/contexts.png +0 -0
- package/mintlify-docs/images/create_agents.png +0 -0
- package/mintlify-docs/images/evals.png +0 -0
- package/mintlify-docs/images/graphql.png +0 -0
- package/mintlify-docs/images/graphql_api.png +0 -0
- package/mintlify-docs/images/hero-dark.png +0 -0
- package/mintlify-docs/images/hero-light.png +0 -0
- package/mintlify-docs/images/hero.png +0 -0
- package/mintlify-docs/images/knowledge_sources.png +0 -0
- package/mintlify-docs/images/mcp.png +0 -0
- package/mintlify-docs/images/scaling.png +0 -0
- package/mintlify-docs/index.mdx +0 -411
- package/mintlify-docs/logo/dark.svg +0 -9
- package/mintlify-docs/logo/light.svg +0 -9
- package/mintlify-docs/partners.mdx +0 -558
- package/mintlify-docs/products.mdx +0 -77
- package/mintlify-docs/snippets/snippet-intro.mdx +0 -4
- package/mintlify-docs/styles.css +0 -207
- package/ngrok.bash +0 -1
- package/ngrok.md +0 -6
- package/ngrok.yml +0 -10
- package/release.config.cjs +0 -15
- package/skills-lock.json +0 -10
- package/types/context-processor.ts +0 -45
- package/types/enums/eval-types.ts +0 -5
- package/types/enums/field-types.ts +0 -1
- package/types/enums/jobs.ts +0 -11
- package/types/enums/statistics.ts +0 -13
- package/types/exulu-table-definition.ts +0 -79
- package/types/file-types.ts +0 -18
- package/types/models/agent-session.ts +0 -27
- package/types/models/agent.ts +0 -68
- package/types/models/context.ts +0 -53
- package/types/models/embedding.ts +0 -17
- package/types/models/eval-run.ts +0 -40
- package/types/models/exulu-agent-tool-config.ts +0 -11
- package/types/models/item.ts +0 -21
- package/types/models/job.ts +0 -8
- package/types/models/project.ts +0 -16
- package/types/models/rate-limiter-rules.ts +0 -7
- package/types/models/test-case.ts +0 -25
- package/types/models/tool.ts +0 -9
- package/types/models/user-role.ts +0 -12
- package/types/models/user.ts +0 -20
- package/types/models/variable.ts +0 -8
- package/types/models/vector-methods.ts +0 -7
- package/types/provider-config.ts +0 -21
- package/types/queue-config.ts +0 -16
- package/types/rbac-rights-modes.ts +0 -1
- package/types/statistics.ts +0 -20
- package/types/workflow.ts +0 -31
- /package/ee/{documents → python/documents}/THIRD_PARTY_LICENSES/docling.txt +0 -0
- /package/ee/{documents/processing → python}/requirements.txt +0 -0
package/dist/index.cjs
CHANGED
|
@@ -44,12 +44,19 @@ __export(index_exports, {
|
|
|
44
44
|
ExuluJobs: () => ExuluJobs,
|
|
45
45
|
ExuluOtel: () => ExuluOtel,
|
|
46
46
|
ExuluProvider: () => ExuluProvider,
|
|
47
|
+
ExuluPython: () => ExuluPython,
|
|
47
48
|
ExuluQueues: () => queues,
|
|
48
49
|
ExuluReranker: () => ExuluReranker,
|
|
49
50
|
ExuluTool: () => ExuluTool,
|
|
50
51
|
ExuluVariables: () => ExuluVariables
|
|
51
52
|
});
|
|
52
53
|
module.exports = __toCommonJS(index_exports);
|
|
54
|
+
|
|
55
|
+
// node_modules/tsup/assets/cjs_shims.js
|
|
56
|
+
var getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${__filename}`).href : document.currentScript && document.currentScript.src || new URL("main.js", document.baseURI).href;
|
|
57
|
+
var importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
|
|
58
|
+
|
|
59
|
+
// src/index.ts
|
|
53
60
|
var import_config = require("dotenv/config");
|
|
54
61
|
|
|
55
62
|
// src/redis/client.ts
|
|
@@ -911,7 +918,7 @@ var uploadFile = async (file, fileName, config, options = {}, user, customBucket
|
|
|
911
918
|
if (error.name === "SignatureDoesNotMatch" || error.name === "InvalidAccessKeyId" || error.name === "AccessDenied") {
|
|
912
919
|
if (attempt < maxRetries) {
|
|
913
920
|
const backoffMs = Math.pow(2, attempt) * 1e3;
|
|
914
|
-
await new Promise((
|
|
921
|
+
await new Promise((resolve3) => setTimeout(resolve3, backoffMs));
|
|
915
922
|
s3Client = void 0;
|
|
916
923
|
getS3Client(config);
|
|
917
924
|
continue;
|
|
@@ -4641,7 +4648,7 @@ async function withRetry(generateFn, maxRetries = 3) {
|
|
|
4641
4648
|
if (attempt === maxRetries) {
|
|
4642
4649
|
throw error;
|
|
4643
4650
|
}
|
|
4644
|
-
await new Promise((
|
|
4651
|
+
await new Promise((resolve3) => setTimeout(resolve3, Math.pow(2, attempt) * 1e3));
|
|
4645
4652
|
}
|
|
4646
4653
|
}
|
|
4647
4654
|
throw lastError;
|
|
@@ -5297,8 +5304,8 @@ async function* executeAgenticRetrieval({
|
|
|
5297
5304
|
try {
|
|
5298
5305
|
let finishResolver;
|
|
5299
5306
|
let finishRejector;
|
|
5300
|
-
const finishPromise = new Promise((
|
|
5301
|
-
finishResolver =
|
|
5307
|
+
const finishPromise = new Promise((resolve3, reject) => {
|
|
5308
|
+
finishResolver = resolve3;
|
|
5302
5309
|
finishRejector = reject;
|
|
5303
5310
|
});
|
|
5304
5311
|
const timeoutId = setTimeout(() => {
|
|
@@ -7377,7 +7384,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7377
7384
|
} = await validateWorkflowPayload(data, providers);
|
|
7378
7385
|
const retries = 3;
|
|
7379
7386
|
let attempts = 0;
|
|
7380
|
-
const promise = new Promise(async (
|
|
7387
|
+
const promise = new Promise(async (resolve3, reject) => {
|
|
7381
7388
|
while (attempts < retries) {
|
|
7382
7389
|
try {
|
|
7383
7390
|
const messages2 = await processUiMessagesFlow({
|
|
@@ -7392,7 +7399,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7392
7399
|
config,
|
|
7393
7400
|
variables: data.inputs
|
|
7394
7401
|
});
|
|
7395
|
-
|
|
7402
|
+
resolve3(messages2);
|
|
7396
7403
|
break;
|
|
7397
7404
|
} catch (error) {
|
|
7398
7405
|
console.error(
|
|
@@ -7403,7 +7410,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7403
7410
|
if (attempts >= retries) {
|
|
7404
7411
|
reject(new Error(error instanceof Error ? error.message : String(error)));
|
|
7405
7412
|
}
|
|
7406
|
-
await new Promise((
|
|
7413
|
+
await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
|
|
7407
7414
|
}
|
|
7408
7415
|
}
|
|
7409
7416
|
});
|
|
@@ -7453,7 +7460,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7453
7460
|
} = await validateEvalPayload(data, providers);
|
|
7454
7461
|
const retries = 3;
|
|
7455
7462
|
let attempts = 0;
|
|
7456
|
-
const promise = new Promise(async (
|
|
7463
|
+
const promise = new Promise(async (resolve3, reject) => {
|
|
7457
7464
|
while (attempts < retries) {
|
|
7458
7465
|
try {
|
|
7459
7466
|
const messages2 = await processUiMessagesFlow({
|
|
@@ -7467,7 +7474,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7467
7474
|
tools,
|
|
7468
7475
|
config
|
|
7469
7476
|
});
|
|
7470
|
-
|
|
7477
|
+
resolve3(messages2);
|
|
7471
7478
|
break;
|
|
7472
7479
|
} catch (error) {
|
|
7473
7480
|
console.error(
|
|
@@ -7478,7 +7485,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7478
7485
|
if (attempts >= retries) {
|
|
7479
7486
|
reject(new Error(error instanceof Error ? error.message : String(error)));
|
|
7480
7487
|
}
|
|
7481
|
-
await new Promise((
|
|
7488
|
+
await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
|
|
7482
7489
|
}
|
|
7483
7490
|
}
|
|
7484
7491
|
});
|
|
@@ -7893,7 +7900,7 @@ var pollJobResult = async ({
|
|
|
7893
7900
|
attempts++;
|
|
7894
7901
|
const job = await import_bullmq3.Job.fromId(queue.queue, jobId);
|
|
7895
7902
|
if (!job) {
|
|
7896
|
-
await new Promise((
|
|
7903
|
+
await new Promise((resolve3) => setTimeout((resolve4) => resolve4(true), 2e3));
|
|
7897
7904
|
continue;
|
|
7898
7905
|
}
|
|
7899
7906
|
const elapsedTime = Date.now() - startTime;
|
|
@@ -7923,7 +7930,7 @@ var pollJobResult = async ({
|
|
|
7923
7930
|
console.log(`[EXULU] eval function ${job.id} result: ${result}`);
|
|
7924
7931
|
break;
|
|
7925
7932
|
}
|
|
7926
|
-
await new Promise((
|
|
7933
|
+
await new Promise((resolve3) => setTimeout((resolve4) => resolve4(true), 2e3));
|
|
7927
7934
|
}
|
|
7928
7935
|
return result;
|
|
7929
7936
|
};
|
|
@@ -8031,7 +8038,7 @@ var processUiMessagesFlow = async ({
|
|
|
8031
8038
|
label: agent.name,
|
|
8032
8039
|
trigger: "agent"
|
|
8033
8040
|
};
|
|
8034
|
-
messageHistory = await new Promise(async (
|
|
8041
|
+
messageHistory = await new Promise(async (resolve3, reject) => {
|
|
8035
8042
|
const startTime = Date.now();
|
|
8036
8043
|
try {
|
|
8037
8044
|
const result = await provider.generateStream({
|
|
@@ -8108,7 +8115,7 @@ var processUiMessagesFlow = async ({
|
|
|
8108
8115
|
})
|
|
8109
8116
|
] : []
|
|
8110
8117
|
]);
|
|
8111
|
-
|
|
8118
|
+
resolve3({
|
|
8112
8119
|
messages,
|
|
8113
8120
|
metadata: {
|
|
8114
8121
|
tokens: {
|
|
@@ -8856,7 +8863,7 @@ type PageInfo {
|
|
|
8856
8863
|
} = await validateWorkflowPayload(jobData, providers);
|
|
8857
8864
|
const retries = 3;
|
|
8858
8865
|
let attempts = 0;
|
|
8859
|
-
const promise = new Promise(async (
|
|
8866
|
+
const promise = new Promise(async (resolve3, reject) => {
|
|
8860
8867
|
while (attempts < retries) {
|
|
8861
8868
|
try {
|
|
8862
8869
|
const messages2 = await processUiMessagesFlow({
|
|
@@ -8871,7 +8878,7 @@ type PageInfo {
|
|
|
8871
8878
|
config,
|
|
8872
8879
|
variables: args.variables
|
|
8873
8880
|
});
|
|
8874
|
-
|
|
8881
|
+
resolve3(messages2);
|
|
8875
8882
|
break;
|
|
8876
8883
|
} catch (error) {
|
|
8877
8884
|
console.error(
|
|
@@ -8885,7 +8892,7 @@ type PageInfo {
|
|
|
8885
8892
|
if (attempts >= retries) {
|
|
8886
8893
|
reject(error instanceof Error ? error : new Error(String(error)));
|
|
8887
8894
|
}
|
|
8888
|
-
await new Promise((
|
|
8895
|
+
await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
|
|
8889
8896
|
}
|
|
8890
8897
|
}
|
|
8891
8898
|
});
|
|
@@ -9138,10 +9145,10 @@ type PageInfo {
|
|
|
9138
9145
|
contexts.map(async (context2) => {
|
|
9139
9146
|
let processor = null;
|
|
9140
9147
|
if (context2.processor) {
|
|
9141
|
-
processor = await new Promise(async (
|
|
9148
|
+
processor = await new Promise(async (resolve3, reject) => {
|
|
9142
9149
|
const config2 = context2.processor?.config;
|
|
9143
9150
|
const queue = await config2?.queue;
|
|
9144
|
-
|
|
9151
|
+
resolve3({
|
|
9145
9152
|
name: context2.processor.name,
|
|
9146
9153
|
description: context2.processor.description,
|
|
9147
9154
|
queue: queue?.queue?.name || void 0,
|
|
@@ -9222,10 +9229,10 @@ type PageInfo {
|
|
|
9222
9229
|
}
|
|
9223
9230
|
let processor = null;
|
|
9224
9231
|
if (data.processor) {
|
|
9225
|
-
processor = await new Promise(async (
|
|
9232
|
+
processor = await new Promise(async (resolve3, reject) => {
|
|
9226
9233
|
const config2 = data.processor?.config;
|
|
9227
9234
|
const queue = await config2?.queue;
|
|
9228
|
-
|
|
9235
|
+
resolve3({
|
|
9229
9236
|
name: data.processor.name,
|
|
9230
9237
|
description: data.processor.description,
|
|
9231
9238
|
queue: queue?.queue?.name || void 0,
|
|
@@ -13113,7 +13120,7 @@ var internetSearchTool = new ExuluTool({
|
|
|
13113
13120
|
} catch (error) {
|
|
13114
13121
|
if (error instanceof import_perplexity_ai.default.RateLimitError && attempt < maxRetries - 1) {
|
|
13115
13122
|
const delay = Math.pow(2, attempt) * 1e3 + Math.random() * 1e3;
|
|
13116
|
-
await new Promise((
|
|
13123
|
+
await new Promise((resolve3) => setTimeout(resolve3, delay));
|
|
13117
13124
|
continue;
|
|
13118
13125
|
}
|
|
13119
13126
|
throw error;
|
|
@@ -15310,7 +15317,7 @@ var create = ({
|
|
|
15310
15317
|
// src/index.ts
|
|
15311
15318
|
var import_crypto_js9 = __toESM(require("crypto-js"), 1);
|
|
15312
15319
|
|
|
15313
|
-
// ee/markdown.ts
|
|
15320
|
+
// ee/chunking/markdown.ts
|
|
15314
15321
|
var extractPageTag = (text) => {
|
|
15315
15322
|
let match;
|
|
15316
15323
|
let lastPageNumber = void 0;
|
|
@@ -15737,9 +15744,172 @@ var MarkdownChunker = class {
|
|
|
15737
15744
|
}
|
|
15738
15745
|
};
|
|
15739
15746
|
|
|
15740
|
-
//
|
|
15747
|
+
// src/utils/python-setup.ts
|
|
15741
15748
|
var import_child_process = require("child_process");
|
|
15742
15749
|
var import_util2 = require("util");
|
|
15750
|
+
var import_path = require("path");
|
|
15751
|
+
var import_fs2 = require("fs");
|
|
15752
|
+
var import_url = require("url");
|
|
15753
|
+
var execAsync = (0, import_util2.promisify)(import_child_process.exec);
|
|
15754
|
+
function getPackageRoot() {
|
|
15755
|
+
const currentFile = (0, import_url.fileURLToPath)(importMetaUrl);
|
|
15756
|
+
let currentDir = (0, import_path.dirname)(currentFile);
|
|
15757
|
+
let attempts = 0;
|
|
15758
|
+
const maxAttempts = 10;
|
|
15759
|
+
while (attempts < maxAttempts) {
|
|
15760
|
+
const packageJsonPath = (0, import_path.join)(currentDir, "package.json");
|
|
15761
|
+
if ((0, import_fs2.existsSync)(packageJsonPath)) {
|
|
15762
|
+
try {
|
|
15763
|
+
const packageJson = JSON.parse((0, import_fs2.readFileSync)(packageJsonPath, "utf-8"));
|
|
15764
|
+
if (packageJson.name === "@exulu/backend") {
|
|
15765
|
+
return currentDir;
|
|
15766
|
+
}
|
|
15767
|
+
} catch {
|
|
15768
|
+
}
|
|
15769
|
+
}
|
|
15770
|
+
const parentDir = (0, import_path.resolve)(currentDir, "..");
|
|
15771
|
+
if (parentDir === currentDir) {
|
|
15772
|
+
break;
|
|
15773
|
+
}
|
|
15774
|
+
currentDir = parentDir;
|
|
15775
|
+
attempts++;
|
|
15776
|
+
}
|
|
15777
|
+
const fallback = (0, import_path.resolve)((0, import_path.dirname)((0, import_url.fileURLToPath)(importMetaUrl)), "../..");
|
|
15778
|
+
return fallback;
|
|
15779
|
+
}
|
|
15780
|
+
function getSetupScriptPath(packageRoot) {
|
|
15781
|
+
return (0, import_path.resolve)(packageRoot, "ee/python/setup.sh");
|
|
15782
|
+
}
|
|
15783
|
+
function getVenvPath(packageRoot) {
|
|
15784
|
+
return (0, import_path.resolve)(packageRoot, "ee/python/.venv");
|
|
15785
|
+
}
|
|
15786
|
+
function isPythonEnvironmentSetup(packageRoot) {
|
|
15787
|
+
const root = packageRoot ?? getPackageRoot();
|
|
15788
|
+
const venvPath = getVenvPath(root);
|
|
15789
|
+
const pythonPath = (0, import_path.join)(venvPath, "bin", "python");
|
|
15790
|
+
return (0, import_fs2.existsSync)(venvPath) && (0, import_fs2.existsSync)(pythonPath);
|
|
15791
|
+
}
|
|
15792
|
+
async function setupPythonEnvironment(options = {}) {
|
|
15793
|
+
const {
|
|
15794
|
+
packageRoot = getPackageRoot(),
|
|
15795
|
+
force = false,
|
|
15796
|
+
verbose = false,
|
|
15797
|
+
timeout = 6e5
|
|
15798
|
+
// 10 minutes
|
|
15799
|
+
} = options;
|
|
15800
|
+
if (!force && isPythonEnvironmentSetup(packageRoot)) {
|
|
15801
|
+
if (verbose) {
|
|
15802
|
+
console.log("\u2713 Python environment already set up");
|
|
15803
|
+
}
|
|
15804
|
+
return {
|
|
15805
|
+
success: true,
|
|
15806
|
+
message: "Python environment already exists",
|
|
15807
|
+
alreadyExists: true
|
|
15808
|
+
};
|
|
15809
|
+
}
|
|
15810
|
+
const setupScriptPath = getSetupScriptPath(packageRoot);
|
|
15811
|
+
if (!(0, import_fs2.existsSync)(setupScriptPath)) {
|
|
15812
|
+
return {
|
|
15813
|
+
success: false,
|
|
15814
|
+
message: `Setup script not found at: ${setupScriptPath}`,
|
|
15815
|
+
alreadyExists: false
|
|
15816
|
+
};
|
|
15817
|
+
}
|
|
15818
|
+
try {
|
|
15819
|
+
if (verbose) {
|
|
15820
|
+
console.log("Setting up Python environment...");
|
|
15821
|
+
}
|
|
15822
|
+
const { stdout, stderr } = await execAsync(`bash "${setupScriptPath}"`, {
|
|
15823
|
+
cwd: packageRoot,
|
|
15824
|
+
timeout,
|
|
15825
|
+
env: {
|
|
15826
|
+
...process.env,
|
|
15827
|
+
// Ensure script can write to the directory
|
|
15828
|
+
PYTHONDONTWRITEBYTECODE: "1"
|
|
15829
|
+
},
|
|
15830
|
+
maxBuffer: 10 * 1024 * 1024
|
|
15831
|
+
// 10MB buffer
|
|
15832
|
+
});
|
|
15833
|
+
const output = stdout + stderr;
|
|
15834
|
+
const versionMatch = output.match(/Python (\d+\.\d+\.\d+)/);
|
|
15835
|
+
const pythonVersion = versionMatch ? versionMatch[1] : void 0;
|
|
15836
|
+
if (verbose) {
|
|
15837
|
+
console.log(output);
|
|
15838
|
+
}
|
|
15839
|
+
return {
|
|
15840
|
+
success: true,
|
|
15841
|
+
message: "Python environment set up successfully",
|
|
15842
|
+
alreadyExists: false,
|
|
15843
|
+
pythonVersion,
|
|
15844
|
+
output
|
|
15845
|
+
};
|
|
15846
|
+
} catch (error) {
|
|
15847
|
+
const errorOutput = error.stdout + error.stderr;
|
|
15848
|
+
return {
|
|
15849
|
+
success: false,
|
|
15850
|
+
message: `Setup failed: ${error.message}`,
|
|
15851
|
+
alreadyExists: false,
|
|
15852
|
+
output: errorOutput
|
|
15853
|
+
};
|
|
15854
|
+
}
|
|
15855
|
+
}
|
|
15856
|
+
function getPythonSetupInstructions() {
|
|
15857
|
+
return `
|
|
15858
|
+
Python environment not set up. Please run one of the following:
|
|
15859
|
+
|
|
15860
|
+
Option 1 (Automatic):
|
|
15861
|
+
import { setupPythonEnvironment } from '@exulu/backend';
|
|
15862
|
+
await setupPythonEnvironment();
|
|
15863
|
+
|
|
15864
|
+
Option 2 (Manual - for package consumers):
|
|
15865
|
+
npx @exulu/backend setup-python
|
|
15866
|
+
|
|
15867
|
+
Option 3 (Manual - for contributors):
|
|
15868
|
+
npm run python:setup
|
|
15869
|
+
|
|
15870
|
+
Requirements:
|
|
15871
|
+
- Python 3.10 or higher must be installed
|
|
15872
|
+
- pip must be available
|
|
15873
|
+
|
|
15874
|
+
Installing Python:
|
|
15875
|
+
- macOS: brew install python@3.12
|
|
15876
|
+
- Ubuntu/Debian: sudo apt-get install python3.12
|
|
15877
|
+
- Windows: Download from https://www.python.org/downloads/
|
|
15878
|
+
`.trim();
|
|
15879
|
+
}
|
|
15880
|
+
async function validatePythonEnvironment(packageRoot) {
|
|
15881
|
+
const root = packageRoot ?? getPackageRoot();
|
|
15882
|
+
const venvPath = getVenvPath(root);
|
|
15883
|
+
const pythonPath = (0, import_path.join)(venvPath, "bin", "python");
|
|
15884
|
+
if (!(0, import_fs2.existsSync)(venvPath)) {
|
|
15885
|
+
return {
|
|
15886
|
+
valid: false,
|
|
15887
|
+
message: getPythonSetupInstructions()
|
|
15888
|
+
};
|
|
15889
|
+
}
|
|
15890
|
+
if (!(0, import_fs2.existsSync)(pythonPath)) {
|
|
15891
|
+
return {
|
|
15892
|
+
valid: false,
|
|
15893
|
+
message: "Python virtual environment is corrupted. Please run:\n await setupPythonEnvironment({ force: true })"
|
|
15894
|
+
};
|
|
15895
|
+
}
|
|
15896
|
+
try {
|
|
15897
|
+
await execAsync(`"${pythonPath}" --version`, { cwd: root });
|
|
15898
|
+
return {
|
|
15899
|
+
valid: true,
|
|
15900
|
+
message: "Python environment is valid"
|
|
15901
|
+
};
|
|
15902
|
+
} catch {
|
|
15903
|
+
return {
|
|
15904
|
+
valid: false,
|
|
15905
|
+
message: "Python executable is not working. Please run:\n await setupPythonEnvironment({ force: true })"
|
|
15906
|
+
};
|
|
15907
|
+
}
|
|
15908
|
+
}
|
|
15909
|
+
|
|
15910
|
+
// ee/python/documents/processing/doc_processor.ts
|
|
15911
|
+
var import_child_process3 = require("child_process");
|
|
15912
|
+
var import_util4 = require("util");
|
|
15743
15913
|
var fs2 = __toESM(require("fs"), 1);
|
|
15744
15914
|
var path = __toESM(require("path"), 1);
|
|
15745
15915
|
var import_ai7 = require("ai");
|
|
@@ -15750,7 +15920,131 @@ var mammoth = __toESM(require("mammoth"), 1);
|
|
|
15750
15920
|
var import_turndown = __toESM(require("turndown"), 1);
|
|
15751
15921
|
var import_word_extractor = __toESM(require("word-extractor"), 1);
|
|
15752
15922
|
var import_officeparser2 = require("officeparser");
|
|
15753
|
-
|
|
15923
|
+
|
|
15924
|
+
// src/utils/python-executor.ts
|
|
15925
|
+
var import_child_process2 = require("child_process");
|
|
15926
|
+
var import_util3 = require("util");
|
|
15927
|
+
var import_path2 = require("path");
|
|
15928
|
+
var import_fs3 = require("fs");
|
|
15929
|
+
var import_url2 = require("url");
|
|
15930
|
+
var execAsync2 = (0, import_util3.promisify)(import_child_process2.exec);
|
|
15931
|
+
function getPackageRoot2() {
|
|
15932
|
+
const currentFile = (0, import_url2.fileURLToPath)(importMetaUrl);
|
|
15933
|
+
let currentDir = (0, import_path2.dirname)(currentFile);
|
|
15934
|
+
let attempts = 0;
|
|
15935
|
+
const maxAttempts = 10;
|
|
15936
|
+
while (attempts < maxAttempts) {
|
|
15937
|
+
const packageJsonPath = (0, import_path2.join)(currentDir, "package.json");
|
|
15938
|
+
if ((0, import_fs3.existsSync)(packageJsonPath)) {
|
|
15939
|
+
try {
|
|
15940
|
+
const packageJson = JSON.parse((0, import_fs3.readFileSync)(packageJsonPath, "utf-8"));
|
|
15941
|
+
if (packageJson.name === "@exulu/backend") {
|
|
15942
|
+
return currentDir;
|
|
15943
|
+
}
|
|
15944
|
+
} catch {
|
|
15945
|
+
}
|
|
15946
|
+
}
|
|
15947
|
+
const parentDir = (0, import_path2.resolve)(currentDir, "..");
|
|
15948
|
+
if (parentDir === currentDir) {
|
|
15949
|
+
break;
|
|
15950
|
+
}
|
|
15951
|
+
currentDir = parentDir;
|
|
15952
|
+
attempts++;
|
|
15953
|
+
}
|
|
15954
|
+
return (0, import_path2.resolve)((0, import_path2.dirname)((0, import_url2.fileURLToPath)(importMetaUrl)), "../..");
|
|
15955
|
+
}
|
|
15956
|
+
var PythonEnvironmentError = class extends Error {
|
|
15957
|
+
constructor(message) {
|
|
15958
|
+
super(message);
|
|
15959
|
+
this.name = "PythonEnvironmentError";
|
|
15960
|
+
}
|
|
15961
|
+
};
|
|
15962
|
+
var PythonExecutionError = class extends Error {
|
|
15963
|
+
stdout;
|
|
15964
|
+
stderr;
|
|
15965
|
+
exitCode;
|
|
15966
|
+
constructor(message, stdout, stderr, exitCode) {
|
|
15967
|
+
super(message);
|
|
15968
|
+
this.name = "PythonExecutionError";
|
|
15969
|
+
this.stdout = stdout;
|
|
15970
|
+
this.stderr = stderr;
|
|
15971
|
+
this.exitCode = exitCode;
|
|
15972
|
+
}
|
|
15973
|
+
};
|
|
15974
|
+
function getVenvPath2(packageRoot) {
|
|
15975
|
+
return (0, import_path2.resolve)(packageRoot, "ee/python/.venv");
|
|
15976
|
+
}
|
|
15977
|
+
function getPythonExecutable(packageRoot) {
|
|
15978
|
+
const venvPath = getVenvPath2(packageRoot);
|
|
15979
|
+
return (0, import_path2.join)(venvPath, "bin", "python");
|
|
15980
|
+
}
|
|
15981
|
+
async function validatePythonEnvironmentForExecution(packageRoot) {
|
|
15982
|
+
const validation = await validatePythonEnvironment(packageRoot);
|
|
15983
|
+
if (!validation.valid) {
|
|
15984
|
+
throw new PythonEnvironmentError(validation.message);
|
|
15985
|
+
}
|
|
15986
|
+
}
|
|
15987
|
+
async function executePythonScript(config) {
|
|
15988
|
+
const {
|
|
15989
|
+
scriptPath,
|
|
15990
|
+
args = [],
|
|
15991
|
+
packageRoot = getPackageRoot2(),
|
|
15992
|
+
cwd = process.cwd(),
|
|
15993
|
+
timeout = 3e5,
|
|
15994
|
+
// 5 minutes default
|
|
15995
|
+
env = {},
|
|
15996
|
+
validateEnvironment = true
|
|
15997
|
+
} = config;
|
|
15998
|
+
if (validateEnvironment) {
|
|
15999
|
+
await validatePythonEnvironmentForExecution(packageRoot);
|
|
16000
|
+
}
|
|
16001
|
+
const resolvedScriptPath = (0, import_path2.resolve)(packageRoot, scriptPath);
|
|
16002
|
+
if (!(0, import_fs3.existsSync)(resolvedScriptPath)) {
|
|
16003
|
+
throw new PythonExecutionError(
|
|
16004
|
+
`Python script not found: ${resolvedScriptPath}`,
|
|
16005
|
+
"",
|
|
16006
|
+
"",
|
|
16007
|
+
1
|
|
16008
|
+
);
|
|
16009
|
+
}
|
|
16010
|
+
const pythonExecutable = getPythonExecutable(packageRoot);
|
|
16011
|
+
const quotedArgs = args.map((arg) => {
|
|
16012
|
+
return arg.includes(" ") ? `"${arg}"` : arg;
|
|
16013
|
+
});
|
|
16014
|
+
const command = `${pythonExecutable} "${resolvedScriptPath}" ${quotedArgs.join(" ")}`;
|
|
16015
|
+
try {
|
|
16016
|
+
const { stdout, stderr } = await execAsync2(command, {
|
|
16017
|
+
cwd,
|
|
16018
|
+
timeout,
|
|
16019
|
+
env: {
|
|
16020
|
+
...process.env,
|
|
16021
|
+
...env,
|
|
16022
|
+
// Ensure Python doesn't write bytecode files
|
|
16023
|
+
PYTHONDONTWRITEBYTECODE: "1"
|
|
16024
|
+
},
|
|
16025
|
+
maxBuffer: 10 * 1024 * 1024
|
|
16026
|
+
// 10MB buffer for output
|
|
16027
|
+
});
|
|
16028
|
+
return {
|
|
16029
|
+
stdout: stdout.trim(),
|
|
16030
|
+
stderr: stderr.trim(),
|
|
16031
|
+
exitCode: 0,
|
|
16032
|
+
success: true
|
|
16033
|
+
};
|
|
16034
|
+
} catch (error) {
|
|
16035
|
+
const stdout = error.stdout?.toString() ?? "";
|
|
16036
|
+
const stderr = error.stderr?.toString() ?? "";
|
|
16037
|
+
const exitCode = error.code ?? 1;
|
|
16038
|
+
throw new PythonExecutionError(
|
|
16039
|
+
`Python script execution failed: ${error.message}`,
|
|
16040
|
+
stdout,
|
|
16041
|
+
stderr,
|
|
16042
|
+
exitCode
|
|
16043
|
+
);
|
|
16044
|
+
}
|
|
16045
|
+
}
|
|
16046
|
+
|
|
16047
|
+
// ee/python/documents/processing/doc_processor.ts
|
|
15754
16048
|
async function processDocx(file) {
|
|
15755
16049
|
const html = await mammoth.convertToHtml({ buffer: file });
|
|
15756
16050
|
const turndownService = new import_turndown.default();
|
|
@@ -15895,15 +16189,15 @@ Specific notes and guidelines:
|
|
|
15895
16189
|
};
|
|
15896
16190
|
return validation;
|
|
15897
16191
|
}
|
|
15898
|
-
async function validateWithVLM(
|
|
15899
|
-
console.log(`[EXULU] Starting VLM validation for docling output, ${
|
|
16192
|
+
async function validateWithVLM(document2, model, verbose = false, concurrency = 10) {
|
|
16193
|
+
console.log(`[EXULU] Starting VLM validation for docling output, ${document2.length} pages...`);
|
|
15900
16194
|
console.log(
|
|
15901
16195
|
`[EXULU] Concurrency limit: ${concurrency}`
|
|
15902
16196
|
);
|
|
15903
16197
|
let validatedCount = 0;
|
|
15904
16198
|
let correctedCount = 0;
|
|
15905
16199
|
const limit = (0, import_p_limit.default)(concurrency);
|
|
15906
|
-
const validationTasks =
|
|
16200
|
+
const validationTasks = document2.map(
|
|
15907
16201
|
(page) => limit(async () => {
|
|
15908
16202
|
const imagePath = page.image;
|
|
15909
16203
|
if (!imagePath) {
|
|
@@ -15948,7 +16242,7 @@ async function validateWithVLM(document, model, verbose = false, concurrency = 1
|
|
|
15948
16242
|
console.log(`[EXULU] VLM validation complete:`);
|
|
15949
16243
|
console.log(`[EXULU] Validated: ${validatedCount} chunks`);
|
|
15950
16244
|
console.log(`[EXULU] Corrected: ${correctedCount} chunks`);
|
|
15951
|
-
return
|
|
16245
|
+
return document2;
|
|
15952
16246
|
}
|
|
15953
16247
|
async function processDocument(filePath, fileType, buffer, tempDir, config, verbose = false) {
|
|
15954
16248
|
console.log("Starting document processing...");
|
|
@@ -16000,16 +16294,24 @@ async function processPdf(buffer, paths, config, verbose = false) {
|
|
|
16000
16294
|
try {
|
|
16001
16295
|
let json;
|
|
16002
16296
|
if (config?.docling) {
|
|
16003
|
-
|
|
16004
|
-
|
|
16005
|
-
|
|
16006
|
-
|
|
16007
|
-
|
|
16008
|
-
|
|
16009
|
-
|
|
16010
|
-
|
|
16011
|
-
|
|
16012
|
-
|
|
16297
|
+
console.log(`[EXULU] Processing document with document_to_markdown.py`);
|
|
16298
|
+
const result = await executePythonScript({
|
|
16299
|
+
scriptPath: "ee/python/documents/processing/document_to_markdown.py",
|
|
16300
|
+
args: [
|
|
16301
|
+
paths.source,
|
|
16302
|
+
"-o",
|
|
16303
|
+
paths.json,
|
|
16304
|
+
"--images-dir",
|
|
16305
|
+
paths.images
|
|
16306
|
+
],
|
|
16307
|
+
timeout: 30 * 60 * 1e3
|
|
16308
|
+
// 30 minutes for large documents
|
|
16309
|
+
});
|
|
16310
|
+
if (result.stderr) {
|
|
16311
|
+
console.log("Processing info:", result.stderr.trim());
|
|
16312
|
+
}
|
|
16313
|
+
if (!result.success) {
|
|
16314
|
+
throw new Error(`Document processing failed: ${result.stderr}`);
|
|
16013
16315
|
}
|
|
16014
16316
|
const jsonContent = await fs2.promises.readFile(paths.json, "utf-8");
|
|
16015
16317
|
json = JSON.parse(jsonContent);
|
|
@@ -16024,10 +16326,10 @@ async function processPdf(buffer, paths, config, verbose = false) {
|
|
|
16024
16326
|
headings: []
|
|
16025
16327
|
}];
|
|
16026
16328
|
}
|
|
16027
|
-
console.log(`
|
|
16329
|
+
console.log(`[EXULU]
|
|
16028
16330
|
\u2713 Document processing completed successfully`);
|
|
16029
|
-
console.log(`
|
|
16030
|
-
console.log(`
|
|
16331
|
+
console.log(`[EXULU] Total pages: ${json.length}`);
|
|
16332
|
+
console.log(`[EXULU] Output file: ${paths.json}`);
|
|
16031
16333
|
if (!config?.docling && config?.vlm?.model) {
|
|
16032
16334
|
console.error("[EXULU] VLM validation is only supported when docling is enabled, skipping validation.");
|
|
16033
16335
|
}
|
|
@@ -16240,6 +16542,12 @@ var ExuluChunkers = {
|
|
|
16240
16542
|
rules: RecursiveRules
|
|
16241
16543
|
}
|
|
16242
16544
|
};
|
|
16545
|
+
var ExuluPython = {
|
|
16546
|
+
setup: setupPythonEnvironment,
|
|
16547
|
+
check: isPythonEnvironmentSetup,
|
|
16548
|
+
validate: validatePythonEnvironment,
|
|
16549
|
+
instructions: getPythonSetupInstructions
|
|
16550
|
+
};
|
|
16243
16551
|
// Annotate the CommonJS export names for ESM import in node:
|
|
16244
16552
|
0 && (module.exports = {
|
|
16245
16553
|
EXULU_JOB_STATUS_ENUM,
|
|
@@ -16256,6 +16564,7 @@ var ExuluChunkers = {
|
|
|
16256
16564
|
ExuluJobs,
|
|
16257
16565
|
ExuluOtel,
|
|
16258
16566
|
ExuluProvider,
|
|
16567
|
+
ExuluPython,
|
|
16259
16568
|
ExuluQueues,
|
|
16260
16569
|
ExuluReranker,
|
|
16261
16570
|
ExuluTool,
|