@exulu/backend 1.48.2 → 1.49.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +351 -42
- package/dist/index.d.cts +96 -1
- package/dist/index.d.ts +96 -1
- package/dist/index.js +340 -38
- package/ee/{markdown.ts → chunking/markdown.ts} +2 -2
- package/ee/python/README.md +295 -0
- package/ee/python/documents/processing/README.md +155 -0
- package/ee/{documents → python/documents}/processing/doc_processor.ts +25 -17
- package/ee/{documents/processing/pdf_to_markdown.py → python/documents/processing/document_to_markdown.py} +3 -10
- package/ee/python/setup.sh +180 -0
- package/package.json +14 -3
- package/scripts/postinstall.cjs +149 -0
- package/.agents/skills/mintlify/SKILL.md +0 -347
- package/.editorconfig +0 -15
- package/.eslintrc.json +0 -52
- package/.github/workflows/release-backend.yml +0 -38
- package/.husky/commit-msg +0 -1
- package/.jscpd.json +0 -18
- package/.mcp.json +0 -25
- package/.nvmrc +0 -1
- package/.prettierignore +0 -5
- package/.prettierrc.json +0 -12
- package/CHANGELOG.md +0 -8
- package/SECURITY.md +0 -5
- package/commitlint.config.js +0 -4
- package/devops/documentation/patch-older-releases.md +0 -42
- package/ee/documents/processing/build_pdf_processor.sh +0 -35
- package/ee/documents/processing/chunk_markdown.py +0 -263
- package/ee/documents/processing/pdf_processor.spec +0 -115
- package/eslint.config.js +0 -88
- package/jest.config.ts +0 -25
- package/mintlify-docs/.mintignore +0 -7
- package/mintlify-docs/AGENTS.md +0 -33
- package/mintlify-docs/CLAUDE.MD +0 -50
- package/mintlify-docs/CONTRIBUTING.md +0 -32
- package/mintlify-docs/LICENSE +0 -21
- package/mintlify-docs/README.md +0 -55
- package/mintlify-docs/ai-tools/claude-code.mdx +0 -43
- package/mintlify-docs/ai-tools/cursor.mdx +0 -39
- package/mintlify-docs/ai-tools/windsurf.mdx +0 -39
- package/mintlify-docs/api-reference/core-types/agent-types.mdx +0 -110
- package/mintlify-docs/api-reference/core-types/analytics-types.mdx +0 -95
- package/mintlify-docs/api-reference/core-types/configuration-types.mdx +0 -83
- package/mintlify-docs/api-reference/core-types/evaluation-types.mdx +0 -106
- package/mintlify-docs/api-reference/core-types/job-types.mdx +0 -135
- package/mintlify-docs/api-reference/core-types/overview.mdx +0 -73
- package/mintlify-docs/api-reference/core-types/prompt-types.mdx +0 -102
- package/mintlify-docs/api-reference/core-types/rbac-types.mdx +0 -163
- package/mintlify-docs/api-reference/core-types/session-types.mdx +0 -77
- package/mintlify-docs/api-reference/core-types/user-management.mdx +0 -112
- package/mintlify-docs/api-reference/core-types/workflow-types.mdx +0 -88
- package/mintlify-docs/api-reference/core-types.mdx +0 -585
- package/mintlify-docs/api-reference/dynamic-types.mdx +0 -851
- package/mintlify-docs/api-reference/endpoint/create.mdx +0 -4
- package/mintlify-docs/api-reference/endpoint/delete.mdx +0 -4
- package/mintlify-docs/api-reference/endpoint/get.mdx +0 -4
- package/mintlify-docs/api-reference/endpoint/webhook.mdx +0 -4
- package/mintlify-docs/api-reference/introduction.mdx +0 -661
- package/mintlify-docs/api-reference/mutations.mdx +0 -1012
- package/mintlify-docs/api-reference/openapi.json +0 -217
- package/mintlify-docs/api-reference/queries.mdx +0 -1154
- package/mintlify-docs/backend/introduction.mdx +0 -218
- package/mintlify-docs/changelog.mdx +0 -387
- package/mintlify-docs/community-edition.mdx +0 -304
- package/mintlify-docs/core/exulu-agent/api-reference.mdx +0 -894
- package/mintlify-docs/core/exulu-agent/configuration.mdx +0 -690
- package/mintlify-docs/core/exulu-agent/introduction.mdx +0 -552
- package/mintlify-docs/core/exulu-app/api-reference.mdx +0 -481
- package/mintlify-docs/core/exulu-app/configuration.mdx +0 -319
- package/mintlify-docs/core/exulu-app/introduction.mdx +0 -117
- package/mintlify-docs/core/exulu-authentication.mdx +0 -810
- package/mintlify-docs/core/exulu-chunkers/api-reference.mdx +0 -1011
- package/mintlify-docs/core/exulu-chunkers/configuration.mdx +0 -596
- package/mintlify-docs/core/exulu-chunkers/introduction.mdx +0 -403
- package/mintlify-docs/core/exulu-context/api-reference.mdx +0 -911
- package/mintlify-docs/core/exulu-context/configuration.mdx +0 -648
- package/mintlify-docs/core/exulu-context/introduction.mdx +0 -394
- package/mintlify-docs/core/exulu-database.mdx +0 -811
- package/mintlify-docs/core/exulu-default-agents.mdx +0 -545
- package/mintlify-docs/core/exulu-eval/api-reference.mdx +0 -772
- package/mintlify-docs/core/exulu-eval/configuration.mdx +0 -680
- package/mintlify-docs/core/exulu-eval/introduction.mdx +0 -459
- package/mintlify-docs/core/exulu-logging.mdx +0 -464
- package/mintlify-docs/core/exulu-otel.mdx +0 -670
- package/mintlify-docs/core/exulu-queues/api-reference.mdx +0 -648
- package/mintlify-docs/core/exulu-queues/configuration.mdx +0 -650
- package/mintlify-docs/core/exulu-queues/introduction.mdx +0 -474
- package/mintlify-docs/core/exulu-reranker/api-reference.mdx +0 -630
- package/mintlify-docs/core/exulu-reranker/configuration.mdx +0 -663
- package/mintlify-docs/core/exulu-reranker/introduction.mdx +0 -516
- package/mintlify-docs/core/exulu-tool/api-reference.mdx +0 -723
- package/mintlify-docs/core/exulu-tool/configuration.mdx +0 -805
- package/mintlify-docs/core/exulu-tool/introduction.mdx +0 -539
- package/mintlify-docs/core/exulu-variables/api-reference.mdx +0 -699
- package/mintlify-docs/core/exulu-variables/configuration.mdx +0 -736
- package/mintlify-docs/core/exulu-variables/introduction.mdx +0 -511
- package/mintlify-docs/development.mdx +0 -94
- package/mintlify-docs/docs.json +0 -248
- package/mintlify-docs/enterprise-edition.mdx +0 -538
- package/mintlify-docs/essentials/code.mdx +0 -35
- package/mintlify-docs/essentials/images.mdx +0 -59
- package/mintlify-docs/essentials/markdown.mdx +0 -88
- package/mintlify-docs/essentials/navigation.mdx +0 -87
- package/mintlify-docs/essentials/reusable-snippets.mdx +0 -110
- package/mintlify-docs/essentials/settings.mdx +0 -318
- package/mintlify-docs/favicon.svg +0 -3
- package/mintlify-docs/frontend/introduction.mdx +0 -39
- package/mintlify-docs/getting-started.mdx +0 -267
- package/mintlify-docs/guides/custom-agent.mdx +0 -608
- package/mintlify-docs/guides/first-agent.mdx +0 -315
- package/mintlify-docs/images/admin_ui.png +0 -0
- package/mintlify-docs/images/contexts.png +0 -0
- package/mintlify-docs/images/create_agents.png +0 -0
- package/mintlify-docs/images/evals.png +0 -0
- package/mintlify-docs/images/graphql.png +0 -0
- package/mintlify-docs/images/graphql_api.png +0 -0
- package/mintlify-docs/images/hero-dark.png +0 -0
- package/mintlify-docs/images/hero-light.png +0 -0
- package/mintlify-docs/images/hero.png +0 -0
- package/mintlify-docs/images/knowledge_sources.png +0 -0
- package/mintlify-docs/images/mcp.png +0 -0
- package/mintlify-docs/images/scaling.png +0 -0
- package/mintlify-docs/index.mdx +0 -411
- package/mintlify-docs/logo/dark.svg +0 -9
- package/mintlify-docs/logo/light.svg +0 -9
- package/mintlify-docs/partners.mdx +0 -558
- package/mintlify-docs/products.mdx +0 -77
- package/mintlify-docs/snippets/snippet-intro.mdx +0 -4
- package/mintlify-docs/styles.css +0 -207
- package/ngrok.bash +0 -1
- package/ngrok.md +0 -6
- package/ngrok.yml +0 -10
- package/release.config.cjs +0 -15
- package/skills-lock.json +0 -10
- package/types/context-processor.ts +0 -45
- package/types/enums/eval-types.ts +0 -5
- package/types/enums/field-types.ts +0 -1
- package/types/enums/jobs.ts +0 -11
- package/types/enums/statistics.ts +0 -13
- package/types/exulu-table-definition.ts +0 -79
- package/types/file-types.ts +0 -18
- package/types/models/agent-session.ts +0 -27
- package/types/models/agent.ts +0 -68
- package/types/models/context.ts +0 -53
- package/types/models/embedding.ts +0 -17
- package/types/models/eval-run.ts +0 -40
- package/types/models/exulu-agent-tool-config.ts +0 -11
- package/types/models/item.ts +0 -21
- package/types/models/job.ts +0 -8
- package/types/models/project.ts +0 -16
- package/types/models/rate-limiter-rules.ts +0 -7
- package/types/models/test-case.ts +0 -25
- package/types/models/tool.ts +0 -9
- package/types/models/user-role.ts +0 -12
- package/types/models/user.ts +0 -20
- package/types/models/variable.ts +0 -8
- package/types/models/vector-methods.ts +0 -7
- package/types/provider-config.ts +0 -21
- package/types/queue-config.ts +0 -16
- package/types/rbac-rights-modes.ts +0 -1
- package/types/statistics.ts +0 -20
- package/types/workflow.ts +0 -31
- /package/ee/{documents → python/documents}/THIRD_PARTY_LICENSES/docling.txt +0 -0
- /package/ee/{documents/processing → python}/requirements.txt +0 -0
package/dist/index.js
CHANGED
|
@@ -877,7 +877,7 @@ var uploadFile = async (file, fileName, config, options = {}, user, customBucket
|
|
|
877
877
|
if (error.name === "SignatureDoesNotMatch" || error.name === "InvalidAccessKeyId" || error.name === "AccessDenied") {
|
|
878
878
|
if (attempt < maxRetries) {
|
|
879
879
|
const backoffMs = Math.pow(2, attempt) * 1e3;
|
|
880
|
-
await new Promise((
|
|
880
|
+
await new Promise((resolve3) => setTimeout(resolve3, backoffMs));
|
|
881
881
|
s3Client = void 0;
|
|
882
882
|
getS3Client(config);
|
|
883
883
|
continue;
|
|
@@ -4607,7 +4607,7 @@ async function withRetry(generateFn, maxRetries = 3) {
|
|
|
4607
4607
|
if (attempt === maxRetries) {
|
|
4608
4608
|
throw error;
|
|
4609
4609
|
}
|
|
4610
|
-
await new Promise((
|
|
4610
|
+
await new Promise((resolve3) => setTimeout(resolve3, Math.pow(2, attempt) * 1e3));
|
|
4611
4611
|
}
|
|
4612
4612
|
}
|
|
4613
4613
|
throw lastError;
|
|
@@ -5263,8 +5263,8 @@ async function* executeAgenticRetrieval({
|
|
|
5263
5263
|
try {
|
|
5264
5264
|
let finishResolver;
|
|
5265
5265
|
let finishRejector;
|
|
5266
|
-
const finishPromise = new Promise((
|
|
5267
|
-
finishResolver =
|
|
5266
|
+
const finishPromise = new Promise((resolve3, reject) => {
|
|
5267
|
+
finishResolver = resolve3;
|
|
5268
5268
|
finishRejector = reject;
|
|
5269
5269
|
});
|
|
5270
5270
|
const timeoutId = setTimeout(() => {
|
|
@@ -7343,7 +7343,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7343
7343
|
} = await validateWorkflowPayload(data, providers);
|
|
7344
7344
|
const retries = 3;
|
|
7345
7345
|
let attempts = 0;
|
|
7346
|
-
const promise = new Promise(async (
|
|
7346
|
+
const promise = new Promise(async (resolve3, reject) => {
|
|
7347
7347
|
while (attempts < retries) {
|
|
7348
7348
|
try {
|
|
7349
7349
|
const messages2 = await processUiMessagesFlow({
|
|
@@ -7358,7 +7358,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7358
7358
|
config,
|
|
7359
7359
|
variables: data.inputs
|
|
7360
7360
|
});
|
|
7361
|
-
|
|
7361
|
+
resolve3(messages2);
|
|
7362
7362
|
break;
|
|
7363
7363
|
} catch (error) {
|
|
7364
7364
|
console.error(
|
|
@@ -7369,7 +7369,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7369
7369
|
if (attempts >= retries) {
|
|
7370
7370
|
reject(new Error(error instanceof Error ? error.message : String(error)));
|
|
7371
7371
|
}
|
|
7372
|
-
await new Promise((
|
|
7372
|
+
await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
|
|
7373
7373
|
}
|
|
7374
7374
|
}
|
|
7375
7375
|
});
|
|
@@ -7419,7 +7419,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7419
7419
|
} = await validateEvalPayload(data, providers);
|
|
7420
7420
|
const retries = 3;
|
|
7421
7421
|
let attempts = 0;
|
|
7422
|
-
const promise = new Promise(async (
|
|
7422
|
+
const promise = new Promise(async (resolve3, reject) => {
|
|
7423
7423
|
while (attempts < retries) {
|
|
7424
7424
|
try {
|
|
7425
7425
|
const messages2 = await processUiMessagesFlow({
|
|
@@ -7433,7 +7433,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7433
7433
|
tools,
|
|
7434
7434
|
config
|
|
7435
7435
|
});
|
|
7436
|
-
|
|
7436
|
+
resolve3(messages2);
|
|
7437
7437
|
break;
|
|
7438
7438
|
} catch (error) {
|
|
7439
7439
|
console.error(
|
|
@@ -7444,7 +7444,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
|
|
|
7444
7444
|
if (attempts >= retries) {
|
|
7445
7445
|
reject(new Error(error instanceof Error ? error.message : String(error)));
|
|
7446
7446
|
}
|
|
7447
|
-
await new Promise((
|
|
7447
|
+
await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
|
|
7448
7448
|
}
|
|
7449
7449
|
}
|
|
7450
7450
|
});
|
|
@@ -7859,7 +7859,7 @@ var pollJobResult = async ({
|
|
|
7859
7859
|
attempts++;
|
|
7860
7860
|
const job = await Job.fromId(queue.queue, jobId);
|
|
7861
7861
|
if (!job) {
|
|
7862
|
-
await new Promise((
|
|
7862
|
+
await new Promise((resolve3) => setTimeout((resolve4) => resolve4(true), 2e3));
|
|
7863
7863
|
continue;
|
|
7864
7864
|
}
|
|
7865
7865
|
const elapsedTime = Date.now() - startTime;
|
|
@@ -7889,7 +7889,7 @@ var pollJobResult = async ({
|
|
|
7889
7889
|
console.log(`[EXULU] eval function ${job.id} result: ${result}`);
|
|
7890
7890
|
break;
|
|
7891
7891
|
}
|
|
7892
|
-
await new Promise((
|
|
7892
|
+
await new Promise((resolve3) => setTimeout((resolve4) => resolve4(true), 2e3));
|
|
7893
7893
|
}
|
|
7894
7894
|
return result;
|
|
7895
7895
|
};
|
|
@@ -7997,7 +7997,7 @@ var processUiMessagesFlow = async ({
|
|
|
7997
7997
|
label: agent.name,
|
|
7998
7998
|
trigger: "agent"
|
|
7999
7999
|
};
|
|
8000
|
-
messageHistory = await new Promise(async (
|
|
8000
|
+
messageHistory = await new Promise(async (resolve3, reject) => {
|
|
8001
8001
|
const startTime = Date.now();
|
|
8002
8002
|
try {
|
|
8003
8003
|
const result = await provider.generateStream({
|
|
@@ -8074,7 +8074,7 @@ var processUiMessagesFlow = async ({
|
|
|
8074
8074
|
})
|
|
8075
8075
|
] : []
|
|
8076
8076
|
]);
|
|
8077
|
-
|
|
8077
|
+
resolve3({
|
|
8078
8078
|
messages,
|
|
8079
8079
|
metadata: {
|
|
8080
8080
|
tokens: {
|
|
@@ -8822,7 +8822,7 @@ type PageInfo {
|
|
|
8822
8822
|
} = await validateWorkflowPayload(jobData, providers);
|
|
8823
8823
|
const retries = 3;
|
|
8824
8824
|
let attempts = 0;
|
|
8825
|
-
const promise = new Promise(async (
|
|
8825
|
+
const promise = new Promise(async (resolve3, reject) => {
|
|
8826
8826
|
while (attempts < retries) {
|
|
8827
8827
|
try {
|
|
8828
8828
|
const messages2 = await processUiMessagesFlow({
|
|
@@ -8837,7 +8837,7 @@ type PageInfo {
|
|
|
8837
8837
|
config,
|
|
8838
8838
|
variables: args.variables
|
|
8839
8839
|
});
|
|
8840
|
-
|
|
8840
|
+
resolve3(messages2);
|
|
8841
8841
|
break;
|
|
8842
8842
|
} catch (error) {
|
|
8843
8843
|
console.error(
|
|
@@ -8851,7 +8851,7 @@ type PageInfo {
|
|
|
8851
8851
|
if (attempts >= retries) {
|
|
8852
8852
|
reject(error instanceof Error ? error : new Error(String(error)));
|
|
8853
8853
|
}
|
|
8854
|
-
await new Promise((
|
|
8854
|
+
await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
|
|
8855
8855
|
}
|
|
8856
8856
|
}
|
|
8857
8857
|
});
|
|
@@ -9104,10 +9104,10 @@ type PageInfo {
|
|
|
9104
9104
|
contexts.map(async (context2) => {
|
|
9105
9105
|
let processor = null;
|
|
9106
9106
|
if (context2.processor) {
|
|
9107
|
-
processor = await new Promise(async (
|
|
9107
|
+
processor = await new Promise(async (resolve3, reject) => {
|
|
9108
9108
|
const config2 = context2.processor?.config;
|
|
9109
9109
|
const queue = await config2?.queue;
|
|
9110
|
-
|
|
9110
|
+
resolve3({
|
|
9111
9111
|
name: context2.processor.name,
|
|
9112
9112
|
description: context2.processor.description,
|
|
9113
9113
|
queue: queue?.queue?.name || void 0,
|
|
@@ -9188,10 +9188,10 @@ type PageInfo {
|
|
|
9188
9188
|
}
|
|
9189
9189
|
let processor = null;
|
|
9190
9190
|
if (data.processor) {
|
|
9191
|
-
processor = await new Promise(async (
|
|
9191
|
+
processor = await new Promise(async (resolve3, reject) => {
|
|
9192
9192
|
const config2 = data.processor?.config;
|
|
9193
9193
|
const queue = await config2?.queue;
|
|
9194
|
-
|
|
9194
|
+
resolve3({
|
|
9195
9195
|
name: data.processor.name,
|
|
9196
9196
|
description: data.processor.description,
|
|
9197
9197
|
queue: queue?.queue?.name || void 0,
|
|
@@ -13086,7 +13086,7 @@ var internetSearchTool = new ExuluTool({
|
|
|
13086
13086
|
} catch (error) {
|
|
13087
13087
|
if (error instanceof Perplexity.RateLimitError && attempt < maxRetries - 1) {
|
|
13088
13088
|
const delay = Math.pow(2, attempt) * 1e3 + Math.random() * 1e3;
|
|
13089
|
-
await new Promise((
|
|
13089
|
+
await new Promise((resolve3) => setTimeout(resolve3, delay));
|
|
13090
13090
|
continue;
|
|
13091
13091
|
}
|
|
13092
13092
|
throw error;
|
|
@@ -15283,7 +15283,7 @@ var create = ({
|
|
|
15283
15283
|
// src/index.ts
|
|
15284
15284
|
import CryptoJS9 from "crypto-js";
|
|
15285
15285
|
|
|
15286
|
-
// ee/markdown.ts
|
|
15286
|
+
// ee/chunking/markdown.ts
|
|
15287
15287
|
var extractPageTag = (text) => {
|
|
15288
15288
|
let match;
|
|
15289
15289
|
let lastPageNumber = void 0;
|
|
@@ -15710,9 +15710,172 @@ var MarkdownChunker = class {
|
|
|
15710
15710
|
}
|
|
15711
15711
|
};
|
|
15712
15712
|
|
|
15713
|
-
//
|
|
15713
|
+
// src/utils/python-setup.ts
|
|
15714
15714
|
import { exec } from "child_process";
|
|
15715
15715
|
import { promisify } from "util";
|
|
15716
|
+
import { resolve, join, dirname } from "path";
|
|
15717
|
+
import { existsSync, readFileSync } from "fs";
|
|
15718
|
+
import { fileURLToPath } from "url";
|
|
15719
|
+
// Promise-based form of child_process.exec, used below to await shell commands.
var execAsync = promisify(exec);
|
|
15720
|
+
/**
 * Finds the directory that holds the "@exulu/backend" package manifest.
 *
 * Starting from the directory of this module, checks up to 10 directory
 * levels (stopping early at the filesystem root) for a package.json whose
 * "name" is "@exulu/backend".
 *
 * @returns {string} The matching directory, or the directory two levels above
 *   this module when no matching manifest is found.
 */
function getPackageRoot() {
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  let candidate = moduleDir;
  for (let level = 0; level < 10; level++) {
    const manifestPath = join(candidate, "package.json");
    if (existsSync(manifestPath)) {
      try {
        const manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));
        if (manifest.name === "@exulu/backend") {
          return candidate;
        }
      } catch {
        // Unreadable or malformed manifest: ignore it and keep walking upwards.
      }
    }
    const parent = resolve(candidate, "..");
    if (parent === candidate) {
      // Reached the filesystem root.
      break;
    }
    candidate = parent;
  }
  return resolve(moduleDir, "../..");
}
|
|
15746
|
+
/**
 * Builds the absolute path of the Python setup script inside the package.
 *
 * @param {string} packageRoot - Directory of the package.
 * @returns {string} Absolute path to `ee/python/setup.sh` under `packageRoot`.
 */
function getSetupScriptPath(packageRoot) {
  const relativeScript = "ee/python/setup.sh";
  return resolve(packageRoot, relativeScript);
}
|
|
15749
|
+
/**
 * Builds the absolute path of the Python virtual environment directory.
 *
 * @param {string} packageRoot - Directory of the package.
 * @returns {string} Absolute path to `ee/python/.venv` under `packageRoot`.
 */
function getVenvPath(packageRoot) {
  const relativeVenv = "ee/python/.venv";
  return resolve(packageRoot, relativeVenv);
}
|
|
15752
|
+
/**
 * Reports whether the Python virtual environment appears to be installed.
 *
 * Only checks the filesystem: the `.venv` directory and its `bin/python`
 * entry must both exist. The interpreter itself is not executed.
 *
 * @param {string} [packageRoot] - Package directory; defaults to getPackageRoot().
 * @returns {boolean} True when both paths exist.
 */
function isPythonEnvironmentSetup(packageRoot) {
  const venvDir = getVenvPath(packageRoot ?? getPackageRoot());
  if (!existsSync(venvDir)) {
    return false;
  }
  return existsSync(join(venvDir, "bin", "python"));
}
|
|
15758
|
+
/**
 * Sets up the Python virtual environment by running `ee/python/setup.sh` with bash.
 *
 * Failures of the setup script are reported through the returned object
 * (`success: false`) rather than thrown.
 *
 * @param {object} [options]
 * @param {string} [options.packageRoot] - Package directory; defaults to getPackageRoot().
 * @param {boolean} [options.force=false] - Run the setup script even when the
 *   environment already appears to be installed.
 * @param {boolean} [options.verbose=false] - Log progress and the script output to the console.
 * @param {number} [options.timeout=600000] - Maximum run time of the script in milliseconds.
 * @returns {Promise<{success: boolean, message: string, alreadyExists: boolean, pythonVersion?: string, output?: string}>}
 *   `pythonVersion` is the first "Python X.Y.Z" found in the script output, if any;
 *   `output` is the script's stdout followed by its stderr.
 */
async function setupPythonEnvironment(options = {}) {
  const {
    packageRoot = getPackageRoot(),
    force = false,
    verbose = false,
    timeout = 6e5 // 10 minutes
  } = options;
  if (!force && isPythonEnvironmentSetup(packageRoot)) {
    if (verbose) {
      console.log("\u2713 Python environment already set up");
    }
    return {
      success: true,
      message: "Python environment already exists",
      alreadyExists: true
    };
  }
  const setupScriptPath = getSetupScriptPath(packageRoot);
  if (!existsSync(setupScriptPath)) {
    return {
      success: false,
      message: `Setup script not found at: ${setupScriptPath}`,
      alreadyExists: false
    };
  }
  try {
    if (verbose) {
      console.log("Setting up Python environment...");
    }
    const { stdout, stderr } = await execAsync(`bash "${setupScriptPath}"`, {
      cwd: packageRoot,
      timeout,
      env: {
        ...process.env,
        // Tell Python not to write .pyc bytecode files while the script runs.
        PYTHONDONTWRITEBYTECODE: "1"
      },
      maxBuffer: 10 * 1024 * 1024 // 10MB buffer
    });
    const output = stdout + stderr;
    const versionMatch = output.match(/Python (\d+\.\d+\.\d+)/);
    const pythonVersion = versionMatch ? versionMatch[1] : void 0;
    if (verbose) {
      console.log(output);
    }
    return {
      success: true,
      message: "Python environment set up successfully",
      alreadyExists: false,
      pythonVersion,
      output
    };
  } catch (error) {
    // A failed child process rejects with stdout/stderr attached, but errors
    // raised before the child starts (e.g. invalid options) carry neither;
    // adding two undefined values would report the output as NaN.
    const errorOutput = `${error?.stdout ?? ""}${error?.stderr ?? ""}`;
    const reason = error instanceof Error ? error.message : String(error);
    return {
      success: false,
      message: `Setup failed: ${reason}`,
      alreadyExists: false,
      output: errorOutput
    };
  }
}
|
|
15822
|
+
/**
 * Returns the help text shown when the Python environment is missing.
 *
 * The text lists three ways to run the setup (programmatic, npx, npm script),
 * the requirements (Python 3.10 or higher, pip) and per-OS install hints.
 * Leading and trailing whitespace is removed with trim().
 *
 * NOTE(review): indentation inside the template literal is not visible in
 * this rendering; confirm against the real source before editing the text.
 *
 * @returns {string} The trimmed, multi-line instruction text.
 */
function getPythonSetupInstructions() {
|
|
15823
|
+
return `
|
|
15824
|
+
Python environment not set up. Please run one of the following:
|
|
15825
|
+
|
|
15826
|
+
Option 1 (Automatic):
|
|
15827
|
+
import { setupPythonEnvironment } from '@exulu/backend';
|
|
15828
|
+
await setupPythonEnvironment();
|
|
15829
|
+
|
|
15830
|
+
Option 2 (Manual - for package consumers):
|
|
15831
|
+
npx @exulu/backend setup-python
|
|
15832
|
+
|
|
15833
|
+
Option 3 (Manual - for contributors):
|
|
15834
|
+
npm run python:setup
|
|
15835
|
+
|
|
15836
|
+
Requirements:
|
|
15837
|
+
- Python 3.10 or higher must be installed
|
|
15838
|
+
- pip must be available
|
|
15839
|
+
|
|
15840
|
+
Installing Python:
|
|
15841
|
+
- macOS: brew install python@3.12
|
|
15842
|
+
- Ubuntu/Debian: sudo apt-get install python3.12
|
|
15843
|
+
- Windows: Download from https://www.python.org/downloads/
|
|
15844
|
+
`.trim();
|
|
15845
|
+
}
|
|
15846
|
+
/**
 * Checks that the Python virtual environment exists and its interpreter runs.
 *
 * Three checks are made in order: the `.venv` directory exists, `bin/python`
 * exists inside it, and `python --version` can be executed.
 *
 * @param {string} [packageRoot] - Package directory; defaults to getPackageRoot().
 * @returns {Promise<{valid: boolean, message: string}>} `valid` is true only
 *   when all checks pass; otherwise `message` explains how to repair the setup.
 */
async function validatePythonEnvironment(packageRoot) {
  const baseDir = packageRoot ?? getPackageRoot();
  const venvDir = getVenvPath(baseDir);
  const interpreter = join(venvDir, "bin", "python");
  const invalid = (message) => ({ valid: false, message });

  if (!existsSync(venvDir)) {
    return invalid(getPythonSetupInstructions());
  }
  if (!existsSync(interpreter)) {
    return invalid("Python virtual environment is corrupted. Please run:\n await setupPythonEnvironment({ force: true })");
  }
  try {
    await execAsync(`"${interpreter}" --version`, { cwd: baseDir });
  } catch {
    // The file exists but could not be executed successfully.
    return invalid("Python executable is not working. Please run:\n await setupPythonEnvironment({ force: true })");
  }
  return {
    valid: true,
    message: "Python environment is valid"
  };
}
|
|
15875
|
+
|
|
15876
|
+
// ee/python/documents/processing/doc_processor.ts
|
|
15877
|
+
import "child_process";
|
|
15878
|
+
import "util";
|
|
15716
15879
|
import * as fs2 from "fs";
|
|
15717
15880
|
import * as path from "path";
|
|
15718
15881
|
import { generateText as generateText3, Output as Output2 } from "ai";
|
|
@@ -15723,7 +15886,131 @@ import * as mammoth from "mammoth";
|
|
|
15723
15886
|
import TurndownService from "turndown";
|
|
15724
15887
|
import WordExtractor from "word-extractor";
|
|
15725
15888
|
import { parseOfficeAsync as parseOfficeAsync2 } from "officeparser";
|
|
15726
|
-
|
|
15889
|
+
|
|
15890
|
+
// src/utils/python-executor.ts
|
|
15891
|
+
import { exec as exec2 } from "child_process";
|
|
15892
|
+
import { promisify as promisify2 } from "util";
|
|
15893
|
+
import { resolve as resolve2, join as join2, dirname as dirname2 } from "path";
|
|
15894
|
+
import { existsSync as existsSync2, readFileSync as readFileSync2 } from "fs";
|
|
15895
|
+
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
15896
|
+
// Promise-based form of child_process.exec, used below to await the Python command.
var execAsync2 = promisify2(exec2);
|
|
15897
|
+
/**
 * Finds the directory that holds the "@exulu/backend" package manifest.
 *
 * Starting from the directory of this module, checks up to 10 directory
 * levels (stopping early at the filesystem root) for a package.json whose
 * "name" is "@exulu/backend".
 *
 * @returns {string} The matching directory, or the directory two levels above
 *   this module when no matching manifest is found.
 */
function getPackageRoot2() {
  const moduleDir = dirname2(fileURLToPath2(import.meta.url));
  let candidate = moduleDir;
  for (let level = 0; level < 10; level++) {
    const manifestPath = join2(candidate, "package.json");
    if (existsSync2(manifestPath)) {
      try {
        const manifest = JSON.parse(readFileSync2(manifestPath, "utf-8"));
        if (manifest.name === "@exulu/backend") {
          return candidate;
        }
      } catch {
        // Unreadable or malformed manifest: ignore it and keep walking upwards.
      }
    }
    const parent = resolve2(candidate, "..");
    if (parent === candidate) {
      // Reached the filesystem root.
      break;
    }
    candidate = parent;
  }
  return resolve2(moduleDir, "../..");
}
|
|
15922
|
+
/**
 * Error type for a missing or unusable Python environment.
 * Instances carry `name === "PythonEnvironmentError"`.
 */
var PythonEnvironmentError = class extends Error {
  name = "PythonEnvironmentError";

  /**
   * @param {string} message - Human-readable description of the problem.
   */
  constructor(message) {
    super(message);
  }
};
|
|
15928
|
+
/**
 * Error type for a Python script that could not be run or that failed.
 * Carries the captured output streams and the exit code alongside the message.
 */
var PythonExecutionError = class extends Error {
  stdout;
  stderr;
  exitCode;
  name = "PythonExecutionError";

  /**
   * @param {string} message - Human-readable description of the failure.
   * @param {string} stdout - Captured standard output.
   * @param {string} stderr - Captured standard error.
   * @param {number} exitCode - Exit code associated with the failure.
   */
  constructor(message, stdout, stderr, exitCode) {
    super(message);
    Object.assign(this, { stdout, stderr, exitCode });
  }
};
|
|
15940
|
+
/**
 * Builds the absolute path of the Python virtual environment directory.
 *
 * @param {string} packageRoot - Directory of the package.
 * @returns {string} Absolute path to `ee/python/.venv` under `packageRoot`.
 */
function getVenvPath2(packageRoot) {
  const relativeVenv = "ee/python/.venv";
  return resolve2(packageRoot, relativeVenv);
}
|
|
15943
|
+
/**
 * Builds the path of the Python interpreter inside the virtual environment.
 *
 * @param {string} packageRoot - Directory of the package.
 * @returns {string} Path to `bin/python` inside the directory returned by getVenvPath2().
 */
function getPythonExecutable(packageRoot) {
  return join2(getVenvPath2(packageRoot), "bin", "python");
}
|
|
15947
|
+
/**
 * Ensures the Python environment is usable before a script is executed.
 *
 * @param {string} [packageRoot] - Package directory, passed through to validatePythonEnvironment().
 * @returns {Promise<void>} Resolves when the environment is valid.
 * @throws {PythonEnvironmentError} With the validation message when the environment is not valid.
 */
async function validatePythonEnvironmentForExecution(packageRoot) {
  const { valid, message } = await validatePythonEnvironment(packageRoot);
  if (valid) {
    return;
  }
  throw new PythonEnvironmentError(message);
}
|
|
15953
|
+
/**
 * Runs a Python script with the interpreter of the package's virtual environment.
 *
 * @param {object} config
 * @param {string} config.scriptPath - Script location, resolved against `packageRoot`.
 * @param {string[]} [config.args=[]] - Command-line arguments for the script.
 * @param {string} [config.packageRoot] - Package directory; defaults to getPackageRoot2().
 * @param {string} [config.cwd] - Working directory of the child process; defaults to process.cwd().
 * @param {number} [config.timeout=300000] - Maximum run time in milliseconds.
 * @param {object} [config.env={}] - Extra environment variables, merged over process.env.
 * @param {boolean} [config.validateEnvironment=true] - Validate the Python environment first.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number, success: boolean}>}
 *   Trimmed output streams with `exitCode: 0` and `success: true`.
 * @throws {PythonEnvironmentError} When validation is enabled and the environment is not valid.
 * @throws {PythonExecutionError} When the script file does not exist or the command fails.
 */
async function executePythonScript(config) {
  const {
    scriptPath,
    args = [],
    packageRoot = getPackageRoot2(),
    cwd = process.cwd(),
    timeout = 3e5, // 5 minutes default
    env = {},
    validateEnvironment = true
  } = config;
  if (validateEnvironment) {
    await validatePythonEnvironmentForExecution(packageRoot);
  }
  const resolvedScriptPath = resolve2(packageRoot, scriptPath);
  if (!existsSync2(resolvedScriptPath)) {
    throw new PythonExecutionError(
      `Python script not found: ${resolvedScriptPath}`,
      "",
      "",
      1
    );
  }
  // The command line is interpreted by a shell, so every word is quoted:
  // paths may contain spaces, quotes, `$`, backticks or `;`, and an empty
  // argument must survive as an argument.
  const quoteShellArg = (value) => {
    const text = String(value);
    if (process.platform === "win32") {
      // cmd.exe has no single-quote syntax; best-effort double quoting.
      return `"${text.replaceAll('"', '\\"')}"`;
    }
    // POSIX sh: single quotes are literal; an embedded quote closes the
    // string, adds an escaped quote and reopens it.
    return `'${text.replaceAll("'", "'\\''")}'`;
  };
  const command = [getPythonExecutable(packageRoot), resolvedScriptPath, ...args]
    .map(quoteShellArg)
    .join(" ");
  try {
    const { stdout, stderr } = await execAsync2(command, {
      cwd,
      timeout,
      env: {
        ...process.env,
        ...env,
        // Ensure Python doesn't write bytecode files (set last, so it wins over `env`).
        PYTHONDONTWRITEBYTECODE: "1"
      },
      maxBuffer: 10 * 1024 * 1024 // 10MB buffer for output
    });
    return {
      stdout: stdout.trim(),
      stderr: stderr.trim(),
      exitCode: 0,
      success: true
    };
  } catch (error) {
    const stdout = error.stdout?.toString() ?? "";
    const stderr = error.stderr?.toString() ?? "";
    // `code` is the numeric exit status for a failed process, but null when the
    // child was killed (e.g. timeout) and a string such as "ENOENT" for spawn
    // errors; report 1 in those cases so exitCode is always a number.
    const exitCode = typeof error.code === "number" ? error.code : 1;
    throw new PythonExecutionError(
      `Python script execution failed: ${error.message}`,
      stdout,
      stderr,
      exitCode
    );
  }
}
|
|
16012
|
+
|
|
16013
|
+
// ee/python/documents/processing/doc_processor.ts
|
|
15727
16014
|
async function processDocx(file) {
|
|
15728
16015
|
const html = await mammoth.convertToHtml({ buffer: file });
|
|
15729
16016
|
const turndownService = new TurndownService();
|
|
@@ -15973,16 +16260,24 @@ async function processPdf(buffer, paths, config, verbose = false) {
|
|
|
15973
16260
|
try {
|
|
15974
16261
|
let json;
|
|
15975
16262
|
if (config?.docling) {
|
|
15976
|
-
|
|
15977
|
-
|
|
15978
|
-
|
|
15979
|
-
|
|
15980
|
-
|
|
15981
|
-
|
|
15982
|
-
|
|
15983
|
-
|
|
15984
|
-
|
|
15985
|
-
|
|
16263
|
+
console.log(`[EXULU] Processing document with document_to_markdown.py`);
|
|
16264
|
+
const result = await executePythonScript({
|
|
16265
|
+
scriptPath: "ee/python/documents/processing/document_to_markdown.py",
|
|
16266
|
+
args: [
|
|
16267
|
+
paths.source,
|
|
16268
|
+
"-o",
|
|
16269
|
+
paths.json,
|
|
16270
|
+
"--images-dir",
|
|
16271
|
+
paths.images
|
|
16272
|
+
],
|
|
16273
|
+
timeout: 30 * 60 * 1e3
|
|
16274
|
+
// 30 minutes for large documents
|
|
16275
|
+
});
|
|
16276
|
+
if (result.stderr) {
|
|
16277
|
+
console.log("Processing info:", result.stderr.trim());
|
|
16278
|
+
}
|
|
16279
|
+
if (!result.success) {
|
|
16280
|
+
throw new Error(`Document processing failed: ${result.stderr}`);
|
|
15986
16281
|
}
|
|
15987
16282
|
const jsonContent = await fs2.promises.readFile(paths.json, "utf-8");
|
|
15988
16283
|
json = JSON.parse(jsonContent);
|
|
@@ -15997,10 +16292,10 @@ async function processPdf(buffer, paths, config, verbose = false) {
|
|
|
15997
16292
|
headings: []
|
|
15998
16293
|
}];
|
|
15999
16294
|
}
|
|
16000
|
-
console.log(`
|
|
16295
|
+
console.log(`[EXULU]
|
|
16001
16296
|
\u2713 Document processing completed successfully`);
|
|
16002
|
-
console.log(`
|
|
16003
|
-
console.log(`
|
|
16297
|
+
console.log(`[EXULU] Total pages: ${json.length}`);
|
|
16298
|
+
console.log(`[EXULU] Output file: ${paths.json}`);
|
|
16004
16299
|
if (!config?.docling && config?.vlm?.model) {
|
|
16005
16300
|
console.error("[EXULU] VLM validation is only supported when docling is enabled, skipping validation.");
|
|
16006
16301
|
}
|
|
@@ -16213,6 +16508,12 @@ var ExuluChunkers = {
|
|
|
16213
16508
|
rules: RecursiveRules
|
|
16214
16509
|
}
|
|
16215
16510
|
};
|
|
16511
|
+
/**
 * Public helpers for managing the package's Python environment.
 */
var ExuluPython = {
  /** Runs the setup script; see setupPythonEnvironment. */
  setup: setupPythonEnvironment,
  /** Filesystem-only check that the virtual environment exists. */
  check: isPythonEnvironmentSetup,
  /** Checks the virtual environment and runs the interpreter once. */
  validate: validatePythonEnvironment,
  /** Returns the setup instructions text. */
  instructions: getPythonSetupInstructions
};
|
|
16216
16517
|
export {
|
|
16217
16518
|
JOB_STATUS_ENUM as EXULU_JOB_STATUS_ENUM,
|
|
16218
16519
|
STATISTICS_TYPE_ENUM as EXULU_STATISTICS_TYPE_ENUM,
|
|
@@ -16228,6 +16529,7 @@ export {
|
|
|
16228
16529
|
ExuluJobs,
|
|
16229
16530
|
ExuluOtel,
|
|
16230
16531
|
ExuluProvider,
|
|
16532
|
+
ExuluPython,
|
|
16231
16533
|
queues as ExuluQueues,
|
|
16232
16534
|
ExuluReranker,
|
|
16233
16535
|
ExuluTool,
|