@exulu/backend 1.48.2 → 1.49.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. package/dist/index.cjs +351 -42
  2. package/dist/index.d.cts +96 -1
  3. package/dist/index.d.ts +96 -1
  4. package/dist/index.js +340 -38
  5. package/ee/{markdown.ts → chunking/markdown.ts} +2 -2
  6. package/ee/python/README.md +295 -0
  7. package/ee/python/documents/processing/README.md +155 -0
  8. package/ee/{documents → python/documents}/processing/doc_processor.ts +25 -17
  9. package/ee/{documents/processing/pdf_to_markdown.py → python/documents/processing/document_to_markdown.py} +3 -10
  10. package/ee/python/setup.sh +180 -0
  11. package/package.json +14 -3
  12. package/scripts/postinstall.cjs +149 -0
  13. package/.agents/skills/mintlify/SKILL.md +0 -347
  14. package/.editorconfig +0 -15
  15. package/.eslintrc.json +0 -52
  16. package/.github/workflows/release-backend.yml +0 -38
  17. package/.husky/commit-msg +0 -1
  18. package/.jscpd.json +0 -18
  19. package/.mcp.json +0 -25
  20. package/.nvmrc +0 -1
  21. package/.prettierignore +0 -5
  22. package/.prettierrc.json +0 -12
  23. package/CHANGELOG.md +0 -8
  24. package/SECURITY.md +0 -5
  25. package/commitlint.config.js +0 -4
  26. package/devops/documentation/patch-older-releases.md +0 -42
  27. package/ee/documents/processing/build_pdf_processor.sh +0 -35
  28. package/ee/documents/processing/chunk_markdown.py +0 -263
  29. package/ee/documents/processing/pdf_processor.spec +0 -115
  30. package/eslint.config.js +0 -88
  31. package/jest.config.ts +0 -25
  32. package/mintlify-docs/.mintignore +0 -7
  33. package/mintlify-docs/AGENTS.md +0 -33
  34. package/mintlify-docs/CLAUDE.MD +0 -50
  35. package/mintlify-docs/CONTRIBUTING.md +0 -32
  36. package/mintlify-docs/LICENSE +0 -21
  37. package/mintlify-docs/README.md +0 -55
  38. package/mintlify-docs/ai-tools/claude-code.mdx +0 -43
  39. package/mintlify-docs/ai-tools/cursor.mdx +0 -39
  40. package/mintlify-docs/ai-tools/windsurf.mdx +0 -39
  41. package/mintlify-docs/api-reference/core-types/agent-types.mdx +0 -110
  42. package/mintlify-docs/api-reference/core-types/analytics-types.mdx +0 -95
  43. package/mintlify-docs/api-reference/core-types/configuration-types.mdx +0 -83
  44. package/mintlify-docs/api-reference/core-types/evaluation-types.mdx +0 -106
  45. package/mintlify-docs/api-reference/core-types/job-types.mdx +0 -135
  46. package/mintlify-docs/api-reference/core-types/overview.mdx +0 -73
  47. package/mintlify-docs/api-reference/core-types/prompt-types.mdx +0 -102
  48. package/mintlify-docs/api-reference/core-types/rbac-types.mdx +0 -163
  49. package/mintlify-docs/api-reference/core-types/session-types.mdx +0 -77
  50. package/mintlify-docs/api-reference/core-types/user-management.mdx +0 -112
  51. package/mintlify-docs/api-reference/core-types/workflow-types.mdx +0 -88
  52. package/mintlify-docs/api-reference/core-types.mdx +0 -585
  53. package/mintlify-docs/api-reference/dynamic-types.mdx +0 -851
  54. package/mintlify-docs/api-reference/endpoint/create.mdx +0 -4
  55. package/mintlify-docs/api-reference/endpoint/delete.mdx +0 -4
  56. package/mintlify-docs/api-reference/endpoint/get.mdx +0 -4
  57. package/mintlify-docs/api-reference/endpoint/webhook.mdx +0 -4
  58. package/mintlify-docs/api-reference/introduction.mdx +0 -661
  59. package/mintlify-docs/api-reference/mutations.mdx +0 -1012
  60. package/mintlify-docs/api-reference/openapi.json +0 -217
  61. package/mintlify-docs/api-reference/queries.mdx +0 -1154
  62. package/mintlify-docs/backend/introduction.mdx +0 -218
  63. package/mintlify-docs/changelog.mdx +0 -387
  64. package/mintlify-docs/community-edition.mdx +0 -304
  65. package/mintlify-docs/core/exulu-agent/api-reference.mdx +0 -894
  66. package/mintlify-docs/core/exulu-agent/configuration.mdx +0 -690
  67. package/mintlify-docs/core/exulu-agent/introduction.mdx +0 -552
  68. package/mintlify-docs/core/exulu-app/api-reference.mdx +0 -481
  69. package/mintlify-docs/core/exulu-app/configuration.mdx +0 -319
  70. package/mintlify-docs/core/exulu-app/introduction.mdx +0 -117
  71. package/mintlify-docs/core/exulu-authentication.mdx +0 -810
  72. package/mintlify-docs/core/exulu-chunkers/api-reference.mdx +0 -1011
  73. package/mintlify-docs/core/exulu-chunkers/configuration.mdx +0 -596
  74. package/mintlify-docs/core/exulu-chunkers/introduction.mdx +0 -403
  75. package/mintlify-docs/core/exulu-context/api-reference.mdx +0 -911
  76. package/mintlify-docs/core/exulu-context/configuration.mdx +0 -648
  77. package/mintlify-docs/core/exulu-context/introduction.mdx +0 -394
  78. package/mintlify-docs/core/exulu-database.mdx +0 -811
  79. package/mintlify-docs/core/exulu-default-agents.mdx +0 -545
  80. package/mintlify-docs/core/exulu-eval/api-reference.mdx +0 -772
  81. package/mintlify-docs/core/exulu-eval/configuration.mdx +0 -680
  82. package/mintlify-docs/core/exulu-eval/introduction.mdx +0 -459
  83. package/mintlify-docs/core/exulu-logging.mdx +0 -464
  84. package/mintlify-docs/core/exulu-otel.mdx +0 -670
  85. package/mintlify-docs/core/exulu-queues/api-reference.mdx +0 -648
  86. package/mintlify-docs/core/exulu-queues/configuration.mdx +0 -650
  87. package/mintlify-docs/core/exulu-queues/introduction.mdx +0 -474
  88. package/mintlify-docs/core/exulu-reranker/api-reference.mdx +0 -630
  89. package/mintlify-docs/core/exulu-reranker/configuration.mdx +0 -663
  90. package/mintlify-docs/core/exulu-reranker/introduction.mdx +0 -516
  91. package/mintlify-docs/core/exulu-tool/api-reference.mdx +0 -723
  92. package/mintlify-docs/core/exulu-tool/configuration.mdx +0 -805
  93. package/mintlify-docs/core/exulu-tool/introduction.mdx +0 -539
  94. package/mintlify-docs/core/exulu-variables/api-reference.mdx +0 -699
  95. package/mintlify-docs/core/exulu-variables/configuration.mdx +0 -736
  96. package/mintlify-docs/core/exulu-variables/introduction.mdx +0 -511
  97. package/mintlify-docs/development.mdx +0 -94
  98. package/mintlify-docs/docs.json +0 -248
  99. package/mintlify-docs/enterprise-edition.mdx +0 -538
  100. package/mintlify-docs/essentials/code.mdx +0 -35
  101. package/mintlify-docs/essentials/images.mdx +0 -59
  102. package/mintlify-docs/essentials/markdown.mdx +0 -88
  103. package/mintlify-docs/essentials/navigation.mdx +0 -87
  104. package/mintlify-docs/essentials/reusable-snippets.mdx +0 -110
  105. package/mintlify-docs/essentials/settings.mdx +0 -318
  106. package/mintlify-docs/favicon.svg +0 -3
  107. package/mintlify-docs/frontend/introduction.mdx +0 -39
  108. package/mintlify-docs/getting-started.mdx +0 -267
  109. package/mintlify-docs/guides/custom-agent.mdx +0 -608
  110. package/mintlify-docs/guides/first-agent.mdx +0 -315
  111. package/mintlify-docs/images/admin_ui.png +0 -0
  112. package/mintlify-docs/images/contexts.png +0 -0
  113. package/mintlify-docs/images/create_agents.png +0 -0
  114. package/mintlify-docs/images/evals.png +0 -0
  115. package/mintlify-docs/images/graphql.png +0 -0
  116. package/mintlify-docs/images/graphql_api.png +0 -0
  117. package/mintlify-docs/images/hero-dark.png +0 -0
  118. package/mintlify-docs/images/hero-light.png +0 -0
  119. package/mintlify-docs/images/hero.png +0 -0
  120. package/mintlify-docs/images/knowledge_sources.png +0 -0
  121. package/mintlify-docs/images/mcp.png +0 -0
  122. package/mintlify-docs/images/scaling.png +0 -0
  123. package/mintlify-docs/index.mdx +0 -411
  124. package/mintlify-docs/logo/dark.svg +0 -9
  125. package/mintlify-docs/logo/light.svg +0 -9
  126. package/mintlify-docs/partners.mdx +0 -558
  127. package/mintlify-docs/products.mdx +0 -77
  128. package/mintlify-docs/snippets/snippet-intro.mdx +0 -4
  129. package/mintlify-docs/styles.css +0 -207
  130. package/ngrok.bash +0 -1
  131. package/ngrok.md +0 -6
  132. package/ngrok.yml +0 -10
  133. package/release.config.cjs +0 -15
  134. package/skills-lock.json +0 -10
  135. package/types/context-processor.ts +0 -45
  136. package/types/enums/eval-types.ts +0 -5
  137. package/types/enums/field-types.ts +0 -1
  138. package/types/enums/jobs.ts +0 -11
  139. package/types/enums/statistics.ts +0 -13
  140. package/types/exulu-table-definition.ts +0 -79
  141. package/types/file-types.ts +0 -18
  142. package/types/models/agent-session.ts +0 -27
  143. package/types/models/agent.ts +0 -68
  144. package/types/models/context.ts +0 -53
  145. package/types/models/embedding.ts +0 -17
  146. package/types/models/eval-run.ts +0 -40
  147. package/types/models/exulu-agent-tool-config.ts +0 -11
  148. package/types/models/item.ts +0 -21
  149. package/types/models/job.ts +0 -8
  150. package/types/models/project.ts +0 -16
  151. package/types/models/rate-limiter-rules.ts +0 -7
  152. package/types/models/test-case.ts +0 -25
  153. package/types/models/tool.ts +0 -9
  154. package/types/models/user-role.ts +0 -12
  155. package/types/models/user.ts +0 -20
  156. package/types/models/variable.ts +0 -8
  157. package/types/models/vector-methods.ts +0 -7
  158. package/types/provider-config.ts +0 -21
  159. package/types/queue-config.ts +0 -16
  160. package/types/rbac-rights-modes.ts +0 -1
  161. package/types/statistics.ts +0 -20
  162. package/types/workflow.ts +0 -31
  163. /package/ee/{documents → python/documents}/THIRD_PARTY_LICENSES/docling.txt +0 -0
  164. /package/ee/{documents/processing → python}/requirements.txt +0 -0
package/dist/index.cjs CHANGED
@@ -44,12 +44,19 @@ __export(index_exports, {
44
44
  ExuluJobs: () => ExuluJobs,
45
45
  ExuluOtel: () => ExuluOtel,
46
46
  ExuluProvider: () => ExuluProvider,
47
+ ExuluPython: () => ExuluPython,
47
48
  ExuluQueues: () => queues,
48
49
  ExuluReranker: () => ExuluReranker,
49
50
  ExuluTool: () => ExuluTool,
50
51
  ExuluVariables: () => ExuluVariables
51
52
  });
52
53
  module.exports = __toCommonJS(index_exports);
54
+
55
+ // node_modules/tsup/assets/cjs_shims.js
56
+ var getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${__filename}`).href : document.currentScript && document.currentScript.src || new URL("main.js", document.baseURI).href;
57
+ var importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
58
+
59
+ // src/index.ts
53
60
  var import_config = require("dotenv/config");
54
61
 
55
62
  // src/redis/client.ts
@@ -911,7 +918,7 @@ var uploadFile = async (file, fileName, config, options = {}, user, customBucket
911
918
  if (error.name === "SignatureDoesNotMatch" || error.name === "InvalidAccessKeyId" || error.name === "AccessDenied") {
912
919
  if (attempt < maxRetries) {
913
920
  const backoffMs = Math.pow(2, attempt) * 1e3;
914
- await new Promise((resolve) => setTimeout(resolve, backoffMs));
921
+ await new Promise((resolve3) => setTimeout(resolve3, backoffMs));
915
922
  s3Client = void 0;
916
923
  getS3Client(config);
917
924
  continue;
@@ -4641,7 +4648,7 @@ async function withRetry(generateFn, maxRetries = 3) {
4641
4648
  if (attempt === maxRetries) {
4642
4649
  throw error;
4643
4650
  }
4644
- await new Promise((resolve) => setTimeout(resolve, Math.pow(2, attempt) * 1e3));
4651
+ await new Promise((resolve3) => setTimeout(resolve3, Math.pow(2, attempt) * 1e3));
4645
4652
  }
4646
4653
  }
4647
4654
  throw lastError;
@@ -5297,8 +5304,8 @@ async function* executeAgenticRetrieval({
5297
5304
  try {
5298
5305
  let finishResolver;
5299
5306
  let finishRejector;
5300
- const finishPromise = new Promise((resolve, reject) => {
5301
- finishResolver = resolve;
5307
+ const finishPromise = new Promise((resolve3, reject) => {
5308
+ finishResolver = resolve3;
5302
5309
  finishRejector = reject;
5303
5310
  });
5304
5311
  const timeoutId = setTimeout(() => {
@@ -7377,7 +7384,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7377
7384
  } = await validateWorkflowPayload(data, providers);
7378
7385
  const retries = 3;
7379
7386
  let attempts = 0;
7380
- const promise = new Promise(async (resolve, reject) => {
7387
+ const promise = new Promise(async (resolve3, reject) => {
7381
7388
  while (attempts < retries) {
7382
7389
  try {
7383
7390
  const messages2 = await processUiMessagesFlow({
@@ -7392,7 +7399,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7392
7399
  config,
7393
7400
  variables: data.inputs
7394
7401
  });
7395
- resolve(messages2);
7402
+ resolve3(messages2);
7396
7403
  break;
7397
7404
  } catch (error) {
7398
7405
  console.error(
@@ -7403,7 +7410,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7403
7410
  if (attempts >= retries) {
7404
7411
  reject(new Error(error instanceof Error ? error.message : String(error)));
7405
7412
  }
7406
- await new Promise((resolve2) => setTimeout((resolve3) => resolve3(true), 2e3));
7413
+ await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
7407
7414
  }
7408
7415
  }
7409
7416
  });
@@ -7453,7 +7460,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7453
7460
  } = await validateEvalPayload(data, providers);
7454
7461
  const retries = 3;
7455
7462
  let attempts = 0;
7456
- const promise = new Promise(async (resolve, reject) => {
7463
+ const promise = new Promise(async (resolve3, reject) => {
7457
7464
  while (attempts < retries) {
7458
7465
  try {
7459
7466
  const messages2 = await processUiMessagesFlow({
@@ -7467,7 +7474,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7467
7474
  tools,
7468
7475
  config
7469
7476
  });
7470
- resolve(messages2);
7477
+ resolve3(messages2);
7471
7478
  break;
7472
7479
  } catch (error) {
7473
7480
  console.error(
@@ -7478,7 +7485,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7478
7485
  if (attempts >= retries) {
7479
7486
  reject(new Error(error instanceof Error ? error.message : String(error)));
7480
7487
  }
7481
- await new Promise((resolve2) => setTimeout((resolve3) => resolve3(true), 2e3));
7488
+ await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
7482
7489
  }
7483
7490
  }
7484
7491
  });
@@ -7893,7 +7900,7 @@ var pollJobResult = async ({
7893
7900
  attempts++;
7894
7901
  const job = await import_bullmq3.Job.fromId(queue.queue, jobId);
7895
7902
  if (!job) {
7896
- await new Promise((resolve) => setTimeout((resolve2) => resolve2(true), 2e3));
7903
+ await new Promise((resolve3) => setTimeout((resolve4) => resolve4(true), 2e3));
7897
7904
  continue;
7898
7905
  }
7899
7906
  const elapsedTime = Date.now() - startTime;
@@ -7923,7 +7930,7 @@ var pollJobResult = async ({
7923
7930
  console.log(`[EXULU] eval function ${job.id} result: ${result}`);
7924
7931
  break;
7925
7932
  }
7926
- await new Promise((resolve) => setTimeout((resolve2) => resolve2(true), 2e3));
7933
+ await new Promise((resolve3) => setTimeout((resolve4) => resolve4(true), 2e3));
7927
7934
  }
7928
7935
  return result;
7929
7936
  };
@@ -8031,7 +8038,7 @@ var processUiMessagesFlow = async ({
8031
8038
  label: agent.name,
8032
8039
  trigger: "agent"
8033
8040
  };
8034
- messageHistory = await new Promise(async (resolve, reject) => {
8041
+ messageHistory = await new Promise(async (resolve3, reject) => {
8035
8042
  const startTime = Date.now();
8036
8043
  try {
8037
8044
  const result = await provider.generateStream({
@@ -8108,7 +8115,7 @@ var processUiMessagesFlow = async ({
8108
8115
  })
8109
8116
  ] : []
8110
8117
  ]);
8111
- resolve({
8118
+ resolve3({
8112
8119
  messages,
8113
8120
  metadata: {
8114
8121
  tokens: {
@@ -8856,7 +8863,7 @@ type PageInfo {
8856
8863
  } = await validateWorkflowPayload(jobData, providers);
8857
8864
  const retries = 3;
8858
8865
  let attempts = 0;
8859
- const promise = new Promise(async (resolve, reject) => {
8866
+ const promise = new Promise(async (resolve3, reject) => {
8860
8867
  while (attempts < retries) {
8861
8868
  try {
8862
8869
  const messages2 = await processUiMessagesFlow({
@@ -8871,7 +8878,7 @@ type PageInfo {
8871
8878
  config,
8872
8879
  variables: args.variables
8873
8880
  });
8874
- resolve(messages2);
8881
+ resolve3(messages2);
8875
8882
  break;
8876
8883
  } catch (error) {
8877
8884
  console.error(
@@ -8885,7 +8892,7 @@ type PageInfo {
8885
8892
  if (attempts >= retries) {
8886
8893
  reject(error instanceof Error ? error : new Error(String(error)));
8887
8894
  }
8888
- await new Promise((resolve2) => setTimeout((resolve3) => resolve3(true), 2e3));
8895
+ await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
8889
8896
  }
8890
8897
  }
8891
8898
  });
@@ -9138,10 +9145,10 @@ type PageInfo {
9138
9145
  contexts.map(async (context2) => {
9139
9146
  let processor = null;
9140
9147
  if (context2.processor) {
9141
- processor = await new Promise(async (resolve, reject) => {
9148
+ processor = await new Promise(async (resolve3, reject) => {
9142
9149
  const config2 = context2.processor?.config;
9143
9150
  const queue = await config2?.queue;
9144
- resolve({
9151
+ resolve3({
9145
9152
  name: context2.processor.name,
9146
9153
  description: context2.processor.description,
9147
9154
  queue: queue?.queue?.name || void 0,
@@ -9222,10 +9229,10 @@ type PageInfo {
9222
9229
  }
9223
9230
  let processor = null;
9224
9231
  if (data.processor) {
9225
- processor = await new Promise(async (resolve, reject) => {
9232
+ processor = await new Promise(async (resolve3, reject) => {
9226
9233
  const config2 = data.processor?.config;
9227
9234
  const queue = await config2?.queue;
9228
- resolve({
9235
+ resolve3({
9229
9236
  name: data.processor.name,
9230
9237
  description: data.processor.description,
9231
9238
  queue: queue?.queue?.name || void 0,
@@ -13113,7 +13120,7 @@ var internetSearchTool = new ExuluTool({
13113
13120
  } catch (error) {
13114
13121
  if (error instanceof import_perplexity_ai.default.RateLimitError && attempt < maxRetries - 1) {
13115
13122
  const delay = Math.pow(2, attempt) * 1e3 + Math.random() * 1e3;
13116
- await new Promise((resolve) => setTimeout(resolve, delay));
13123
+ await new Promise((resolve3) => setTimeout(resolve3, delay));
13117
13124
  continue;
13118
13125
  }
13119
13126
  throw error;
@@ -15310,7 +15317,7 @@ var create = ({
15310
15317
  // src/index.ts
15311
15318
  var import_crypto_js9 = __toESM(require("crypto-js"), 1);
15312
15319
 
15313
- // ee/markdown.ts
15320
+ // ee/chunking/markdown.ts
15314
15321
  var extractPageTag = (text) => {
15315
15322
  let match;
15316
15323
  let lastPageNumber = void 0;
@@ -15737,9 +15744,172 @@ var MarkdownChunker = class {
15737
15744
  }
15738
15745
  };
15739
15746
 
15740
- // ee/documents/processing/doc_processor.ts
15747
+ // src/utils/python-setup.ts
15741
15748
  var import_child_process = require("child_process");
15742
15749
  var import_util2 = require("util");
15750
+ var import_path = require("path");
15751
+ var import_fs2 = require("fs");
15752
+ var import_url = require("url");
15753
+ var execAsync = (0, import_util2.promisify)(import_child_process.exec);
15754
+ function getPackageRoot() {
15755
+ const currentFile = (0, import_url.fileURLToPath)(importMetaUrl);
15756
+ let currentDir = (0, import_path.dirname)(currentFile);
15757
+ let attempts = 0;
15758
+ const maxAttempts = 10;
15759
+ while (attempts < maxAttempts) {
15760
+ const packageJsonPath = (0, import_path.join)(currentDir, "package.json");
15761
+ if ((0, import_fs2.existsSync)(packageJsonPath)) {
15762
+ try {
15763
+ const packageJson = JSON.parse((0, import_fs2.readFileSync)(packageJsonPath, "utf-8"));
15764
+ if (packageJson.name === "@exulu/backend") {
15765
+ return currentDir;
15766
+ }
15767
+ } catch {
15768
+ }
15769
+ }
15770
+ const parentDir = (0, import_path.resolve)(currentDir, "..");
15771
+ if (parentDir === currentDir) {
15772
+ break;
15773
+ }
15774
+ currentDir = parentDir;
15775
+ attempts++;
15776
+ }
15777
+ const fallback = (0, import_path.resolve)((0, import_path.dirname)((0, import_url.fileURLToPath)(importMetaUrl)), "../..");
15778
+ return fallback;
15779
+ }
15780
+ function getSetupScriptPath(packageRoot) {
15781
+ return (0, import_path.resolve)(packageRoot, "ee/python/setup.sh");
15782
+ }
15783
+ function getVenvPath(packageRoot) {
15784
+ return (0, import_path.resolve)(packageRoot, "ee/python/.venv");
15785
+ }
15786
+ function isPythonEnvironmentSetup(packageRoot) {
15787
+ const root = packageRoot ?? getPackageRoot();
15788
+ const venvPath = getVenvPath(root);
15789
+ const pythonPath = (0, import_path.join)(venvPath, "bin", "python");
15790
+ return (0, import_fs2.existsSync)(venvPath) && (0, import_fs2.existsSync)(pythonPath);
15791
+ }
15792
+ async function setupPythonEnvironment(options = {}) {
15793
+ const {
15794
+ packageRoot = getPackageRoot(),
15795
+ force = false,
15796
+ verbose = false,
15797
+ timeout = 6e5
15798
+ // 10 minutes
15799
+ } = options;
15800
+ if (!force && isPythonEnvironmentSetup(packageRoot)) {
15801
+ if (verbose) {
15802
+ console.log("\u2713 Python environment already set up");
15803
+ }
15804
+ return {
15805
+ success: true,
15806
+ message: "Python environment already exists",
15807
+ alreadyExists: true
15808
+ };
15809
+ }
15810
+ const setupScriptPath = getSetupScriptPath(packageRoot);
15811
+ if (!(0, import_fs2.existsSync)(setupScriptPath)) {
15812
+ return {
15813
+ success: false,
15814
+ message: `Setup script not found at: ${setupScriptPath}`,
15815
+ alreadyExists: false
15816
+ };
15817
+ }
15818
+ try {
15819
+ if (verbose) {
15820
+ console.log("Setting up Python environment...");
15821
+ }
15822
+ const { stdout, stderr } = await execAsync(`bash "${setupScriptPath}"`, {
15823
+ cwd: packageRoot,
15824
+ timeout,
15825
+ env: {
15826
+ ...process.env,
15827
+ // Ensure script can write to the directory
15828
+ PYTHONDONTWRITEBYTECODE: "1"
15829
+ },
15830
+ maxBuffer: 10 * 1024 * 1024
15831
+ // 10MB buffer
15832
+ });
15833
+ const output = stdout + stderr;
15834
+ const versionMatch = output.match(/Python (\d+\.\d+\.\d+)/);
15835
+ const pythonVersion = versionMatch ? versionMatch[1] : void 0;
15836
+ if (verbose) {
15837
+ console.log(output);
15838
+ }
15839
+ return {
15840
+ success: true,
15841
+ message: "Python environment set up successfully",
15842
+ alreadyExists: false,
15843
+ pythonVersion,
15844
+ output
15845
+ };
15846
+ } catch (error) {
15847
+ const errorOutput = error.stdout + error.stderr;
15848
+ return {
15849
+ success: false,
15850
+ message: `Setup failed: ${error.message}`,
15851
+ alreadyExists: false,
15852
+ output: errorOutput
15853
+ };
15854
+ }
15855
+ }
15856
+ function getPythonSetupInstructions() {
15857
+ return `
15858
+ Python environment not set up. Please run one of the following:
15859
+
15860
+ Option 1 (Automatic):
15861
+ import { setupPythonEnvironment } from '@exulu/backend';
15862
+ await setupPythonEnvironment();
15863
+
15864
+ Option 2 (Manual - for package consumers):
15865
+ npx @exulu/backend setup-python
15866
+
15867
+ Option 3 (Manual - for contributors):
15868
+ npm run python:setup
15869
+
15870
+ Requirements:
15871
+ - Python 3.10 or higher must be installed
15872
+ - pip must be available
15873
+
15874
+ Installing Python:
15875
+ - macOS: brew install python@3.12
15876
+ - Ubuntu/Debian: sudo apt-get install python3.12
15877
+ - Windows: Download from https://www.python.org/downloads/
15878
+ `.trim();
15879
+ }
15880
+ async function validatePythonEnvironment(packageRoot) {
15881
+ const root = packageRoot ?? getPackageRoot();
15882
+ const venvPath = getVenvPath(root);
15883
+ const pythonPath = (0, import_path.join)(venvPath, "bin", "python");
15884
+ if (!(0, import_fs2.existsSync)(venvPath)) {
15885
+ return {
15886
+ valid: false,
15887
+ message: getPythonSetupInstructions()
15888
+ };
15889
+ }
15890
+ if (!(0, import_fs2.existsSync)(pythonPath)) {
15891
+ return {
15892
+ valid: false,
15893
+ message: "Python virtual environment is corrupted. Please run:\n await setupPythonEnvironment({ force: true })"
15894
+ };
15895
+ }
15896
+ try {
15897
+ await execAsync(`"${pythonPath}" --version`, { cwd: root });
15898
+ return {
15899
+ valid: true,
15900
+ message: "Python environment is valid"
15901
+ };
15902
+ } catch {
15903
+ return {
15904
+ valid: false,
15905
+ message: "Python executable is not working. Please run:\n await setupPythonEnvironment({ force: true })"
15906
+ };
15907
+ }
15908
+ }
15909
+
15910
+ // ee/python/documents/processing/doc_processor.ts
15911
+ var import_child_process3 = require("child_process");
15912
+ var import_util4 = require("util");
15743
15913
  var fs2 = __toESM(require("fs"), 1);
15744
15914
  var path = __toESM(require("path"), 1);
15745
15915
  var import_ai7 = require("ai");
@@ -15750,7 +15920,131 @@ var mammoth = __toESM(require("mammoth"), 1);
15750
15920
  var import_turndown = __toESM(require("turndown"), 1);
15751
15921
  var import_word_extractor = __toESM(require("word-extractor"), 1);
15752
15922
  var import_officeparser2 = require("officeparser");
15753
- var execAsync = (0, import_util2.promisify)(import_child_process.exec);
15923
+
15924
+ // src/utils/python-executor.ts
15925
+ var import_child_process2 = require("child_process");
15926
+ var import_util3 = require("util");
15927
+ var import_path2 = require("path");
15928
+ var import_fs3 = require("fs");
15929
+ var import_url2 = require("url");
15930
+ var execAsync2 = (0, import_util3.promisify)(import_child_process2.exec);
15931
+ function getPackageRoot2() {
15932
+ const currentFile = (0, import_url2.fileURLToPath)(importMetaUrl);
15933
+ let currentDir = (0, import_path2.dirname)(currentFile);
15934
+ let attempts = 0;
15935
+ const maxAttempts = 10;
15936
+ while (attempts < maxAttempts) {
15937
+ const packageJsonPath = (0, import_path2.join)(currentDir, "package.json");
15938
+ if ((0, import_fs3.existsSync)(packageJsonPath)) {
15939
+ try {
15940
+ const packageJson = JSON.parse((0, import_fs3.readFileSync)(packageJsonPath, "utf-8"));
15941
+ if (packageJson.name === "@exulu/backend") {
15942
+ return currentDir;
15943
+ }
15944
+ } catch {
15945
+ }
15946
+ }
15947
+ const parentDir = (0, import_path2.resolve)(currentDir, "..");
15948
+ if (parentDir === currentDir) {
15949
+ break;
15950
+ }
15951
+ currentDir = parentDir;
15952
+ attempts++;
15953
+ }
15954
+ return (0, import_path2.resolve)((0, import_path2.dirname)((0, import_url2.fileURLToPath)(importMetaUrl)), "../..");
15955
+ }
15956
+ var PythonEnvironmentError = class extends Error {
15957
+ constructor(message) {
15958
+ super(message);
15959
+ this.name = "PythonEnvironmentError";
15960
+ }
15961
+ };
15962
+ var PythonExecutionError = class extends Error {
15963
+ stdout;
15964
+ stderr;
15965
+ exitCode;
15966
+ constructor(message, stdout, stderr, exitCode) {
15967
+ super(message);
15968
+ this.name = "PythonExecutionError";
15969
+ this.stdout = stdout;
15970
+ this.stderr = stderr;
15971
+ this.exitCode = exitCode;
15972
+ }
15973
+ };
15974
+ function getVenvPath2(packageRoot) {
15975
+ return (0, import_path2.resolve)(packageRoot, "ee/python/.venv");
15976
+ }
15977
+ function getPythonExecutable(packageRoot) {
15978
+ const venvPath = getVenvPath2(packageRoot);
15979
+ return (0, import_path2.join)(venvPath, "bin", "python");
15980
+ }
15981
+ async function validatePythonEnvironmentForExecution(packageRoot) {
15982
+ const validation = await validatePythonEnvironment(packageRoot);
15983
+ if (!validation.valid) {
15984
+ throw new PythonEnvironmentError(validation.message);
15985
+ }
15986
+ }
15987
+ async function executePythonScript(config) {
15988
+ const {
15989
+ scriptPath,
15990
+ args = [],
15991
+ packageRoot = getPackageRoot2(),
15992
+ cwd = process.cwd(),
15993
+ timeout = 3e5,
15994
+ // 5 minutes default
15995
+ env = {},
15996
+ validateEnvironment = true
15997
+ } = config;
15998
+ if (validateEnvironment) {
15999
+ await validatePythonEnvironmentForExecution(packageRoot);
16000
+ }
16001
+ const resolvedScriptPath = (0, import_path2.resolve)(packageRoot, scriptPath);
16002
+ if (!(0, import_fs3.existsSync)(resolvedScriptPath)) {
16003
+ throw new PythonExecutionError(
16004
+ `Python script not found: ${resolvedScriptPath}`,
16005
+ "",
16006
+ "",
16007
+ 1
16008
+ );
16009
+ }
16010
+ const pythonExecutable = getPythonExecutable(packageRoot);
16011
+ const quotedArgs = args.map((arg) => {
16012
+ return arg.includes(" ") ? `"${arg}"` : arg;
16013
+ });
16014
+ const command = `${pythonExecutable} "${resolvedScriptPath}" ${quotedArgs.join(" ")}`;
16015
+ try {
16016
+ const { stdout, stderr } = await execAsync2(command, {
16017
+ cwd,
16018
+ timeout,
16019
+ env: {
16020
+ ...process.env,
16021
+ ...env,
16022
+ // Ensure Python doesn't write bytecode files
16023
+ PYTHONDONTWRITEBYTECODE: "1"
16024
+ },
16025
+ maxBuffer: 10 * 1024 * 1024
16026
+ // 10MB buffer for output
16027
+ });
16028
+ return {
16029
+ stdout: stdout.trim(),
16030
+ stderr: stderr.trim(),
16031
+ exitCode: 0,
16032
+ success: true
16033
+ };
16034
+ } catch (error) {
16035
+ const stdout = error.stdout?.toString() ?? "";
16036
+ const stderr = error.stderr?.toString() ?? "";
16037
+ const exitCode = error.code ?? 1;
16038
+ throw new PythonExecutionError(
16039
+ `Python script execution failed: ${error.message}`,
16040
+ stdout,
16041
+ stderr,
16042
+ exitCode
16043
+ );
16044
+ }
16045
+ }
16046
+
16047
+ // ee/python/documents/processing/doc_processor.ts
15754
16048
  async function processDocx(file) {
15755
16049
  const html = await mammoth.convertToHtml({ buffer: file });
15756
16050
  const turndownService = new import_turndown.default();
@@ -15895,15 +16189,15 @@ Specific notes and guidelines:
15895
16189
  };
15896
16190
  return validation;
15897
16191
  }
15898
- async function validateWithVLM(document, model, verbose = false, concurrency = 10) {
15899
- console.log(`[EXULU] Starting VLM validation for docling output, ${document.length} pages...`);
16192
+ async function validateWithVLM(document2, model, verbose = false, concurrency = 10) {
16193
+ console.log(`[EXULU] Starting VLM validation for docling output, ${document2.length} pages...`);
15900
16194
  console.log(
15901
16195
  `[EXULU] Concurrency limit: ${concurrency}`
15902
16196
  );
15903
16197
  let validatedCount = 0;
15904
16198
  let correctedCount = 0;
15905
16199
  const limit = (0, import_p_limit.default)(concurrency);
15906
- const validationTasks = document.map(
16200
+ const validationTasks = document2.map(
15907
16201
  (page) => limit(async () => {
15908
16202
  const imagePath = page.image;
15909
16203
  if (!imagePath) {
@@ -15948,7 +16242,7 @@ async function validateWithVLM(document, model, verbose = false, concurrency = 1
15948
16242
  console.log(`[EXULU] VLM validation complete:`);
15949
16243
  console.log(`[EXULU] Validated: ${validatedCount} chunks`);
15950
16244
  console.log(`[EXULU] Corrected: ${correctedCount} chunks`);
15951
- return document;
16245
+ return document2;
15952
16246
  }
15953
16247
  async function processDocument(filePath, fileType, buffer, tempDir, config, verbose = false) {
15954
16248
  console.log("Starting document processing...");
@@ -16000,16 +16294,24 @@ async function processPdf(buffer, paths, config, verbose = false) {
16000
16294
  try {
16001
16295
  let json;
16002
16296
  if (config?.docling) {
16003
- const script = `modal run modal_script.py --pdf-path "${paths.source}" --output "${paths.json}" --images-dir "${paths.images}"`;
16004
- console.log(`[EXULU] Running python script: ${script}`);
16005
- const { stderr } = await execAsync(
16006
- // todo replace python3 with the compiled executable
16007
- script,
16008
- { maxBuffer: 2e3 * 1024 * 1024 }
16009
- // 2000 MB buffer for large outputs
16010
- );
16011
- if (stderr) {
16012
- console.log("Processing info:", stderr.trim());
16297
+ console.log(`[EXULU] Processing document with document_to_markdown.py`);
16298
+ const result = await executePythonScript({
16299
+ scriptPath: "ee/python/documents/processing/document_to_markdown.py",
16300
+ args: [
16301
+ paths.source,
16302
+ "-o",
16303
+ paths.json,
16304
+ "--images-dir",
16305
+ paths.images
16306
+ ],
16307
+ timeout: 30 * 60 * 1e3
16308
+ // 30 minutes for large documents
16309
+ });
16310
+ if (result.stderr) {
16311
+ console.log("Processing info:", result.stderr.trim());
16312
+ }
16313
+ if (!result.success) {
16314
+ throw new Error(`Document processing failed: ${result.stderr}`);
16013
16315
  }
16014
16316
  const jsonContent = await fs2.promises.readFile(paths.json, "utf-8");
16015
16317
  json = JSON.parse(jsonContent);
@@ -16024,10 +16326,10 @@ async function processPdf(buffer, paths, config, verbose = false) {
16024
16326
  headings: []
16025
16327
  }];
16026
16328
  }
16027
- console.log(`
16329
+ console.log(`[EXULU]
16028
16330
  \u2713 Document processing completed successfully`);
16029
- console.log(` Total pages: ${json.length}`);
16030
- console.log(` Output file: ${paths.json}`);
16331
+ console.log(`[EXULU] Total pages: ${json.length}`);
16332
+ console.log(`[EXULU] Output file: ${paths.json}`);
16031
16333
  if (!config?.docling && config?.vlm?.model) {
16032
16334
  console.error("[EXULU] VLM validation is only supported when docling is enabled, skipping validation.");
16033
16335
  }
@@ -16240,6 +16542,12 @@ var ExuluChunkers = {
16240
16542
  rules: RecursiveRules
16241
16543
  }
16242
16544
  };
16545
+ var ExuluPython = {
16546
+ setup: setupPythonEnvironment,
16547
+ check: isPythonEnvironmentSetup,
16548
+ validate: validatePythonEnvironment,
16549
+ instructions: getPythonSetupInstructions
16550
+ };
16243
16551
  // Annotate the CommonJS export names for ESM import in node:
16244
16552
  0 && (module.exports = {
16245
16553
  EXULU_JOB_STATUS_ENUM,
@@ -16256,6 +16564,7 @@ var ExuluChunkers = {
16256
16564
  ExuluJobs,
16257
16565
  ExuluOtel,
16258
16566
  ExuluProvider,
16567
+ ExuluPython,
16259
16568
  ExuluQueues,
16260
16569
  ExuluReranker,
16261
16570
  ExuluTool,