@exulu/backend 1.48.1 → 1.49.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. package/README.md +2 -2
  2. package/dist/index.cjs +353 -42
  3. package/dist/index.d.cts +422 -327
  4. package/dist/index.d.ts +422 -327
  5. package/dist/index.js +341 -38
  6. package/ee/{markdown.ts → chunking/markdown.ts} +2 -2
  7. package/ee/python/README.md +295 -0
  8. package/ee/python/documents/processing/README.md +155 -0
  9. package/ee/{documents → python/documents}/processing/doc_processor.ts +25 -17
  10. package/ee/{documents/processing/pdf_to_markdown.py → python/documents/processing/document_to_markdown.py} +3 -10
  11. package/ee/python/setup.sh +180 -0
  12. package/package.json +17 -5
  13. package/scripts/postinstall.cjs +149 -0
  14. package/.agents/skills/mintlify/SKILL.md +0 -347
  15. package/.editorconfig +0 -15
  16. package/.eslintrc.json +0 -52
  17. package/.github/workflows/release-backend.yml +0 -38
  18. package/.husky/commit-msg +0 -1
  19. package/.jscpd.json +0 -18
  20. package/.mcp.json +0 -18
  21. package/.nvmrc +0 -1
  22. package/.prettierignore +0 -5
  23. package/.prettierrc.json +0 -12
  24. package/CHANGELOG.md +0 -6
  25. package/SECURITY.md +0 -5
  26. package/commitlint.config.js +0 -4
  27. package/devops/documentation/patch-older-releases.md +0 -42
  28. package/ee/documents/processing/build_pdf_processor.sh +0 -35
  29. package/ee/documents/processing/chunk_markdown.py +0 -263
  30. package/ee/documents/processing/pdf_processor.spec +0 -115
  31. package/eslint.config.js +0 -88
  32. package/jest.config.ts +0 -25
  33. package/mintlify-docs/.mintignore +0 -7
  34. package/mintlify-docs/AGENTS.md +0 -33
  35. package/mintlify-docs/CLAUDE.MD +0 -50
  36. package/mintlify-docs/CONTRIBUTING.md +0 -32
  37. package/mintlify-docs/LICENSE +0 -21
  38. package/mintlify-docs/README.md +0 -55
  39. package/mintlify-docs/ai-tools/claude-code.mdx +0 -43
  40. package/mintlify-docs/ai-tools/cursor.mdx +0 -39
  41. package/mintlify-docs/ai-tools/windsurf.mdx +0 -39
  42. package/mintlify-docs/api-reference/core-types/agent-types.mdx +0 -110
  43. package/mintlify-docs/api-reference/core-types/analytics-types.mdx +0 -95
  44. package/mintlify-docs/api-reference/core-types/configuration-types.mdx +0 -83
  45. package/mintlify-docs/api-reference/core-types/evaluation-types.mdx +0 -106
  46. package/mintlify-docs/api-reference/core-types/job-types.mdx +0 -135
  47. package/mintlify-docs/api-reference/core-types/overview.mdx +0 -73
  48. package/mintlify-docs/api-reference/core-types/prompt-types.mdx +0 -102
  49. package/mintlify-docs/api-reference/core-types/rbac-types.mdx +0 -163
  50. package/mintlify-docs/api-reference/core-types/session-types.mdx +0 -77
  51. package/mintlify-docs/api-reference/core-types/user-management.mdx +0 -112
  52. package/mintlify-docs/api-reference/core-types/workflow-types.mdx +0 -88
  53. package/mintlify-docs/api-reference/core-types.mdx +0 -585
  54. package/mintlify-docs/api-reference/dynamic-types.mdx +0 -851
  55. package/mintlify-docs/api-reference/endpoint/create.mdx +0 -4
  56. package/mintlify-docs/api-reference/endpoint/delete.mdx +0 -4
  57. package/mintlify-docs/api-reference/endpoint/get.mdx +0 -4
  58. package/mintlify-docs/api-reference/endpoint/webhook.mdx +0 -4
  59. package/mintlify-docs/api-reference/introduction.mdx +0 -661
  60. package/mintlify-docs/api-reference/mutations.mdx +0 -1012
  61. package/mintlify-docs/api-reference/openapi.json +0 -217
  62. package/mintlify-docs/api-reference/queries.mdx +0 -1154
  63. package/mintlify-docs/backend/introduction.mdx +0 -218
  64. package/mintlify-docs/changelog.mdx +0 -387
  65. package/mintlify-docs/community-edition.mdx +0 -304
  66. package/mintlify-docs/core/exulu-agent/api-reference.mdx +0 -894
  67. package/mintlify-docs/core/exulu-agent/configuration.mdx +0 -690
  68. package/mintlify-docs/core/exulu-agent/introduction.mdx +0 -552
  69. package/mintlify-docs/core/exulu-app/api-reference.mdx +0 -481
  70. package/mintlify-docs/core/exulu-app/configuration.mdx +0 -319
  71. package/mintlify-docs/core/exulu-app/introduction.mdx +0 -117
  72. package/mintlify-docs/core/exulu-authentication.mdx +0 -810
  73. package/mintlify-docs/core/exulu-chunkers/api-reference.mdx +0 -1011
  74. package/mintlify-docs/core/exulu-chunkers/configuration.mdx +0 -596
  75. package/mintlify-docs/core/exulu-chunkers/introduction.mdx +0 -403
  76. package/mintlify-docs/core/exulu-context/api-reference.mdx +0 -911
  77. package/mintlify-docs/core/exulu-context/configuration.mdx +0 -648
  78. package/mintlify-docs/core/exulu-context/introduction.mdx +0 -394
  79. package/mintlify-docs/core/exulu-database.mdx +0 -811
  80. package/mintlify-docs/core/exulu-default-agents.mdx +0 -545
  81. package/mintlify-docs/core/exulu-eval/api-reference.mdx +0 -772
  82. package/mintlify-docs/core/exulu-eval/configuration.mdx +0 -680
  83. package/mintlify-docs/core/exulu-eval/introduction.mdx +0 -459
  84. package/mintlify-docs/core/exulu-logging.mdx +0 -464
  85. package/mintlify-docs/core/exulu-otel.mdx +0 -670
  86. package/mintlify-docs/core/exulu-queues/api-reference.mdx +0 -648
  87. package/mintlify-docs/core/exulu-queues/configuration.mdx +0 -650
  88. package/mintlify-docs/core/exulu-queues/introduction.mdx +0 -474
  89. package/mintlify-docs/core/exulu-reranker/api-reference.mdx +0 -630
  90. package/mintlify-docs/core/exulu-reranker/configuration.mdx +0 -663
  91. package/mintlify-docs/core/exulu-reranker/introduction.mdx +0 -516
  92. package/mintlify-docs/core/exulu-tool/api-reference.mdx +0 -723
  93. package/mintlify-docs/core/exulu-tool/configuration.mdx +0 -805
  94. package/mintlify-docs/core/exulu-tool/introduction.mdx +0 -539
  95. package/mintlify-docs/core/exulu-variables/api-reference.mdx +0 -699
  96. package/mintlify-docs/core/exulu-variables/configuration.mdx +0 -736
  97. package/mintlify-docs/core/exulu-variables/introduction.mdx +0 -511
  98. package/mintlify-docs/development.mdx +0 -94
  99. package/mintlify-docs/docs.json +0 -248
  100. package/mintlify-docs/enterprise-edition.mdx +0 -538
  101. package/mintlify-docs/essentials/code.mdx +0 -35
  102. package/mintlify-docs/essentials/images.mdx +0 -59
  103. package/mintlify-docs/essentials/markdown.mdx +0 -88
  104. package/mintlify-docs/essentials/navigation.mdx +0 -87
  105. package/mintlify-docs/essentials/reusable-snippets.mdx +0 -110
  106. package/mintlify-docs/essentials/settings.mdx +0 -318
  107. package/mintlify-docs/favicon.svg +0 -3
  108. package/mintlify-docs/frontend/introduction.mdx +0 -39
  109. package/mintlify-docs/getting-started.mdx +0 -267
  110. package/mintlify-docs/guides/custom-agent.mdx +0 -608
  111. package/mintlify-docs/guides/first-agent.mdx +0 -315
  112. package/mintlify-docs/images/admin_ui.png +0 -0
  113. package/mintlify-docs/images/contexts.png +0 -0
  114. package/mintlify-docs/images/create_agents.png +0 -0
  115. package/mintlify-docs/images/evals.png +0 -0
  116. package/mintlify-docs/images/graphql.png +0 -0
  117. package/mintlify-docs/images/graphql_api.png +0 -0
  118. package/mintlify-docs/images/hero-dark.png +0 -0
  119. package/mintlify-docs/images/hero-light.png +0 -0
  120. package/mintlify-docs/images/hero.png +0 -0
  121. package/mintlify-docs/images/knowledge_sources.png +0 -0
  122. package/mintlify-docs/images/mcp.png +0 -0
  123. package/mintlify-docs/images/scaling.png +0 -0
  124. package/mintlify-docs/index.mdx +0 -411
  125. package/mintlify-docs/logo/dark.svg +0 -9
  126. package/mintlify-docs/logo/light.svg +0 -9
  127. package/mintlify-docs/partners.mdx +0 -558
  128. package/mintlify-docs/products.mdx +0 -77
  129. package/mintlify-docs/snippets/snippet-intro.mdx +0 -4
  130. package/mintlify-docs/styles.css +0 -207
  131. package/ngrok.bash +0 -1
  132. package/ngrok.md +0 -6
  133. package/ngrok.yml +0 -10
  134. package/release.config.cjs +0 -15
  135. package/skills-lock.json +0 -10
  136. package/types/context-processor.ts +0 -45
  137. package/types/enums/eval-types.ts +0 -5
  138. package/types/enums/field-types.ts +0 -1
  139. package/types/enums/jobs.ts +0 -11
  140. package/types/enums/statistics.ts +0 -13
  141. package/types/exulu-table-definition.ts +0 -79
  142. package/types/file-types.ts +0 -18
  143. package/types/models/agent-session.ts +0 -27
  144. package/types/models/agent.ts +0 -68
  145. package/types/models/context.ts +0 -53
  146. package/types/models/embedding.ts +0 -17
  147. package/types/models/eval-run.ts +0 -40
  148. package/types/models/exulu-agent-tool-config.ts +0 -11
  149. package/types/models/item.ts +0 -21
  150. package/types/models/job.ts +0 -8
  151. package/types/models/project.ts +0 -16
  152. package/types/models/rate-limiter-rules.ts +0 -7
  153. package/types/models/test-case.ts +0 -25
  154. package/types/models/tool.ts +0 -9
  155. package/types/models/user-role.ts +0 -12
  156. package/types/models/user.ts +0 -20
  157. package/types/models/variable.ts +0 -8
  158. package/types/models/vector-methods.ts +0 -7
  159. package/types/provider-config.ts +0 -21
  160. package/types/queue-config.ts +0 -16
  161. package/types/rbac-rights-modes.ts +0 -1
  162. package/types/statistics.ts +0 -20
  163. package/types/workflow.ts +0 -31
  164. /package/ee/{documents → python/documents}/THIRD_PARTY_LICENSES/docling.txt +0 -0
  165. /package/ee/{documents/processing → python}/requirements.txt +0 -0
package/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ **Exulu IMP** (Intelligence Management Platform) is an open-source TypeScript framework for building production-ready AI agent applications. It provides the backend infrastructure for managing agents, semantic search contexts, background job processing, and tool integrations.
2
+
1
3
  <a href="https://exulu.com/"><img width="100%" src="https://mintlify.s3.us-west-1.amazonaws.com/exulu/images/create_agents.png" alt="Exulu IMP - Create, deploy and manage AI agents" /></a>
2
4
 
3
5
  <br />
@@ -26,8 +28,6 @@
26
28
  > [!IMPORTANT]
27
29
  > ⭐ Star this repo to follow development and updates
28
30
 
29
- **Exulu IMP** (Intelligence Management Platform) is an open-source TypeScript framework for building production-ready AI agent applications. It provides the backend infrastructure for managing agents, semantic search contexts, background job processing, and tool integrations.
30
-
31
31
  <h3>✨ Why Exulu IMP?</h3>
32
32
 
33
33
  <ul>
package/dist/index.cjs CHANGED
@@ -43,12 +43,20 @@ __export(index_exports, {
43
43
  ExuluEval: () => ExuluEval,
44
44
  ExuluJobs: () => ExuluJobs,
45
45
  ExuluOtel: () => ExuluOtel,
46
+ ExuluProvider: () => ExuluProvider,
47
+ ExuluPython: () => ExuluPython,
46
48
  ExuluQueues: () => queues,
47
49
  ExuluReranker: () => ExuluReranker,
48
50
  ExuluTool: () => ExuluTool,
49
51
  ExuluVariables: () => ExuluVariables
50
52
  });
51
53
  module.exports = __toCommonJS(index_exports);
54
+
55
+ // node_modules/tsup/assets/cjs_shims.js
56
+ var getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${__filename}`).href : document.currentScript && document.currentScript.src || new URL("main.js", document.baseURI).href;
57
+ var importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
58
+
59
+ // src/index.ts
52
60
  var import_config = require("dotenv/config");
53
61
 
54
62
  // src/redis/client.ts
@@ -910,7 +918,7 @@ var uploadFile = async (file, fileName, config, options = {}, user, customBucket
910
918
  if (error.name === "SignatureDoesNotMatch" || error.name === "InvalidAccessKeyId" || error.name === "AccessDenied") {
911
919
  if (attempt < maxRetries) {
912
920
  const backoffMs = Math.pow(2, attempt) * 1e3;
913
- await new Promise((resolve) => setTimeout(resolve, backoffMs));
921
+ await new Promise((resolve3) => setTimeout(resolve3, backoffMs));
914
922
  s3Client = void 0;
915
923
  getS3Client(config);
916
924
  continue;
@@ -4640,7 +4648,7 @@ async function withRetry(generateFn, maxRetries = 3) {
4640
4648
  if (attempt === maxRetries) {
4641
4649
  throw error;
4642
4650
  }
4643
- await new Promise((resolve) => setTimeout(resolve, Math.pow(2, attempt) * 1e3));
4651
+ await new Promise((resolve3) => setTimeout(resolve3, Math.pow(2, attempt) * 1e3));
4644
4652
  }
4645
4653
  }
4646
4654
  throw lastError;
@@ -5296,8 +5304,8 @@ async function* executeAgenticRetrieval({
5296
5304
  try {
5297
5305
  let finishResolver;
5298
5306
  let finishRejector;
5299
- const finishPromise = new Promise((resolve, reject) => {
5300
- finishResolver = resolve;
5307
+ const finishPromise = new Promise((resolve3, reject) => {
5308
+ finishResolver = resolve3;
5301
5309
  finishRejector = reject;
5302
5310
  });
5303
5311
  const timeoutId = setTimeout(() => {
@@ -7376,7 +7384,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7376
7384
  } = await validateWorkflowPayload(data, providers);
7377
7385
  const retries = 3;
7378
7386
  let attempts = 0;
7379
- const promise = new Promise(async (resolve, reject) => {
7387
+ const promise = new Promise(async (resolve3, reject) => {
7380
7388
  while (attempts < retries) {
7381
7389
  try {
7382
7390
  const messages2 = await processUiMessagesFlow({
@@ -7391,7 +7399,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7391
7399
  config,
7392
7400
  variables: data.inputs
7393
7401
  });
7394
- resolve(messages2);
7402
+ resolve3(messages2);
7395
7403
  break;
7396
7404
  } catch (error) {
7397
7405
  console.error(
@@ -7402,7 +7410,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7402
7410
  if (attempts >= retries) {
7403
7411
  reject(new Error(error instanceof Error ? error.message : String(error)));
7404
7412
  }
7405
- await new Promise((resolve2) => setTimeout((resolve3) => resolve3(true), 2e3));
7413
+ await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
7406
7414
  }
7407
7415
  }
7408
7416
  });
@@ -7452,7 +7460,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7452
7460
  } = await validateEvalPayload(data, providers);
7453
7461
  const retries = 3;
7454
7462
  let attempts = 0;
7455
- const promise = new Promise(async (resolve, reject) => {
7463
+ const promise = new Promise(async (resolve3, reject) => {
7456
7464
  while (attempts < retries) {
7457
7465
  try {
7458
7466
  const messages2 = await processUiMessagesFlow({
@@ -7466,7 +7474,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7466
7474
  tools,
7467
7475
  config
7468
7476
  });
7469
- resolve(messages2);
7477
+ resolve3(messages2);
7470
7478
  break;
7471
7479
  } catch (error) {
7472
7480
  console.error(
@@ -7477,7 +7485,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7477
7485
  if (attempts >= retries) {
7478
7486
  reject(new Error(error instanceof Error ? error.message : String(error)));
7479
7487
  }
7480
- await new Promise((resolve2) => setTimeout((resolve3) => resolve3(true), 2e3));
7488
+ await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
7481
7489
  }
7482
7490
  }
7483
7491
  });
@@ -7892,7 +7900,7 @@ var pollJobResult = async ({
7892
7900
  attempts++;
7893
7901
  const job = await import_bullmq3.Job.fromId(queue.queue, jobId);
7894
7902
  if (!job) {
7895
- await new Promise((resolve) => setTimeout((resolve2) => resolve2(true), 2e3));
7903
+ await new Promise((resolve3) => setTimeout((resolve4) => resolve4(true), 2e3));
7896
7904
  continue;
7897
7905
  }
7898
7906
  const elapsedTime = Date.now() - startTime;
@@ -7922,7 +7930,7 @@ var pollJobResult = async ({
7922
7930
  console.log(`[EXULU] eval function ${job.id} result: ${result}`);
7923
7931
  break;
7924
7932
  }
7925
- await new Promise((resolve) => setTimeout((resolve2) => resolve2(true), 2e3));
7933
+ await new Promise((resolve3) => setTimeout((resolve4) => resolve4(true), 2e3));
7926
7934
  }
7927
7935
  return result;
7928
7936
  };
@@ -8030,7 +8038,7 @@ var processUiMessagesFlow = async ({
8030
8038
  label: agent.name,
8031
8039
  trigger: "agent"
8032
8040
  };
8033
- messageHistory = await new Promise(async (resolve, reject) => {
8041
+ messageHistory = await new Promise(async (resolve3, reject) => {
8034
8042
  const startTime = Date.now();
8035
8043
  try {
8036
8044
  const result = await provider.generateStream({
@@ -8107,7 +8115,7 @@ var processUiMessagesFlow = async ({
8107
8115
  })
8108
8116
  ] : []
8109
8117
  ]);
8110
- resolve({
8118
+ resolve3({
8111
8119
  messages,
8112
8120
  metadata: {
8113
8121
  tokens: {
@@ -8855,7 +8863,7 @@ type PageInfo {
8855
8863
  } = await validateWorkflowPayload(jobData, providers);
8856
8864
  const retries = 3;
8857
8865
  let attempts = 0;
8858
- const promise = new Promise(async (resolve, reject) => {
8866
+ const promise = new Promise(async (resolve3, reject) => {
8859
8867
  while (attempts < retries) {
8860
8868
  try {
8861
8869
  const messages2 = await processUiMessagesFlow({
@@ -8870,7 +8878,7 @@ type PageInfo {
8870
8878
  config,
8871
8879
  variables: args.variables
8872
8880
  });
8873
- resolve(messages2);
8881
+ resolve3(messages2);
8874
8882
  break;
8875
8883
  } catch (error) {
8876
8884
  console.error(
@@ -8884,7 +8892,7 @@ type PageInfo {
8884
8892
  if (attempts >= retries) {
8885
8893
  reject(error instanceof Error ? error : new Error(String(error)));
8886
8894
  }
8887
- await new Promise((resolve2) => setTimeout((resolve3) => resolve3(true), 2e3));
8895
+ await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
8888
8896
  }
8889
8897
  }
8890
8898
  });
@@ -9137,10 +9145,10 @@ type PageInfo {
9137
9145
  contexts.map(async (context2) => {
9138
9146
  let processor = null;
9139
9147
  if (context2.processor) {
9140
- processor = await new Promise(async (resolve, reject) => {
9148
+ processor = await new Promise(async (resolve3, reject) => {
9141
9149
  const config2 = context2.processor?.config;
9142
9150
  const queue = await config2?.queue;
9143
- resolve({
9151
+ resolve3({
9144
9152
  name: context2.processor.name,
9145
9153
  description: context2.processor.description,
9146
9154
  queue: queue?.queue?.name || void 0,
@@ -9221,10 +9229,10 @@ type PageInfo {
9221
9229
  }
9222
9230
  let processor = null;
9223
9231
  if (data.processor) {
9224
- processor = await new Promise(async (resolve, reject) => {
9232
+ processor = await new Promise(async (resolve3, reject) => {
9225
9233
  const config2 = data.processor?.config;
9226
9234
  const queue = await config2?.queue;
9227
- resolve({
9235
+ resolve3({
9228
9236
  name: data.processor.name,
9229
9237
  description: data.processor.description,
9230
9238
  queue: queue?.queue?.name || void 0,
@@ -13112,7 +13120,7 @@ var internetSearchTool = new ExuluTool({
13112
13120
  } catch (error) {
13113
13121
  if (error instanceof import_perplexity_ai.default.RateLimitError && attempt < maxRetries - 1) {
13114
13122
  const delay = Math.pow(2, attempt) * 1e3 + Math.random() * 1e3;
13115
- await new Promise((resolve) => setTimeout(resolve, delay));
13123
+ await new Promise((resolve3) => setTimeout(resolve3, delay));
13116
13124
  continue;
13117
13125
  }
13118
13126
  throw error;
@@ -15309,7 +15317,7 @@ var create = ({
15309
15317
  // src/index.ts
15310
15318
  var import_crypto_js9 = __toESM(require("crypto-js"), 1);
15311
15319
 
15312
- // ee/markdown.ts
15320
+ // ee/chunking/markdown.ts
15313
15321
  var extractPageTag = (text) => {
15314
15322
  let match;
15315
15323
  let lastPageNumber = void 0;
@@ -15736,9 +15744,172 @@ var MarkdownChunker = class {
15736
15744
  }
15737
15745
  };
15738
15746
 
15739
- // ee/documents/processing/doc_processor.ts
15747
+ // src/utils/python-setup.ts
15740
15748
  var import_child_process = require("child_process");
15741
15749
  var import_util2 = require("util");
15750
+ var import_path = require("path");
15751
+ var import_fs2 = require("fs");
15752
+ var import_url = require("url");
15753
+ var execAsync = (0, import_util2.promisify)(import_child_process.exec);
15754
+ function getPackageRoot() {
15755
+ const currentFile = (0, import_url.fileURLToPath)(importMetaUrl);
15756
+ let currentDir = (0, import_path.dirname)(currentFile);
15757
+ let attempts = 0;
15758
+ const maxAttempts = 10;
15759
+ while (attempts < maxAttempts) {
15760
+ const packageJsonPath = (0, import_path.join)(currentDir, "package.json");
15761
+ if ((0, import_fs2.existsSync)(packageJsonPath)) {
15762
+ try {
15763
+ const packageJson = JSON.parse((0, import_fs2.readFileSync)(packageJsonPath, "utf-8"));
15764
+ if (packageJson.name === "@exulu/backend") {
15765
+ return currentDir;
15766
+ }
15767
+ } catch {
15768
+ }
15769
+ }
15770
+ const parentDir = (0, import_path.resolve)(currentDir, "..");
15771
+ if (parentDir === currentDir) {
15772
+ break;
15773
+ }
15774
+ currentDir = parentDir;
15775
+ attempts++;
15776
+ }
15777
+ const fallback = (0, import_path.resolve)((0, import_path.dirname)((0, import_url.fileURLToPath)(importMetaUrl)), "../..");
15778
+ return fallback;
15779
+ }
15780
+ function getSetupScriptPath(packageRoot) {
15781
+ return (0, import_path.resolve)(packageRoot, "ee/python/setup.sh");
15782
+ }
15783
+ function getVenvPath(packageRoot) {
15784
+ return (0, import_path.resolve)(packageRoot, "ee/python/.venv");
15785
+ }
15786
+ function isPythonEnvironmentSetup(packageRoot) {
15787
+ const root = packageRoot ?? getPackageRoot();
15788
+ const venvPath = getVenvPath(root);
15789
+ const pythonPath = (0, import_path.join)(venvPath, "bin", "python");
15790
+ return (0, import_fs2.existsSync)(venvPath) && (0, import_fs2.existsSync)(pythonPath);
15791
+ }
15792
+ async function setupPythonEnvironment(options = {}) {
15793
+ const {
15794
+ packageRoot = getPackageRoot(),
15795
+ force = false,
15796
+ verbose = false,
15797
+ timeout = 6e5
15798
+ // 10 minutes
15799
+ } = options;
15800
+ if (!force && isPythonEnvironmentSetup(packageRoot)) {
15801
+ if (verbose) {
15802
+ console.log("\u2713 Python environment already set up");
15803
+ }
15804
+ return {
15805
+ success: true,
15806
+ message: "Python environment already exists",
15807
+ alreadyExists: true
15808
+ };
15809
+ }
15810
+ const setupScriptPath = getSetupScriptPath(packageRoot);
15811
+ if (!(0, import_fs2.existsSync)(setupScriptPath)) {
15812
+ return {
15813
+ success: false,
15814
+ message: `Setup script not found at: ${setupScriptPath}`,
15815
+ alreadyExists: false
15816
+ };
15817
+ }
15818
+ try {
15819
+ if (verbose) {
15820
+ console.log("Setting up Python environment...");
15821
+ }
15822
+ const { stdout, stderr } = await execAsync(`bash "${setupScriptPath}"`, {
15823
+ cwd: packageRoot,
15824
+ timeout,
15825
+ env: {
15826
+ ...process.env,
15827
+ // Ensure script can write to the directory
15828
+ PYTHONDONTWRITEBYTECODE: "1"
15829
+ },
15830
+ maxBuffer: 10 * 1024 * 1024
15831
+ // 10MB buffer
15832
+ });
15833
+ const output = stdout + stderr;
15834
+ const versionMatch = output.match(/Python (\d+\.\d+\.\d+)/);
15835
+ const pythonVersion = versionMatch ? versionMatch[1] : void 0;
15836
+ if (verbose) {
15837
+ console.log(output);
15838
+ }
15839
+ return {
15840
+ success: true,
15841
+ message: "Python environment set up successfully",
15842
+ alreadyExists: false,
15843
+ pythonVersion,
15844
+ output
15845
+ };
15846
+ } catch (error) {
15847
+ const errorOutput = error.stdout + error.stderr;
15848
+ return {
15849
+ success: false,
15850
+ message: `Setup failed: ${error.message}`,
15851
+ alreadyExists: false,
15852
+ output: errorOutput
15853
+ };
15854
+ }
15855
+ }
15856
+ function getPythonSetupInstructions() {
15857
+ return `
15858
+ Python environment not set up. Please run one of the following:
15859
+
15860
+ Option 1 (Automatic):
15861
+ import { setupPythonEnvironment } from '@exulu/backend';
15862
+ await setupPythonEnvironment();
15863
+
15864
+ Option 2 (Manual - for package consumers):
15865
+ npx @exulu/backend setup-python
15866
+
15867
+ Option 3 (Manual - for contributors):
15868
+ npm run python:setup
15869
+
15870
+ Requirements:
15871
+ - Python 3.10 or higher must be installed
15872
+ - pip must be available
15873
+
15874
+ Installing Python:
15875
+ - macOS: brew install python@3.12
15876
+ - Ubuntu/Debian: sudo apt-get install python3.12
15877
+ - Windows: Download from https://www.python.org/downloads/
15878
+ `.trim();
15879
+ }
15880
+ async function validatePythonEnvironment(packageRoot) {
15881
+ const root = packageRoot ?? getPackageRoot();
15882
+ const venvPath = getVenvPath(root);
15883
+ const pythonPath = (0, import_path.join)(venvPath, "bin", "python");
15884
+ if (!(0, import_fs2.existsSync)(venvPath)) {
15885
+ return {
15886
+ valid: false,
15887
+ message: getPythonSetupInstructions()
15888
+ };
15889
+ }
15890
+ if (!(0, import_fs2.existsSync)(pythonPath)) {
15891
+ return {
15892
+ valid: false,
15893
+ message: "Python virtual environment is corrupted. Please run:\n await setupPythonEnvironment({ force: true })"
15894
+ };
15895
+ }
15896
+ try {
15897
+ await execAsync(`"${pythonPath}" --version`, { cwd: root });
15898
+ return {
15899
+ valid: true,
15900
+ message: "Python environment is valid"
15901
+ };
15902
+ } catch {
15903
+ return {
15904
+ valid: false,
15905
+ message: "Python executable is not working. Please run:\n await setupPythonEnvironment({ force: true })"
15906
+ };
15907
+ }
15908
+ }
15909
+
15910
+ // ee/python/documents/processing/doc_processor.ts
15911
+ var import_child_process3 = require("child_process");
15912
+ var import_util4 = require("util");
15742
15913
  var fs2 = __toESM(require("fs"), 1);
15743
15914
  var path = __toESM(require("path"), 1);
15744
15915
  var import_ai7 = require("ai");
@@ -15749,7 +15920,131 @@ var mammoth = __toESM(require("mammoth"), 1);
15749
15920
  var import_turndown = __toESM(require("turndown"), 1);
15750
15921
  var import_word_extractor = __toESM(require("word-extractor"), 1);
15751
15922
  var import_officeparser2 = require("officeparser");
15752
- var execAsync = (0, import_util2.promisify)(import_child_process.exec);
15923
+
15924
+ // src/utils/python-executor.ts
15925
+ var import_child_process2 = require("child_process");
15926
+ var import_util3 = require("util");
15927
+ var import_path2 = require("path");
15928
+ var import_fs3 = require("fs");
15929
+ var import_url2 = require("url");
15930
+ var execAsync2 = (0, import_util3.promisify)(import_child_process2.exec);
15931
+ function getPackageRoot2() {
15932
+ const currentFile = (0, import_url2.fileURLToPath)(importMetaUrl);
15933
+ let currentDir = (0, import_path2.dirname)(currentFile);
15934
+ let attempts = 0;
15935
+ const maxAttempts = 10;
15936
+ while (attempts < maxAttempts) {
15937
+ const packageJsonPath = (0, import_path2.join)(currentDir, "package.json");
15938
+ if ((0, import_fs3.existsSync)(packageJsonPath)) {
15939
+ try {
15940
+ const packageJson = JSON.parse((0, import_fs3.readFileSync)(packageJsonPath, "utf-8"));
15941
+ if (packageJson.name === "@exulu/backend") {
15942
+ return currentDir;
15943
+ }
15944
+ } catch {
15945
+ }
15946
+ }
15947
+ const parentDir = (0, import_path2.resolve)(currentDir, "..");
15948
+ if (parentDir === currentDir) {
15949
+ break;
15950
+ }
15951
+ currentDir = parentDir;
15952
+ attempts++;
15953
+ }
15954
+ return (0, import_path2.resolve)((0, import_path2.dirname)((0, import_url2.fileURLToPath)(importMetaUrl)), "../..");
15955
+ }
15956
+ var PythonEnvironmentError = class extends Error {
15957
+ constructor(message) {
15958
+ super(message);
15959
+ this.name = "PythonEnvironmentError";
15960
+ }
15961
+ };
15962
+ var PythonExecutionError = class extends Error {
15963
+ stdout;
15964
+ stderr;
15965
+ exitCode;
15966
+ constructor(message, stdout, stderr, exitCode) {
15967
+ super(message);
15968
+ this.name = "PythonExecutionError";
15969
+ this.stdout = stdout;
15970
+ this.stderr = stderr;
15971
+ this.exitCode = exitCode;
15972
+ }
15973
+ };
15974
+ function getVenvPath2(packageRoot) {
15975
+ return (0, import_path2.resolve)(packageRoot, "ee/python/.venv");
15976
+ }
15977
+ function getPythonExecutable(packageRoot) {
15978
+ const venvPath = getVenvPath2(packageRoot);
15979
+ return (0, import_path2.join)(venvPath, "bin", "python");
15980
+ }
15981
+ async function validatePythonEnvironmentForExecution(packageRoot) {
15982
+ const validation = await validatePythonEnvironment(packageRoot);
15983
+ if (!validation.valid) {
15984
+ throw new PythonEnvironmentError(validation.message);
15985
+ }
15986
+ }
15987
+ async function executePythonScript(config) {
15988
+ const {
15989
+ scriptPath,
15990
+ args = [],
15991
+ packageRoot = getPackageRoot2(),
15992
+ cwd = process.cwd(),
15993
+ timeout = 3e5,
15994
+ // 5 minutes default
15995
+ env = {},
15996
+ validateEnvironment = true
15997
+ } = config;
15998
+ if (validateEnvironment) {
15999
+ await validatePythonEnvironmentForExecution(packageRoot);
16000
+ }
16001
+ const resolvedScriptPath = (0, import_path2.resolve)(packageRoot, scriptPath);
16002
+ if (!(0, import_fs3.existsSync)(resolvedScriptPath)) {
16003
+ throw new PythonExecutionError(
16004
+ `Python script not found: ${resolvedScriptPath}`,
16005
+ "",
16006
+ "",
16007
+ 1
16008
+ );
16009
+ }
16010
+ const pythonExecutable = getPythonExecutable(packageRoot);
16011
+ const quotedArgs = args.map((arg) => {
16012
+ return arg.includes(" ") ? `"${arg}"` : arg;
16013
+ });
16014
+ const command = `${pythonExecutable} "${resolvedScriptPath}" ${quotedArgs.join(" ")}`;
16015
+ try {
16016
+ const { stdout, stderr } = await execAsync2(command, {
16017
+ cwd,
16018
+ timeout,
16019
+ env: {
16020
+ ...process.env,
16021
+ ...env,
16022
+ // Ensure Python doesn't write bytecode files
16023
+ PYTHONDONTWRITEBYTECODE: "1"
16024
+ },
16025
+ maxBuffer: 10 * 1024 * 1024
16026
+ // 10MB buffer for output
16027
+ });
16028
+ return {
16029
+ stdout: stdout.trim(),
16030
+ stderr: stderr.trim(),
16031
+ exitCode: 0,
16032
+ success: true
16033
+ };
16034
+ } catch (error) {
16035
+ const stdout = error.stdout?.toString() ?? "";
16036
+ const stderr = error.stderr?.toString() ?? "";
16037
+ const exitCode = error.code ?? 1;
16038
+ throw new PythonExecutionError(
16039
+ `Python script execution failed: ${error.message}`,
16040
+ stdout,
16041
+ stderr,
16042
+ exitCode
16043
+ );
16044
+ }
16045
+ }
16046
+
16047
+ // ee/python/documents/processing/doc_processor.ts
15753
16048
  async function processDocx(file) {
15754
16049
  const html = await mammoth.convertToHtml({ buffer: file });
15755
16050
  const turndownService = new import_turndown.default();
@@ -15894,15 +16189,15 @@ Specific notes and guidelines:
15894
16189
  };
15895
16190
  return validation;
15896
16191
  }
15897
- async function validateWithVLM(document, model, verbose = false, concurrency = 10) {
15898
- console.log(`[EXULU] Starting VLM validation for docling output, ${document.length} pages...`);
16192
+ async function validateWithVLM(document2, model, verbose = false, concurrency = 10) {
16193
+ console.log(`[EXULU] Starting VLM validation for docling output, ${document2.length} pages...`);
15899
16194
  console.log(
15900
16195
  `[EXULU] Concurrency limit: ${concurrency}`
15901
16196
  );
15902
16197
  let validatedCount = 0;
15903
16198
  let correctedCount = 0;
15904
16199
  const limit = (0, import_p_limit.default)(concurrency);
15905
- const validationTasks = document.map(
16200
+ const validationTasks = document2.map(
15906
16201
  (page) => limit(async () => {
15907
16202
  const imagePath = page.image;
15908
16203
  if (!imagePath) {
@@ -15947,7 +16242,7 @@ async function validateWithVLM(document, model, verbose = false, concurrency = 1
15947
16242
  console.log(`[EXULU] VLM validation complete:`);
15948
16243
  console.log(`[EXULU] Validated: ${validatedCount} chunks`);
15949
16244
  console.log(`[EXULU] Corrected: ${correctedCount} chunks`);
15950
- return document;
16245
+ return document2;
15951
16246
  }
15952
16247
  async function processDocument(filePath, fileType, buffer, tempDir, config, verbose = false) {
15953
16248
  console.log("Starting document processing...");
@@ -15999,16 +16294,24 @@ async function processPdf(buffer, paths, config, verbose = false) {
15999
16294
  try {
16000
16295
  let json;
16001
16296
  if (config?.docling) {
16002
- const script = `modal run modal_script.py --pdf-path "${paths.source}" --output "${paths.json}" --images-dir "${paths.images}"`;
16003
- console.log(`[EXULU] Running python script: ${script}`);
16004
- const { stderr } = await execAsync(
16005
- // todo replace python3 with the compiled executable
16006
- script,
16007
- { maxBuffer: 2e3 * 1024 * 1024 }
16008
- // 2000 MB buffer for large outputs
16009
- );
16010
- if (stderr) {
16011
- console.log("Processing info:", stderr.trim());
16297
+ console.log(`[EXULU] Processing document with document_to_markdown.py`);
16298
+ const result = await executePythonScript({
16299
+ scriptPath: "ee/python/documents/processing/document_to_markdown.py",
16300
+ args: [
16301
+ paths.source,
16302
+ "-o",
16303
+ paths.json,
16304
+ "--images-dir",
16305
+ paths.images
16306
+ ],
16307
+ timeout: 30 * 60 * 1e3
16308
+ // 30 minutes for large documents
16309
+ });
16310
+ if (result.stderr) {
16311
+ console.log("Processing info:", result.stderr.trim());
16312
+ }
16313
+ if (!result.success) {
16314
+ throw new Error(`Document processing failed: ${result.stderr}`);
16012
16315
  }
16013
16316
  const jsonContent = await fs2.promises.readFile(paths.json, "utf-8");
16014
16317
  json = JSON.parse(jsonContent);
@@ -16023,10 +16326,10 @@ async function processPdf(buffer, paths, config, verbose = false) {
16023
16326
  headings: []
16024
16327
  }];
16025
16328
  }
16026
- console.log(`
16329
+ console.log(`[EXULU]
16027
16330
  \u2713 Document processing completed successfully`);
16028
- console.log(` Total pages: ${json.length}`);
16029
- console.log(` Output file: ${paths.json}`);
16331
+ console.log(`[EXULU] Total pages: ${json.length}`);
16332
+ console.log(`[EXULU] Output file: ${paths.json}`);
16030
16333
  if (!config?.docling && config?.vlm?.model) {
16031
16334
  console.error("[EXULU] VLM validation is only supported when docling is enabled, skipping validation.");
16032
16335
  }
@@ -16239,6 +16542,12 @@ var ExuluChunkers = {
16239
16542
  rules: RecursiveRules
16240
16543
  }
16241
16544
  };
16545
+ var ExuluPython = {
16546
+ setup: setupPythonEnvironment,
16547
+ check: isPythonEnvironmentSetup,
16548
+ validate: validatePythonEnvironment,
16549
+ instructions: getPythonSetupInstructions
16550
+ };
16242
16551
  // Annotate the CommonJS export names for ESM import in node:
16243
16552
  0 && (module.exports = {
16244
16553
  EXULU_JOB_STATUS_ENUM,
@@ -16254,6 +16563,8 @@ var ExuluChunkers = {
16254
16563
  ExuluEval,
16255
16564
  ExuluJobs,
16256
16565
  ExuluOtel,
16566
+ ExuluProvider,
16567
+ ExuluPython,
16257
16568
  ExuluQueues,
16258
16569
  ExuluReranker,
16259
16570
  ExuluTool,