@exulu/backend 1.48.2 → 1.49.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. package/dist/index.cjs +351 -42
  2. package/dist/index.d.cts +96 -1
  3. package/dist/index.d.ts +96 -1
  4. package/dist/index.js +340 -38
  5. package/ee/{markdown.ts → chunking/markdown.ts} +2 -2
  6. package/ee/python/README.md +295 -0
  7. package/ee/python/documents/processing/README.md +155 -0
  8. package/ee/{documents → python/documents}/processing/doc_processor.ts +25 -17
  9. package/ee/{documents/processing/pdf_to_markdown.py → python/documents/processing/document_to_markdown.py} +3 -10
  10. package/ee/python/setup.sh +180 -0
  11. package/package.json +14 -3
  12. package/scripts/postinstall.cjs +149 -0
  13. package/.agents/skills/mintlify/SKILL.md +0 -347
  14. package/.editorconfig +0 -15
  15. package/.eslintrc.json +0 -52
  16. package/.github/workflows/release-backend.yml +0 -38
  17. package/.husky/commit-msg +0 -1
  18. package/.jscpd.json +0 -18
  19. package/.mcp.json +0 -25
  20. package/.nvmrc +0 -1
  21. package/.prettierignore +0 -5
  22. package/.prettierrc.json +0 -12
  23. package/CHANGELOG.md +0 -8
  24. package/SECURITY.md +0 -5
  25. package/commitlint.config.js +0 -4
  26. package/devops/documentation/patch-older-releases.md +0 -42
  27. package/ee/documents/processing/build_pdf_processor.sh +0 -35
  28. package/ee/documents/processing/chunk_markdown.py +0 -263
  29. package/ee/documents/processing/pdf_processor.spec +0 -115
  30. package/eslint.config.js +0 -88
  31. package/jest.config.ts +0 -25
  32. package/mintlify-docs/.mintignore +0 -7
  33. package/mintlify-docs/AGENTS.md +0 -33
  34. package/mintlify-docs/CLAUDE.MD +0 -50
  35. package/mintlify-docs/CONTRIBUTING.md +0 -32
  36. package/mintlify-docs/LICENSE +0 -21
  37. package/mintlify-docs/README.md +0 -55
  38. package/mintlify-docs/ai-tools/claude-code.mdx +0 -43
  39. package/mintlify-docs/ai-tools/cursor.mdx +0 -39
  40. package/mintlify-docs/ai-tools/windsurf.mdx +0 -39
  41. package/mintlify-docs/api-reference/core-types/agent-types.mdx +0 -110
  42. package/mintlify-docs/api-reference/core-types/analytics-types.mdx +0 -95
  43. package/mintlify-docs/api-reference/core-types/configuration-types.mdx +0 -83
  44. package/mintlify-docs/api-reference/core-types/evaluation-types.mdx +0 -106
  45. package/mintlify-docs/api-reference/core-types/job-types.mdx +0 -135
  46. package/mintlify-docs/api-reference/core-types/overview.mdx +0 -73
  47. package/mintlify-docs/api-reference/core-types/prompt-types.mdx +0 -102
  48. package/mintlify-docs/api-reference/core-types/rbac-types.mdx +0 -163
  49. package/mintlify-docs/api-reference/core-types/session-types.mdx +0 -77
  50. package/mintlify-docs/api-reference/core-types/user-management.mdx +0 -112
  51. package/mintlify-docs/api-reference/core-types/workflow-types.mdx +0 -88
  52. package/mintlify-docs/api-reference/core-types.mdx +0 -585
  53. package/mintlify-docs/api-reference/dynamic-types.mdx +0 -851
  54. package/mintlify-docs/api-reference/endpoint/create.mdx +0 -4
  55. package/mintlify-docs/api-reference/endpoint/delete.mdx +0 -4
  56. package/mintlify-docs/api-reference/endpoint/get.mdx +0 -4
  57. package/mintlify-docs/api-reference/endpoint/webhook.mdx +0 -4
  58. package/mintlify-docs/api-reference/introduction.mdx +0 -661
  59. package/mintlify-docs/api-reference/mutations.mdx +0 -1012
  60. package/mintlify-docs/api-reference/openapi.json +0 -217
  61. package/mintlify-docs/api-reference/queries.mdx +0 -1154
  62. package/mintlify-docs/backend/introduction.mdx +0 -218
  63. package/mintlify-docs/changelog.mdx +0 -387
  64. package/mintlify-docs/community-edition.mdx +0 -304
  65. package/mintlify-docs/core/exulu-agent/api-reference.mdx +0 -894
  66. package/mintlify-docs/core/exulu-agent/configuration.mdx +0 -690
  67. package/mintlify-docs/core/exulu-agent/introduction.mdx +0 -552
  68. package/mintlify-docs/core/exulu-app/api-reference.mdx +0 -481
  69. package/mintlify-docs/core/exulu-app/configuration.mdx +0 -319
  70. package/mintlify-docs/core/exulu-app/introduction.mdx +0 -117
  71. package/mintlify-docs/core/exulu-authentication.mdx +0 -810
  72. package/mintlify-docs/core/exulu-chunkers/api-reference.mdx +0 -1011
  73. package/mintlify-docs/core/exulu-chunkers/configuration.mdx +0 -596
  74. package/mintlify-docs/core/exulu-chunkers/introduction.mdx +0 -403
  75. package/mintlify-docs/core/exulu-context/api-reference.mdx +0 -911
  76. package/mintlify-docs/core/exulu-context/configuration.mdx +0 -648
  77. package/mintlify-docs/core/exulu-context/introduction.mdx +0 -394
  78. package/mintlify-docs/core/exulu-database.mdx +0 -811
  79. package/mintlify-docs/core/exulu-default-agents.mdx +0 -545
  80. package/mintlify-docs/core/exulu-eval/api-reference.mdx +0 -772
  81. package/mintlify-docs/core/exulu-eval/configuration.mdx +0 -680
  82. package/mintlify-docs/core/exulu-eval/introduction.mdx +0 -459
  83. package/mintlify-docs/core/exulu-logging.mdx +0 -464
  84. package/mintlify-docs/core/exulu-otel.mdx +0 -670
  85. package/mintlify-docs/core/exulu-queues/api-reference.mdx +0 -648
  86. package/mintlify-docs/core/exulu-queues/configuration.mdx +0 -650
  87. package/mintlify-docs/core/exulu-queues/introduction.mdx +0 -474
  88. package/mintlify-docs/core/exulu-reranker/api-reference.mdx +0 -630
  89. package/mintlify-docs/core/exulu-reranker/configuration.mdx +0 -663
  90. package/mintlify-docs/core/exulu-reranker/introduction.mdx +0 -516
  91. package/mintlify-docs/core/exulu-tool/api-reference.mdx +0 -723
  92. package/mintlify-docs/core/exulu-tool/configuration.mdx +0 -805
  93. package/mintlify-docs/core/exulu-tool/introduction.mdx +0 -539
  94. package/mintlify-docs/core/exulu-variables/api-reference.mdx +0 -699
  95. package/mintlify-docs/core/exulu-variables/configuration.mdx +0 -736
  96. package/mintlify-docs/core/exulu-variables/introduction.mdx +0 -511
  97. package/mintlify-docs/development.mdx +0 -94
  98. package/mintlify-docs/docs.json +0 -248
  99. package/mintlify-docs/enterprise-edition.mdx +0 -538
  100. package/mintlify-docs/essentials/code.mdx +0 -35
  101. package/mintlify-docs/essentials/images.mdx +0 -59
  102. package/mintlify-docs/essentials/markdown.mdx +0 -88
  103. package/mintlify-docs/essentials/navigation.mdx +0 -87
  104. package/mintlify-docs/essentials/reusable-snippets.mdx +0 -110
  105. package/mintlify-docs/essentials/settings.mdx +0 -318
  106. package/mintlify-docs/favicon.svg +0 -3
  107. package/mintlify-docs/frontend/introduction.mdx +0 -39
  108. package/mintlify-docs/getting-started.mdx +0 -267
  109. package/mintlify-docs/guides/custom-agent.mdx +0 -608
  110. package/mintlify-docs/guides/first-agent.mdx +0 -315
  111. package/mintlify-docs/images/admin_ui.png +0 -0
  112. package/mintlify-docs/images/contexts.png +0 -0
  113. package/mintlify-docs/images/create_agents.png +0 -0
  114. package/mintlify-docs/images/evals.png +0 -0
  115. package/mintlify-docs/images/graphql.png +0 -0
  116. package/mintlify-docs/images/graphql_api.png +0 -0
  117. package/mintlify-docs/images/hero-dark.png +0 -0
  118. package/mintlify-docs/images/hero-light.png +0 -0
  119. package/mintlify-docs/images/hero.png +0 -0
  120. package/mintlify-docs/images/knowledge_sources.png +0 -0
  121. package/mintlify-docs/images/mcp.png +0 -0
  122. package/mintlify-docs/images/scaling.png +0 -0
  123. package/mintlify-docs/index.mdx +0 -411
  124. package/mintlify-docs/logo/dark.svg +0 -9
  125. package/mintlify-docs/logo/light.svg +0 -9
  126. package/mintlify-docs/partners.mdx +0 -558
  127. package/mintlify-docs/products.mdx +0 -77
  128. package/mintlify-docs/snippets/snippet-intro.mdx +0 -4
  129. package/mintlify-docs/styles.css +0 -207
  130. package/ngrok.bash +0 -1
  131. package/ngrok.md +0 -6
  132. package/ngrok.yml +0 -10
  133. package/release.config.cjs +0 -15
  134. package/skills-lock.json +0 -10
  135. package/types/context-processor.ts +0 -45
  136. package/types/enums/eval-types.ts +0 -5
  137. package/types/enums/field-types.ts +0 -1
  138. package/types/enums/jobs.ts +0 -11
  139. package/types/enums/statistics.ts +0 -13
  140. package/types/exulu-table-definition.ts +0 -79
  141. package/types/file-types.ts +0 -18
  142. package/types/models/agent-session.ts +0 -27
  143. package/types/models/agent.ts +0 -68
  144. package/types/models/context.ts +0 -53
  145. package/types/models/embedding.ts +0 -17
  146. package/types/models/eval-run.ts +0 -40
  147. package/types/models/exulu-agent-tool-config.ts +0 -11
  148. package/types/models/item.ts +0 -21
  149. package/types/models/job.ts +0 -8
  150. package/types/models/project.ts +0 -16
  151. package/types/models/rate-limiter-rules.ts +0 -7
  152. package/types/models/test-case.ts +0 -25
  153. package/types/models/tool.ts +0 -9
  154. package/types/models/user-role.ts +0 -12
  155. package/types/models/user.ts +0 -20
  156. package/types/models/variable.ts +0 -8
  157. package/types/models/vector-methods.ts +0 -7
  158. package/types/provider-config.ts +0 -21
  159. package/types/queue-config.ts +0 -16
  160. package/types/rbac-rights-modes.ts +0 -1
  161. package/types/statistics.ts +0 -20
  162. package/types/workflow.ts +0 -31
  163. /package/ee/{documents → python/documents}/THIRD_PARTY_LICENSES/docling.txt +0 -0
  164. /package/ee/{documents/processing → python}/requirements.txt +0 -0
package/dist/index.js CHANGED
@@ -877,7 +877,7 @@ var uploadFile = async (file, fileName, config, options = {}, user, customBucket
877
877
  if (error.name === "SignatureDoesNotMatch" || error.name === "InvalidAccessKeyId" || error.name === "AccessDenied") {
878
878
  if (attempt < maxRetries) {
879
879
  const backoffMs = Math.pow(2, attempt) * 1e3;
880
- await new Promise((resolve) => setTimeout(resolve, backoffMs));
880
+ await new Promise((resolve3) => setTimeout(resolve3, backoffMs));
881
881
  s3Client = void 0;
882
882
  getS3Client(config);
883
883
  continue;
@@ -4607,7 +4607,7 @@ async function withRetry(generateFn, maxRetries = 3) {
4607
4607
  if (attempt === maxRetries) {
4608
4608
  throw error;
4609
4609
  }
4610
- await new Promise((resolve) => setTimeout(resolve, Math.pow(2, attempt) * 1e3));
4610
+ await new Promise((resolve3) => setTimeout(resolve3, Math.pow(2, attempt) * 1e3));
4611
4611
  }
4612
4612
  }
4613
4613
  throw lastError;
@@ -5263,8 +5263,8 @@ async function* executeAgenticRetrieval({
5263
5263
  try {
5264
5264
  let finishResolver;
5265
5265
  let finishRejector;
5266
- const finishPromise = new Promise((resolve, reject) => {
5267
- finishResolver = resolve;
5266
+ const finishPromise = new Promise((resolve3, reject) => {
5267
+ finishResolver = resolve3;
5268
5268
  finishRejector = reject;
5269
5269
  });
5270
5270
  const timeoutId = setTimeout(() => {
@@ -7343,7 +7343,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7343
7343
  } = await validateWorkflowPayload(data, providers);
7344
7344
  const retries = 3;
7345
7345
  let attempts = 0;
7346
- const promise = new Promise(async (resolve, reject) => {
7346
+ const promise = new Promise(async (resolve3, reject) => {
7347
7347
  while (attempts < retries) {
7348
7348
  try {
7349
7349
  const messages2 = await processUiMessagesFlow({
@@ -7358,7 +7358,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7358
7358
  config,
7359
7359
  variables: data.inputs
7360
7360
  });
7361
- resolve(messages2);
7361
+ resolve3(messages2);
7362
7362
  break;
7363
7363
  } catch (error) {
7364
7364
  console.error(
@@ -7369,7 +7369,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7369
7369
  if (attempts >= retries) {
7370
7370
  reject(new Error(error instanceof Error ? error.message : String(error)));
7371
7371
  }
7372
- await new Promise((resolve2) => setTimeout((resolve3) => resolve3(true), 2e3));
7372
+ await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
7373
7373
  }
7374
7374
  }
7375
7375
  });
@@ -7419,7 +7419,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7419
7419
  } = await validateEvalPayload(data, providers);
7420
7420
  const retries = 3;
7421
7421
  let attempts = 0;
7422
- const promise = new Promise(async (resolve, reject) => {
7422
+ const promise = new Promise(async (resolve3, reject) => {
7423
7423
  while (attempts < retries) {
7424
7424
  try {
7425
7425
  const messages2 = await processUiMessagesFlow({
@@ -7433,7 +7433,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7433
7433
  tools,
7434
7434
  config
7435
7435
  });
7436
- resolve(messages2);
7436
+ resolve3(messages2);
7437
7437
  break;
7438
7438
  } catch (error) {
7439
7439
  console.error(
@@ -7444,7 +7444,7 @@ var createWorkers = async (providers, queues2, config, contexts, rerankers, eval
7444
7444
  if (attempts >= retries) {
7445
7445
  reject(new Error(error instanceof Error ? error.message : String(error)));
7446
7446
  }
7447
- await new Promise((resolve2) => setTimeout((resolve3) => resolve3(true), 2e3));
7447
+ await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
7448
7448
  }
7449
7449
  }
7450
7450
  });
@@ -7859,7 +7859,7 @@ var pollJobResult = async ({
7859
7859
  attempts++;
7860
7860
  const job = await Job.fromId(queue.queue, jobId);
7861
7861
  if (!job) {
7862
- await new Promise((resolve) => setTimeout((resolve2) => resolve2(true), 2e3));
7862
+ await new Promise((resolve3) => setTimeout((resolve4) => resolve4(true), 2e3));
7863
7863
  continue;
7864
7864
  }
7865
7865
  const elapsedTime = Date.now() - startTime;
@@ -7889,7 +7889,7 @@ var pollJobResult = async ({
7889
7889
  console.log(`[EXULU] eval function ${job.id} result: ${result}`);
7890
7890
  break;
7891
7891
  }
7892
- await new Promise((resolve) => setTimeout((resolve2) => resolve2(true), 2e3));
7892
+ await new Promise((resolve3) => setTimeout((resolve4) => resolve4(true), 2e3));
7893
7893
  }
7894
7894
  return result;
7895
7895
  };
@@ -7997,7 +7997,7 @@ var processUiMessagesFlow = async ({
7997
7997
  label: agent.name,
7998
7998
  trigger: "agent"
7999
7999
  };
8000
- messageHistory = await new Promise(async (resolve, reject) => {
8000
+ messageHistory = await new Promise(async (resolve3, reject) => {
8001
8001
  const startTime = Date.now();
8002
8002
  try {
8003
8003
  const result = await provider.generateStream({
@@ -8074,7 +8074,7 @@ var processUiMessagesFlow = async ({
8074
8074
  })
8075
8075
  ] : []
8076
8076
  ]);
8077
- resolve({
8077
+ resolve3({
8078
8078
  messages,
8079
8079
  metadata: {
8080
8080
  tokens: {
@@ -8822,7 +8822,7 @@ type PageInfo {
8822
8822
  } = await validateWorkflowPayload(jobData, providers);
8823
8823
  const retries = 3;
8824
8824
  let attempts = 0;
8825
- const promise = new Promise(async (resolve, reject) => {
8825
+ const promise = new Promise(async (resolve3, reject) => {
8826
8826
  while (attempts < retries) {
8827
8827
  try {
8828
8828
  const messages2 = await processUiMessagesFlow({
@@ -8837,7 +8837,7 @@ type PageInfo {
8837
8837
  config,
8838
8838
  variables: args.variables
8839
8839
  });
8840
- resolve(messages2);
8840
+ resolve3(messages2);
8841
8841
  break;
8842
8842
  } catch (error) {
8843
8843
  console.error(
@@ -8851,7 +8851,7 @@ type PageInfo {
8851
8851
  if (attempts >= retries) {
8852
8852
  reject(error instanceof Error ? error : new Error(String(error)));
8853
8853
  }
8854
- await new Promise((resolve2) => setTimeout((resolve3) => resolve3(true), 2e3));
8854
+ await new Promise((resolve4) => setTimeout((resolve5) => resolve5(true), 2e3));
8855
8855
  }
8856
8856
  }
8857
8857
  });
@@ -9104,10 +9104,10 @@ type PageInfo {
9104
9104
  contexts.map(async (context2) => {
9105
9105
  let processor = null;
9106
9106
  if (context2.processor) {
9107
- processor = await new Promise(async (resolve, reject) => {
9107
+ processor = await new Promise(async (resolve3, reject) => {
9108
9108
  const config2 = context2.processor?.config;
9109
9109
  const queue = await config2?.queue;
9110
- resolve({
9110
+ resolve3({
9111
9111
  name: context2.processor.name,
9112
9112
  description: context2.processor.description,
9113
9113
  queue: queue?.queue?.name || void 0,
@@ -9188,10 +9188,10 @@ type PageInfo {
9188
9188
  }
9189
9189
  let processor = null;
9190
9190
  if (data.processor) {
9191
- processor = await new Promise(async (resolve, reject) => {
9191
+ processor = await new Promise(async (resolve3, reject) => {
9192
9192
  const config2 = data.processor?.config;
9193
9193
  const queue = await config2?.queue;
9194
- resolve({
9194
+ resolve3({
9195
9195
  name: data.processor.name,
9196
9196
  description: data.processor.description,
9197
9197
  queue: queue?.queue?.name || void 0,
@@ -13086,7 +13086,7 @@ var internetSearchTool = new ExuluTool({
13086
13086
  } catch (error) {
13087
13087
  if (error instanceof Perplexity.RateLimitError && attempt < maxRetries - 1) {
13088
13088
  const delay = Math.pow(2, attempt) * 1e3 + Math.random() * 1e3;
13089
- await new Promise((resolve) => setTimeout(resolve, delay));
13089
+ await new Promise((resolve3) => setTimeout(resolve3, delay));
13090
13090
  continue;
13091
13091
  }
13092
13092
  throw error;
@@ -15283,7 +15283,7 @@ var create = ({
15283
15283
  // src/index.ts
15284
15284
  import CryptoJS9 from "crypto-js";
15285
15285
 
15286
- // ee/markdown.ts
15286
+ // ee/chunking/markdown.ts
15287
15287
  var extractPageTag = (text) => {
15288
15288
  let match;
15289
15289
  let lastPageNumber = void 0;
@@ -15710,9 +15710,172 @@ var MarkdownChunker = class {
15710
15710
  }
15711
15711
  };
15712
15712
 
15713
- // ee/documents/processing/doc_processor.ts
15713
+ // src/utils/python-setup.ts
15714
15714
  import { exec } from "child_process";
15715
15715
  import { promisify } from "util";
15716
+ import { resolve, join, dirname } from "path";
15717
+ import { existsSync, readFileSync } from "fs";
15718
+ import { fileURLToPath } from "url";
15719
+ var execAsync = promisify(exec);
15720
/**
 * Locates the installation directory of the `@exulu/backend` package.
 *
 * Starting at the directory that contains this module, up to ten directories
 * (the start directory and its ancestors) are inspected for a `package.json`
 * whose `name` is `@exulu/backend`; the first match is returned. Manifests
 * that cannot be read or parsed are skipped.
 *
 * @returns {string} The matching directory, or, when no manifest matches,
 *   the directory two levels above this module.
 */
function getPackageRoot() {
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  let candidate = moduleDir;
  for (let depth = 0; depth < 10; depth++) {
    const manifestPath = join(candidate, "package.json");
    if (existsSync(manifestPath)) {
      try {
        const { name } = JSON.parse(readFileSync(manifestPath, "utf-8"));
        if (name === "@exulu/backend") {
          return candidate;
        }
      } catch {
        // Unreadable or malformed manifest: keep climbing.
      }
    }
    const parent = resolve(candidate, "..");
    if (parent === candidate) {
      // Resolving ".." no longer changes the path: nothing left to climb.
      break;
    }
    candidate = parent;
  }
  return resolve(moduleDir, "../..");
}
15746
/**
 * Builds the absolute path of the Python environment setup script.
 *
 * @param {string} packageRoot - Directory the script path is resolved against.
 * @returns {string} `<packageRoot>/ee/python/setup.sh` as an absolute path.
 */
function getSetupScriptPath(packageRoot) {
  const scriptLocation = resolve(packageRoot, "ee", "python", "setup.sh");
  return scriptLocation;
}
15749
/**
 * Builds the absolute path of the package's Python virtual environment.
 *
 * @param {string} packageRoot - Directory the path is resolved against.
 * @returns {string} `<packageRoot>/ee/python/.venv` as an absolute path.
 */
function getVenvPath(packageRoot) {
  const venvLocation = resolve(packageRoot, "ee", "python", ".venv");
  return venvLocation;
}
15752
/**
 * Reports whether the package's Python virtual environment appears to exist.
 *
 * This is a filesystem check only: both the `.venv` directory and its
 * `bin/python` entry must exist. The interpreter itself is not started.
 *
 * @param {string} [packageRoot] - Package directory; when null or undefined
 *   it is obtained from `getPackageRoot()`.
 * @returns {boolean} `true` when both paths exist.
 */
function isPythonEnvironmentSetup(packageRoot) {
  const venvDir = getVenvPath(packageRoot ?? getPackageRoot());
  if (!existsSync(venvDir)) {
    return false;
  }
  return existsSync(join(venvDir, "bin", "python"));
}
15758
/**
 * Creates the package's Python virtual environment by running
 * `ee/python/setup.sh` through bash.
 *
 * Setup problems never reject the returned promise; every outcome (already
 * present, script missing, script failed, script succeeded) is reported
 * through the result object.
 *
 * @param {object} [options]
 * @param {string} [options.packageRoot] - Package directory; defaults to `getPackageRoot()`.
 * @param {boolean} [options.force=false] - Run the script even when the environment already exists.
 * @param {boolean} [options.verbose=false] - Log progress and the script output to the console.
 * @param {number} [options.timeout=600000] - Maximum script run time in milliseconds (10 minutes).
 * @returns {Promise<{success: boolean, message: string, alreadyExists: boolean, pythonVersion?: string, output?: string}>}
 *   `pythonVersion` is the first `Python x.y.z` found in the script output, if any;
 *   `output` is the script's stdout followed by its stderr.
 */
async function setupPythonEnvironment(options = {}) {
  const {
    packageRoot = getPackageRoot(),
    force = false,
    verbose = false,
    timeout = 6e5 // 10 minutes
  } = options;

  if (!force && isPythonEnvironmentSetup(packageRoot)) {
    if (verbose) {
      console.log("\u2713 Python environment already set up");
    }
    return {
      success: true,
      message: "Python environment already exists",
      alreadyExists: true
    };
  }

  const setupScriptPath = getSetupScriptPath(packageRoot);
  if (!existsSync(setupScriptPath)) {
    return {
      success: false,
      message: `Setup script not found at: ${setupScriptPath}`,
      alreadyExists: false
    };
  }

  try {
    if (verbose) {
      console.log("Setting up Python environment...");
    }
    const { stdout, stderr } = await execAsync(`bash "${setupScriptPath}"`, {
      cwd: packageRoot,
      timeout,
      env: {
        ...process.env,
        // Stop Python from writing .pyc bytecode files while the script runs.
        PYTHONDONTWRITEBYTECODE: "1"
      },
      maxBuffer: 10 * 1024 * 1024 // 10MB buffer
    });
    const output = stdout + stderr;
    const versionMatch = output.match(/Python (\d+\.\d+\.\d+)/);
    const pythonVersion = versionMatch ? versionMatch[1] : void 0;
    if (verbose) {
      console.log(output);
    }
    return {
      success: true,
      message: "Python environment set up successfully",
      alreadyExists: false,
      pythonVersion,
      output
    };
  } catch (error) {
    // exec attaches stdout/stderr to its rejection, but other failures do not.
    // Default both to "" so the reported output never reads "undefinedundefined".
    const errorOutput = `${error?.stdout ?? ""}${error?.stderr ?? ""}`;
    const reason = error instanceof Error ? error.message : String(error);
    return {
      success: false,
      message: `Setup failed: ${reason}`,
      alreadyExists: false,
      output: errorOutput
    };
  }
}
15822
/**
 * Returns the help text that begins "Python environment not set up".
 * It lists three ways to create the environment, the requirements, and
 * per-platform hints for installing Python. The template literal is trimmed
 * before it is returned, so the text has no leading or trailing whitespace.
 */
+ function getPythonSetupInstructions() {
15823
+ return `
15824
+ Python environment not set up. Please run one of the following:
15825
+
15826
+ Option 1 (Automatic):
15827
+ import { setupPythonEnvironment } from '@exulu/backend';
15828
+ await setupPythonEnvironment();
15829
+
15830
+ Option 2 (Manual - for package consumers):
15831
+ npx @exulu/backend setup-python
15832
+
15833
+ Option 3 (Manual - for contributors):
15834
+ npm run python:setup
15835
+
15836
+ Requirements:
15837
+ - Python 3.10 or higher must be installed
15838
+ - pip must be available
15839
+
15840
+ Installing Python:
15841
+ - macOS: brew install python@3.12
15842
+ - Ubuntu/Debian: sudo apt-get install python3.12
15843
+ - Windows: Download from https://www.python.org/downloads/
15844
+ `.trim();
15845
+ }
15846
/**
 * Checks that the package's Python virtual environment exists and that its
 * interpreter can be started.
 *
 * Three checks run in order: the `.venv` directory exists, `bin/python`
 * exists inside it, and `python --version` exits successfully. A failed
 * check is reported in the result rather than thrown.
 *
 * @param {string} [packageRoot] - Package directory; when null or undefined
 *   it is obtained from `getPackageRoot()`.
 * @returns {Promise<{valid: boolean, message: string}>} `message` explains
 *   the failure (or confirms validity).
 */
async function validatePythonEnvironment(packageRoot) {
  const root = packageRoot ?? getPackageRoot();
  const venvDir = getVenvPath(root);
  const interpreter = join(venvDir, "bin", "python");
  const invalid = (message) => ({ valid: false, message });

  if (!existsSync(venvDir)) {
    return invalid(getPythonSetupInstructions());
  }
  if (!existsSync(interpreter)) {
    return invalid("Python virtual environment is corrupted. Please run:\n await setupPythonEnvironment({ force: true })");
  }
  try {
    // Actually launch the interpreter: a file can exist and still be unusable.
    await execAsync(`"${interpreter}" --version`, { cwd: root });
  } catch {
    return invalid("Python executable is not working. Please run:\n await setupPythonEnvironment({ force: true })");
  }
  return { valid: true, message: "Python environment is valid" };
}
15875
+
15876
+ // ee/python/documents/processing/doc_processor.ts
15877
+ import "child_process";
15878
+ import "util";
15716
15879
  import * as fs2 from "fs";
15717
15880
  import * as path from "path";
15718
15881
  import { generateText as generateText3, Output as Output2 } from "ai";
@@ -15723,7 +15886,131 @@ import * as mammoth from "mammoth";
15723
15886
  import TurndownService from "turndown";
15724
15887
  import WordExtractor from "word-extractor";
15725
15888
  import { parseOfficeAsync as parseOfficeAsync2 } from "officeparser";
15726
- var execAsync = promisify(exec);
15889
+
15890
+ // src/utils/python-executor.ts
15891
+ import { exec as exec2 } from "child_process";
15892
+ import { promisify as promisify2 } from "util";
15893
+ import { resolve as resolve2, join as join2, dirname as dirname2 } from "path";
15894
+ import { existsSync as existsSync2, readFileSync as readFileSync2 } from "fs";
15895
+ import { fileURLToPath as fileURLToPath2 } from "url";
15896
+ var execAsync2 = promisify2(exec2);
15897
/**
 * Locates the installation directory of the `@exulu/backend` package.
 *
 * Walks from this module's directory towards the filesystem root, checking
 * at most ten directories for a `package.json` named `@exulu/backend`.
 *
 * @returns {string} The first matching directory, or, if none matches,
 *   the directory two levels above this module.
 */
function getPackageRoot2() {
  const startDir = dirname2(fileURLToPath2(import.meta.url));

  // True when `dir` holds a readable package.json for @exulu/backend.
  const isBackendPackage = (dir) => {
    const manifest = join2(dir, "package.json");
    if (!existsSync2(manifest)) {
      return false;
    }
    try {
      return JSON.parse(readFileSync2(manifest, "utf-8")).name === "@exulu/backend";
    } catch {
      // Unreadable or malformed manifest counts as "not this package".
      return false;
    }
  };

  let dir = startDir;
  for (let remaining = 10; remaining > 0; remaining--) {
    if (isBackendPackage(dir)) {
      return dir;
    }
    const parent = resolve2(dir, "..");
    if (parent === dir) {
      break;
    }
    dir = parent;
  }
  return resolve2(startDir, "../..");
}
15922
/**
 * Error type for an unusable Python environment; instances carry the
 * `name` "PythonEnvironmentError" so they can be told apart from other errors.
 */
var PythonEnvironmentError = class extends Error {
  name = "PythonEnvironmentError";

  /**
   * @param {string} message - Description of what is wrong with the environment.
   */
  constructor(message) {
    super(message);
  }
};
15928
/**
 * Error type for a failed Python script run. Besides the message it keeps
 * the captured standard output, standard error and exit code it was given.
 */
var PythonExecutionError = class extends Error {
  stdout;
  stderr;
  exitCode;

  /**
   * @param {string} message - Description of the failure.
   * @param {string} stdout - Captured standard output.
   * @param {string} stderr - Captured standard error.
   * @param {number} exitCode - Exit code to report for the run.
   */
  constructor(message, stdout, stderr, exitCode) {
    super(message);
    Object.assign(this, { name: "PythonExecutionError", stdout, stderr, exitCode });
  }
};
15940
/**
 * Builds the absolute path of the package's Python virtual environment.
 *
 * @param {string} packageRoot - Directory the path is resolved against.
 * @returns {string} `<packageRoot>/ee/python/.venv` as an absolute path.
 */
function getVenvPath2(packageRoot) {
  const venvLocation = resolve2(packageRoot, "ee", "python", ".venv");
  return venvLocation;
}
15943
/**
 * Builds the path of the interpreter inside the package's virtual environment.
 *
 * @param {string} packageRoot - Package directory that contains `ee/python/.venv`.
 * @returns {string} The virtual environment path followed by `bin/python`.
 */
function getPythonExecutable(packageRoot) {
  return join2(getVenvPath2(packageRoot), "bin", "python");
}
15947
/**
 * Throwing wrapper around `validatePythonEnvironment`.
 *
 * @param {string} [packageRoot] - Forwarded unchanged to `validatePythonEnvironment`.
 * @returns {Promise<void>} Resolves when the environment is reported valid.
 * @throws {PythonEnvironmentError} Carrying the validation message when the
 *   environment is reported invalid.
 */
async function validatePythonEnvironmentForExecution(packageRoot) {
  const { valid, message } = await validatePythonEnvironment(packageRoot);
  if (valid) {
    return;
  }
  throw new PythonEnvironmentError(message);
}
15953
/**
 * Runs a Python script with the interpreter of the package's virtual environment.
 *
 * @param {object} config
 * @param {string} config.scriptPath - Script location, resolved against `packageRoot`.
 * @param {Array<string>} [config.args=[]] - Arguments for the script. Each entry is
 *   passed as exactly one literal argument (no shell expansion, empty strings kept).
 * @param {string} [config.packageRoot] - Package directory; defaults to `getPackageRoot2()`.
 * @param {string} [config.cwd=process.cwd()] - Working directory of the child process.
 * @param {number} [config.timeout=300000] - Maximum run time in milliseconds (5 minutes).
 * @param {Record<string, string>} [config.env={}] - Variables merged over `process.env`.
 * @param {boolean} [config.validateEnvironment=true] - Validate the virtual environment first.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number, success: boolean}>}
 *   Trimmed output of a successful run; `exitCode` is 0 and `success` is `true`.
 * @throws {PythonEnvironmentError} When validation is enabled and fails.
 * @throws {PythonExecutionError} When the script file does not exist or the
 *   child process fails (non-zero exit, timeout, output larger than the buffer).
 */
async function executePythonScript(config) {
  const {
    scriptPath,
    args = [],
    packageRoot = getPackageRoot2(),
    cwd = process.cwd(),
    timeout = 3e5, // 5 minutes default
    env = {},
    validateEnvironment = true
  } = config;

  if (validateEnvironment) {
    await validatePythonEnvironmentForExecution(packageRoot);
  }

  const resolvedScriptPath = resolve2(packageRoot, scriptPath);
  if (!existsSync2(resolvedScriptPath)) {
    throw new PythonExecutionError(
      `Python script not found: ${resolvedScriptPath}`,
      "",
      "",
      1
    );
  }

  const pythonExecutable = getPythonExecutable(packageRoot);

  // The command is run by a shell, so every word is wrapped in single quotes
  // (POSIX quoting: nothing inside is special; an embedded ' is written '\'').
  // This keeps paths with spaces intact and stops characters such as ", $, `
  // or ; in an argument from being interpreted by the shell.
  const shellQuote = (value) => `'${String(value).replace(/'/g, `'\\''`)}'`;
  const command = [pythonExecutable, resolvedScriptPath, ...args]
    .map(shellQuote)
    .join(" ");

  try {
    const { stdout, stderr } = await execAsync2(command, {
      cwd,
      timeout,
      env: {
        ...process.env,
        ...env,
        // Ensure Python doesn't write bytecode files
        PYTHONDONTWRITEBYTECODE: "1"
      },
      maxBuffer: 10 * 1024 * 1024 // 10MB buffer for output
    });
    return {
      stdout: stdout.trim(),
      stderr: stderr.trim(),
      exitCode: 0,
      success: true
    };
  } catch (error) {
    // Whatever output was captured before the failure is forwarded to the caller.
    const stdout = error?.stdout?.toString() ?? "";
    const stderr = error?.stderr?.toString() ?? "";
    const exitCode = error?.code ?? 1;
    const reason = error instanceof Error ? error.message : String(error);
    throw new PythonExecutionError(
      `Python script execution failed: ${reason}`,
      stdout,
      stderr,
      exitCode
    );
  }
}
16012
+
16013
+ // ee/python/documents/processing/doc_processor.ts
15727
16014
  async function processDocx(file) {
15728
16015
  const html = await mammoth.convertToHtml({ buffer: file });
15729
16016
  const turndownService = new TurndownService();
@@ -15973,16 +16260,24 @@ async function processPdf(buffer, paths, config, verbose = false) {
15973
16260
  try {
15974
16261
  let json;
15975
16262
  if (config?.docling) {
15976
- const script = `modal run modal_script.py --pdf-path "${paths.source}" --output "${paths.json}" --images-dir "${paths.images}"`;
15977
- console.log(`[EXULU] Running python script: ${script}`);
15978
- const { stderr } = await execAsync(
15979
- // todo replace python3 with the compiled executable
15980
- script,
15981
- { maxBuffer: 2e3 * 1024 * 1024 }
15982
- // 2000 MB buffer for large outputs
15983
- );
15984
- if (stderr) {
15985
- console.log("Processing info:", stderr.trim());
16263
+ console.log(`[EXULU] Processing document with document_to_markdown.py`);
16264
+ const result = await executePythonScript({
16265
+ scriptPath: "ee/python/documents/processing/document_to_markdown.py",
16266
+ args: [
16267
+ paths.source,
16268
+ "-o",
16269
+ paths.json,
16270
+ "--images-dir",
16271
+ paths.images
16272
+ ],
16273
+ timeout: 30 * 60 * 1e3
16274
+ // 30 minutes for large documents
16275
+ });
16276
+ if (result.stderr) {
16277
+ console.log("Processing info:", result.stderr.trim());
16278
+ }
16279
+ if (!result.success) {
16280
+ throw new Error(`Document processing failed: ${result.stderr}`);
15986
16281
  }
15987
16282
  const jsonContent = await fs2.promises.readFile(paths.json, "utf-8");
15988
16283
  json = JSON.parse(jsonContent);
@@ -15997,10 +16292,10 @@ async function processPdf(buffer, paths, config, verbose = false) {
15997
16292
  headings: []
15998
16293
  }];
15999
16294
  }
16000
- console.log(`
16295
+ console.log(`[EXULU]
16001
16296
  \u2713 Document processing completed successfully`);
16002
- console.log(` Total pages: ${json.length}`);
16003
- console.log(` Output file: ${paths.json}`);
16297
+ console.log(`[EXULU] Total pages: ${json.length}`);
16298
+ console.log(`[EXULU] Output file: ${paths.json}`);
16004
16299
  if (!config?.docling && config?.vlm?.model) {
16005
16300
  console.error("[EXULU] VLM validation is only supported when docling is enabled, skipping validation.");
16006
16301
  }
@@ -16213,6 +16508,12 @@ var ExuluChunkers = {
16213
16508
  rules: RecursiveRules
16214
16509
  }
16215
16510
  };
16511
/**
 * Groups the Python environment helpers under one object, each exposed
 * under a short alias.
 */
var ExuluPython = {
  // Alias of setupPythonEnvironment.
  setup: setupPythonEnvironment,
  // Alias of isPythonEnvironmentSetup.
  check: isPythonEnvironmentSetup,
  // Alias of validatePythonEnvironment.
  validate: validatePythonEnvironment,
  // Alias of getPythonSetupInstructions.
  instructions: getPythonSetupInstructions
};
16216
16517
  export {
16217
16518
  JOB_STATUS_ENUM as EXULU_JOB_STATUS_ENUM,
16218
16519
  STATISTICS_TYPE_ENUM as EXULU_STATISTICS_TYPE_ENUM,
@@ -16228,6 +16529,7 @@ export {
16228
16529
  ExuluJobs,
16229
16530
  ExuluOtel,
16230
16531
  ExuluProvider,
16532
+ ExuluPython,
16231
16533
  queues as ExuluQueues,
16232
16534
  ExuluReranker,
16233
16535
  ExuluTool,
@@ -1,5 +1,5 @@
1
- import { ExuluTokenizer } from "./tokenizer.ts";
2
- import { checkLicense } from "./entitlements";
1
+ import { ExuluTokenizer } from "../tokenizer.ts";
2
+ import { checkLicense } from "../entitlements";
3
3
 
4
4
  type CurrentHeaders = {
5
5
  h1?: {