aiex-cli 0.0.4-beta.5 → 0.0.5-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/dist/cli.mjs +113 -5
  2. package/dist/{doctor-collector-BtEPFDoa.mjs → doctor-collector-GXgIY3DH.mjs} +31 -15
  3. package/dist/index.mjs +1 -1
  4. package/dist/web/assets/AISettings-Blb8BV7K.js +272 -0
  5. package/dist/web/assets/Dashboard-DFlCyVls.js +1 -0
  6. package/dist/web/assets/{DataBrowser-IlgTMGi0.js → DataBrowser-GAA-pGq0.js} +1 -1
  7. package/dist/web/assets/ExtractionViewer-DqIrBGNK.js +1 -0
  8. package/dist/web/assets/{JsonSchemaEditor-Dyl391lX.js → JsonSchemaEditor-CfPzcMKJ.js} +11 -11
  9. package/dist/web/assets/{cssMode-BM5FOYIl.js → cssMode-CPThwItX.js} +1 -1
  10. package/dist/web/assets/{editor.main-C2Q97Dkk.js → editor.main-DQ658ZNP.js} +2 -2
  11. package/dist/web/assets/{freemarker2-BqyJTCTn.js → freemarker2-DWDTYVJR.js} +1 -1
  12. package/dist/web/assets/{handlebars-DxRJTefg.js → handlebars-D4DzjGQ7.js} +1 -1
  13. package/dist/web/assets/{html-gyvgrapw.js → html-DnzhKSoD.js} +1 -1
  14. package/dist/web/assets/{htmlMode-CNjCRwdY.js → htmlMode-CR7UKfEH.js} +1 -1
  15. package/dist/web/assets/index-BisY0WU-.js +882 -0
  16. package/dist/web/assets/index-CL1hhIbO.css +2 -0
  17. package/dist/web/assets/{javascript-BK6ufvq6.js → javascript-D2srszZ8.js} +1 -1
  18. package/dist/web/assets/{jsonMode-m2trGjkO.js → jsonMode-B4jaPYEr.js} +1 -1
  19. package/dist/web/assets/{liquid-BtyuYqQQ.js → liquid-CIT2Wl_l.js} +1 -1
  20. package/dist/web/assets/{mdx-C8K4EvCQ.js → mdx-CWLaEOFy.js} +1 -1
  21. package/dist/web/assets/{monaco.contribution-BTr-G8hO.js → monaco.contribution-BJhODGkt.js} +2 -2
  22. package/dist/web/assets/{python-8dyH1nS_.js → python-6CGfpCNq.js} +1 -1
  23. package/dist/web/assets/{razor-DtWMI74k.js → razor-DEMMh3TD.js} +1 -1
  24. package/dist/web/assets/{tsMode-Dv8YG-YK.js → tsMode-D6_qJNEr.js} +1 -1
  25. package/dist/web/assets/{typescript-DbClKYS3.js → typescript-BM9aPEFg.js} +1 -1
  26. package/dist/web/assets/{xml-Bb59gjP6.js → xml-CoSbvcg5.js} +1 -1
  27. package/dist/web/assets/{yaml-DVMb_IfV.js → yaml-56GOgy8k.js} +1 -1
  28. package/dist/web/index.html +2 -2
  29. package/dist/{zh-CN-BZihcLn3.mjs → zh-CN-Qcn0DHFh.mjs} +2 -1
  30. package/package.json +3 -1
  31. package/dist/web/assets/AISettings-DfoDfxk9.js +0 -272
  32. package/dist/web/assets/ExtractionViewer-0F4C26V5.js +0 -1
  33. package/dist/web/assets/index-DcTjZeUT.css +0 -2
  34. package/dist/web/assets/index-sK43vSj1.js +0 -882
  35. /package/dist/web/assets/{abap-DiwvWnMr.js → abap-Bgec7Keq.js} +0 -0
  36. /package/dist/web/assets/{apex-CmtZjKlf.js → apex-VBlPwEoQ.js} +0 -0
  37. /package/dist/web/assets/{azcli-DL2My_i-.js → azcli-DKqrEFBx.js} +0 -0
  38. /package/dist/web/assets/{bat-B-nC98wG.js → bat-DdgQWy_0.js} +0 -0
  39. /package/dist/web/assets/{bicep-Ju5MwOgh.js → bicep-CRMM43EB.js} +0 -0
  40. /package/dist/web/assets/{cameligo-8Eu1TyBr.js → cameligo-UatALtML.js} +0 -0
  41. /package/dist/web/assets/{clojure-u-RpMkH3.js → clojure-D8JU08RA.js} +0 -0
  42. /package/dist/web/assets/{coffee-CdA7bbTe.js → coffee-C56wu358.js} +0 -0
  43. /package/dist/web/assets/{cpp-CzNFP8ks.js → cpp-CyZLvhJG.js} +0 -0
  44. /package/dist/web/assets/{csharp-j1LThmcE.js → csharp-BJl3ixva.js} +0 -0
  45. /package/dist/web/assets/{csp-CLRC61y6.js → csp-CxEKxmO-.js} +0 -0
  46. /package/dist/web/assets/{css-r6rC_7P2.js → css-B0t_muXd.js} +0 -0
  47. /package/dist/web/assets/{cypher-CW08XVUh.js → cypher-D1hqiMFD.js} +0 -0
  48. /package/dist/web/assets/{dart-Cs9aL5T_.js → dart-Bz550Pyv.js} +0 -0
  49. /package/dist/web/assets/{dockerfile-BWM0M184.js → dockerfile-CIXgVAuA.js} +0 -0
  50. /package/dist/web/assets/{ecl-MJJuer5P.js → ecl-D9qbvZoA.js} +0 -0
  51. /package/dist/web/assets/{editor.api-nsOUOZde.js → editor.api-C8BHpRhn.js} +0 -0
  52. /package/dist/web/assets/{elixir-D2AIuXqn.js → elixir-b2M38fAy.js} +0 -0
  53. /package/dist/web/assets/{flow9-B2H24giC.js → flow9-Dq1UYMkt.js} +0 -0
  54. /package/dist/web/assets/{fsharp-CFNadkg7.js → fsharp-BaeLhgfq.js} +0 -0
  55. /package/dist/web/assets/{go-dSur1iB2.js → go-Bd-NFKIC.js} +0 -0
  56. /package/dist/web/assets/{graphql-qyhAo11d.js → graphql-DZVerJfy.js} +0 -0
  57. /package/dist/web/assets/{hcl-DFzjMyzm.js → hcl-CAVzrZfH.js} +0 -0
  58. /package/dist/web/assets/{ini-TdzA8TIl.js → ini-CyXdX58t.js} +0 -0
  59. /package/dist/web/assets/{java-CSGA9pkE.js → java-B5pNgvhy.js} +0 -0
  60. /package/dist/web/assets/{julia-9izz5OsY.js → julia-XRhmV3AN.js} +0 -0
  61. /package/dist/web/assets/{kotlin-DuPK7AtF.js → kotlin-DOd3J5vr.js} +0 -0
  62. /package/dist/web/assets/{less-B8d93iCg.js → less-veZSnyw6.js} +0 -0
  63. /package/dist/web/assets/{lexon-DWtEIyu7.js → lexon-QWGkuK0H.js} +0 -0
  64. /package/dist/web/assets/{lua-Ciq0OGgt.js → lua-CYGpjuO5.js} +0 -0
  65. /package/dist/web/assets/{m3-Cki6JWj_.js → m3-yNnrZkdc.js} +0 -0
  66. /package/dist/web/assets/{markdown-Cu47xwU0.js → markdown-BCSWEPSX.js} +0 -0
  67. /package/dist/web/assets/{mips-BM8ui995.js → mips-OpYmcC30.js} +0 -0
  68. /package/dist/web/assets/{msdax-DqLio0_c.js → msdax-2oxoTO9Z.js} +0 -0
  69. /package/dist/web/assets/{mysql-v1wbjJOq.js → mysql-5KlC-K_9.js} +0 -0
  70. /package/dist/web/assets/{objective-c-CQl3PGSB.js → objective-c-CcDCgtLx.js} +0 -0
  71. /package/dist/web/assets/{pascal-D4iW0ZtD.js → pascal-BZGsbaEV.js} +0 -0
  72. /package/dist/web/assets/{pascaligo-BdC9CZdj.js → pascaligo-DtD5qU3G.js} +0 -0
  73. /package/dist/web/assets/{perl-BL10m4XD.js → perl-C1jNNS3E.js} +0 -0
  74. /package/dist/web/assets/{pgsql-Be_oqVo3.js → pgsql-CT0fhiZa.js} +0 -0
  75. /package/dist/web/assets/{php-BtvXSFRI.js → php-D6DrXoPM.js} +0 -0
  76. /package/dist/web/assets/{pla-B2vUy15C.js → pla-b3-HN2pF.js} +0 -0
  77. /package/dist/web/assets/{postiats-CbmTTfXr.js → postiats-Bin2ApVS.js} +0 -0
  78. /package/dist/web/assets/{powerquery-DszLhJGx.js → powerquery-7ASnn-ZG.js} +0 -0
  79. /package/dist/web/assets/{powershell-B0dYktF6.js → powershell-t4p7sU1H.js} +0 -0
  80. /package/dist/web/assets/{protobuf-CZvaj1VX.js → protobuf-BUGeWa_j.js} +0 -0
  81. /package/dist/web/assets/{pug-CPDx1B3S.js → pug-BuKcgC9s.js} +0 -0
  82. /package/dist/web/assets/{qsharp-CAxMZVjw.js → qsharp-DxLLX8mo.js} +0 -0
  83. /package/dist/web/assets/{r-8DbbFX2l.js → r-DMlFgn7A.js} +0 -0
  84. /package/dist/web/assets/{redis-DRWj9MtJ.js → redis-cXItkC5u.js} +0 -0
  85. /package/dist/web/assets/{redshift-C6cElE_5.js → redshift-BZVbW7HE.js} +0 -0
  86. /package/dist/web/assets/{restructuredtext-W9pS9n3m.js → restructuredtext-BzjxwS8h.js} +0 -0
  87. /package/dist/web/assets/{ruby-BKnzWnk-.js → ruby-C5nyLV4l.js} +0 -0
  88. /package/dist/web/assets/{rust-YPCclWwe.js → rust-BcmMsHdf.js} +0 -0
  89. /package/dist/web/assets/{sb-BgM4DTFb.js → sb-Dnb1iy6B.js} +0 -0
  90. /package/dist/web/assets/{scala-fz1OPLMl.js → scala-anMIFYpA.js} +0 -0
  91. /package/dist/web/assets/{scheme-8Uz1RIbu.js → scheme-BItQTe08.js} +0 -0
  92. /package/dist/web/assets/{scss-Djo3IYXr.js → scss-BOv51BJ5.js} +0 -0
  93. /package/dist/web/assets/{shell-CINF5Tx_.js → shell-BsRYRTNN.js} +0 -0
  94. /package/dist/web/assets/{solidity-GgiNEuUm.js → solidity-BtuLgGDx.js} +0 -0
  95. /package/dist/web/assets/{sophia-Culj97P9.js → sophia-B0Vkc5MF.js} +0 -0
  96. /package/dist/web/assets/{sparql-C2ZlpxOY.js → sparql-B7lvkZQM.js} +0 -0
  97. /package/dist/web/assets/{sql-BEf5Pg7Y.js → sql-DvP5MpA3.js} +0 -0
  98. /package/dist/web/assets/{st-CT6UUoeH.js → st-GVUeyB3U.js} +0 -0
  99. /package/dist/web/assets/{swift-B5g0xTG3.js → swift-DSPIoCjm.js} +0 -0
  100. /package/dist/web/assets/{systemverilog-CEgQz9DR.js → systemverilog-Icj2-k23.js} +0 -0
  101. /package/dist/web/assets/{tcl-D0qL2L0I.js → tcl-Cd8KQcm-.js} +0 -0
  102. /package/dist/web/assets/{twig-BFUAVf1E.js → twig-CBHmt8z3.js} +0 -0
  103. /package/dist/web/assets/{typespec-CjVVcNKm.js → typespec-Ckc037mq.js} +0 -0
  104. /package/dist/web/assets/{vb-CZJr-DQz.js → vb-B97GW9Wb.js} +0 -0
  105. /package/dist/web/assets/{wgsl-ivoXUo2e.js → wgsl-DIKmb3YH.js} +0 -0
package/dist/cli.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { A as formatDoctorDiagnosticsJson, C as seedConfig, D as version, E as package_default, S as createConfig, T as name, _ as DEFAULT_MINERU_CONFIG, a as parseJsonSchema, b as PLACEHOLDER_TEXT, c as recognizeImageText, d as t, f as getDefaultAIConfig, g as DEFAULT_MARKITDOWN_CONFIG, h as DEFAULT_MARKER_CONFIG, i as JsonSchemaDefinitionSchema, k as doctorDiagnosticsTableRows, l as shouldUseImageOcrFallback, m as writeAIConfig, n as createMigrationConfig, o as toSnakeCase, p as readAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as initI18n, v as DEFAULT_PROMPT_CONFIG, w as description, x as AIConfigSchema, y as PLACEHOLDER_SCHEMA } from "./doctor-collector-BtEPFDoa.mjs";
1
+ import { A as doctorDiagnosticsTableRows, C as createConfig, D as package_default, E as name, O as version, S as AIConfigSchema, T as description, _ as DEFAULT_MINERU_API_CONFIG, a as parseJsonSchema, b as PLACEHOLDER_SCHEMA, c as recognizeImageText, d as t, f as getDefaultAIConfig, g as DEFAULT_MARKITDOWN_CONFIG, h as DEFAULT_MARKER_CONFIG, i as JsonSchemaDefinitionSchema, j as formatDoctorDiagnosticsJson, l as shouldUseImageOcrFallback, m as writeAIConfig, n as createMigrationConfig, o as toSnakeCase, p as readAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as initI18n, v as DEFAULT_MINERU_CONFIG, w as seedConfig, x as PLACEHOLDER_TEXT, y as DEFAULT_PROMPT_CONFIG } from "./doctor-collector-GXgIY3DH.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
@@ -29,6 +29,7 @@ import { Client, extractNotionId } from "@notionhq/client";
29
29
  import { execa } from "execa";
30
30
  import { glob, globSync } from "tinyglobby";
31
31
  import { extractText, getDocumentProxy, getMeta } from "unpdf";
32
+ import AdmZip from "adm-zip";
32
33
  import { execFile } from "node:child_process";
33
34
  import { promisify } from "node:util";
34
35
  import * as chokidar from "chokidar";
@@ -14099,14 +14100,13 @@ var ExternalCommandPdfConverter = class {
14099
14100
  pageCount,
14100
14101
  metadata: {
14101
14102
  converter: this.name,
14102
- outputPath,
14103
- ...this.config.keepOutput ? { outputDir } : {}
14103
+ outputPath
14104
14104
  }
14105
14105
  };
14106
14106
  } catch (error) {
14107
14107
  throw formatCommandError(error, `${this.config.command} ${args.join(" ")}`);
14108
14108
  } finally {
14109
- if (!this.config.keepOutput) await fs.rm(tempRoot, {
14109
+ await fs.rm(tempRoot, {
14110
14110
  recursive: true,
14111
14111
  force: true
14112
14112
  }).catch(() => {});
@@ -14114,6 +14114,113 @@ var ExternalCommandPdfConverter = class {
14114
14114
  }
14115
14115
  };
14116
14116
 
14117
+ //#endregion
14118
+ //#region src/core/pdf-converter/mineru-api.ts
14119
+ const TRAILING_SLASH_REGEXP = /\/+$/;
14120
+ var MineruApiPdfConverter = class {
14121
+ name = "mineru_api";
14122
+ constructor(config) {
14123
+ this.config = config;
14124
+ }
14125
+ async convert(input, filePath) {
14126
+ const token = this.config.token?.trim();
14127
+ if (!token) throw new Error(t("errors.pdf.mineruApiTokenRequired"));
14128
+ const baseURL = (this.config.baseURL || "https://mineru.net/api/v4").replace(TRAILING_SLASH_REGEXP, "");
14129
+ const modelVersion = this.config.modelVersion || "vlm";
14130
+ const isOcr = this.config.isOcr ?? true;
14131
+ const enableFormula = this.config.enableFormula ?? true;
14132
+ const enableTable = this.config.enableTable ?? true;
14133
+ const fileName = filePath ? path.basename(filePath) : "document.pdf";
14134
+ consola.info("Requesting Mineru upload URL...");
14135
+ const requestUrl = `${baseURL}/file-urls/batch`;
14136
+ const requestPayload = {
14137
+ files: [{
14138
+ name: fileName,
14139
+ data_id: `aiex_${Date.now()}`
14140
+ }],
14141
+ model_version: modelVersion,
14142
+ is_ocr: isOcr,
14143
+ enable_formula: enableFormula,
14144
+ enable_table: enableTable
14145
+ };
14146
+ const response = await fetch(requestUrl, {
14147
+ method: "POST",
14148
+ headers: {
14149
+ "Authorization": `Bearer ${token}`,
14150
+ "Content-Type": "application/json"
14151
+ },
14152
+ body: JSON.stringify(requestPayload)
14153
+ });
14154
+ if (!response.ok) {
14155
+ const text$1 = await response.text().catch(() => "");
14156
+ throw new Error(`Failed to request upload URL: ${response.status} ${response.statusText} ${text$1}`);
14157
+ }
14158
+ const resJson = await response.json();
14159
+ if (resJson.code !== 0) throw new Error(`Mineru API error (file-urls/batch): ${resJson.msg || JSON.stringify(resJson)}`);
14160
+ const batchId = resJson.data?.batch_id;
14161
+ let uploadUrl = "";
14162
+ if (resJson.data?.file_urls && resJson.data.file_urls.length > 0) uploadUrl = resJson.data.file_urls[0];
14163
+ else if (resJson.data?.file_upload_urls && resJson.data.file_upload_urls.length > 0) uploadUrl = resJson.data.file_upload_urls[0].upload_url;
14164
+ if (!uploadUrl || !batchId) throw new Error(`Mineru API did not return upload URLs or batch ID: ${JSON.stringify(resJson)}`);
14165
+ consola.info(`Uploading file to Mineru storage (${(input.byteLength / 1024 / 1024).toFixed(2)} MB)...`);
14166
+ const uploadResponse = await fetch(uploadUrl, {
14167
+ method: "PUT",
14168
+ body: input
14169
+ });
14170
+ if (!uploadResponse.ok) {
14171
+ const text$1 = await uploadResponse.text().catch(() => "");
14172
+ throw new Error(`Failed to upload file to OSS: ${uploadResponse.status} ${uploadResponse.statusText} ${text$1}`);
14173
+ }
14174
+ consola.info(`Mineru task started, polling results (batch_id: ${batchId})...`);
14175
+ const statusUrl = `${baseURL}/extract-results/batch/${batchId}`;
14176
+ const maxPollAttempts = 120;
14177
+ const pollIntervalMs = process.env.NODE_ENV === "test" ? 1 : 5e3;
14178
+ let attempts = 0;
14179
+ let zipUrl = "";
14180
+ let totalPages = 1;
14181
+ while (attempts < maxPollAttempts) {
14182
+ attempts++;
14183
+ await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
14184
+ const pollResponse = await fetch(statusUrl, {
14185
+ method: "GET",
14186
+ headers: { Authorization: `Bearer ${token}` }
14187
+ });
14188
+ if (!pollResponse.ok) {
14189
+ consola.warn(`Poll request failed: ${pollResponse.statusText}. Retrying...`);
14190
+ continue;
14191
+ }
14192
+ const pollJson = await pollResponse.json();
14193
+ if (pollJson.code !== 0) throw new Error(`Mineru API poll error: ${pollJson.msg || JSON.stringify(pollJson)}`);
14194
+ const extractResultList = pollJson.data?.extract_result;
14195
+ if (!extractResultList || !extractResultList.length) throw new Error(`Mineru API did not return extraction results: ${JSON.stringify(pollJson)}`);
14196
+ const result = extractResultList[0];
14197
+ const state = result.state;
14198
+ consola.info(`Mineru parsing state: ${state} (attempt ${attempts}/${maxPollAttempts})`);
14199
+ if (state === "done") {
14200
+ zipUrl = result.full_zip_url;
14201
+ if (result.extract_progress?.total_pages) totalPages = result.extract_progress.total_pages;
14202
+ break;
14203
+ }
14204
+ if (state === "failed") throw new Error(`Mineru extraction failed: ${result.err_msg || "Unknown error"}`);
14205
+ }
14206
+ if (!zipUrl) throw new Error(`Mineru extraction timed out after ${maxPollAttempts * pollIntervalMs / 1e3} seconds`);
14207
+ consola.info("Downloading result ZIP from Mineru...");
14208
+ const zipResponse = await fetch(zipUrl);
14209
+ if (!zipResponse.ok) throw new Error(`Failed to download result zip: ${zipResponse.statusText}`);
14210
+ const arrayBuffer = await zipResponse.arrayBuffer();
14211
+ const zipBuffer = Buffer.from(arrayBuffer);
14212
+ consola.info("Extracting Markdown content...");
14213
+ const entries = new AdmZip(zipBuffer).getEntries();
14214
+ let mdEntry = entries.find((e) => e.entryName === "full.md" || e.entryName.endsWith("full.md"));
14215
+ if (!mdEntry) mdEntry = entries.find((e) => e.entryName.endsWith(".md"));
14216
+ if (!mdEntry) throw new Error("Could not find any Markdown (.md) file inside the Mineru result zip");
14217
+ return {
14218
+ text: mdEntry.getData().toString("utf8"),
14219
+ pageCount: totalPages
14220
+ };
14221
+ }
14222
+ };
14223
+
14117
14224
  //#endregion
14118
14225
  //#region src/core/pdf-converter/unpdf.ts
14119
14226
  var UnpdfConverter = class {
@@ -14172,6 +14279,7 @@ function createPdfConverter(config) {
14172
14279
  const mineruConfig = config.mineru ?? DEFAULT_MINERU_CONFIG;
14173
14280
  return withFallback(new ExternalCommandPdfConverter("mineru", mineruConfig), mineruConfig);
14174
14281
  }
14282
+ if (config.converter === "mineru_api") return new MineruApiPdfConverter(config.mineruApi ?? DEFAULT_MINERU_API_CONFIG);
14175
14283
  if (config.converter === "markitdown") {
14176
14284
  const markitdownConfig = config.markitdown ?? DEFAULT_MARKITDOWN_CONFIG;
14177
14285
  return withFallback(new ExternalCommandPdfConverter("markitdown", markitdownConfig), markitdownConfig);
@@ -14182,7 +14290,7 @@ function createPdfConverter(config) {
14182
14290
  }
14183
14291
  if (config.converter === "external") {
14184
14292
  if (!config.external) throw new Error(t("errors.pdf.externalNotConfigured"));
14185
- return withFallback(new ExternalCommandPdfConverter("external", config.external), config.external);
14293
+ return new ExternalCommandPdfConverter("external", config.external);
14186
14294
  }
14187
14295
  }
14188
14296
  const key = typeof config === "string" ? config : "unpdf";
@@ -74,7 +74,7 @@ function doctorDiagnosticsTableRows(d) {
74
74
  //#endregion
75
75
  //#region package.json
76
76
  var name = "aiex-cli";
77
- var version = "0.0.4-beta.5";
77
+ var version = "0.0.5-beta.1";
78
78
  var description = "JSON Schema → SQLite with AI-powered data extraction";
79
79
  var package_default = {
80
80
  name,
@@ -141,6 +141,7 @@ var package_default = {
141
141
  "@langfuse/otel": "catalog:",
142
142
  "@notionhq/client": "catalog:",
143
143
  "@opentelemetry/sdk-trace-node": "catalog:",
144
+ "adm-zip": "catalog:",
144
145
  "ai": "catalog:",
145
146
  "better-sqlite3": "catalog:",
146
147
  "chokidar": "catalog:",
@@ -176,6 +177,7 @@ var package_default = {
176
177
  devDependencies: {
177
178
  "@antfu/eslint-config": "catalog:cli",
178
179
  "@antfu/ni": "catalog:cli",
180
+ "@types/adm-zip": "catalog:",
179
181
  "@types/better-sqlite3": "catalog:types",
180
182
  "@types/jsonfile": "catalog:",
181
183
  "@types/node": "catalog:types",
@@ -238,21 +240,30 @@ const ImageOcrConfigSchema = z.object({
238
240
  });
239
241
  const ExternalPdfConverterConfigSchema = z.object({
240
242
  command: z.string().min(1),
241
- args: z.array(z.string()),
243
+ args: z.array(z.string()).min(1).refine((args) => args.some((arg) => arg.includes("{input}")), { message: "args must contain {input} template variable" }),
242
244
  outputFile: z.string().min(1).optional(),
243
245
  timeout: z.number().int().positive().default(600).optional(),
244
- fallbackToUnpdf: z.boolean().optional(),
245
- keepOutput: z.boolean().optional()
246
+ fallbackToUnpdf: z.boolean().optional()
247
+ });
248
+ const MineruApiPdfConverterConfigSchema = z.object({
249
+ token: z.string(),
250
+ baseURL: z.string().url().optional(),
251
+ modelVersion: z.string().optional(),
252
+ isOcr: z.boolean().optional(),
253
+ enableFormula: z.boolean().optional(),
254
+ enableTable: z.boolean().optional()
246
255
  });
247
256
  const PdfConfigSchema = z.object({
248
257
  converter: z.enum([
249
258
  "unpdf",
250
259
  "mineru",
260
+ "mineru_api",
251
261
  "markitdown",
252
262
  "marker",
253
263
  "external"
254
264
  ]),
255
265
  mineru: ExternalPdfConverterConfigSchema.optional(),
266
+ mineruApi: MineruApiPdfConverterConfigSchema.optional(),
256
267
  markitdown: ExternalPdfConverterConfigSchema.optional(),
257
268
  marker: ExternalPdfConverterConfigSchema.optional(),
258
269
  external: ExternalPdfConverterConfigSchema.optional()
@@ -339,8 +350,7 @@ const DEFAULT_MINERU_CONFIG = {
339
350
  "{outputDir}"
340
351
  ],
341
352
  timeout: 600,
342
- fallbackToUnpdf: true,
343
- keepOutput: true
353
+ fallbackToUnpdf: true
344
354
  };
345
355
  const DEFAULT_MARKITDOWN_CONFIG = {
346
356
  command: "markitdown",
@@ -349,10 +359,8 @@ const DEFAULT_MARKITDOWN_CONFIG = {
349
359
  "-o",
350
360
  "{outputDir}/{basename}.md"
351
361
  ],
352
- outputFile: "{outputDir}/{basename}.md",
353
362
  timeout: 600,
354
- fallbackToUnpdf: true,
355
- keepOutput: true
363
+ fallbackToUnpdf: true
356
364
  };
357
365
  const DEFAULT_MARKER_CONFIG = {
358
366
  command: "marker_single",
@@ -361,14 +369,21 @@ const DEFAULT_MARKER_CONFIG = {
361
369
  "--output_dir",
362
370
  "{outputDir}"
363
371
  ],
364
- outputFile: "{outputDir}/{basename}/{basename}.md",
365
372
  timeout: 600,
366
- fallbackToUnpdf: true,
367
- keepOutput: true
373
+ fallbackToUnpdf: true
374
+ };
375
+ const DEFAULT_MINERU_API_CONFIG = {
376
+ token: "",
377
+ baseURL: "https://mineru.net/api/v4",
378
+ modelVersion: "vlm",
379
+ isOcr: true,
380
+ enableFormula: true,
381
+ enableTable: true
368
382
  };
369
383
  const DEFAULT_PDF_CONFIG = {
370
384
  converter: "unpdf",
371
385
  mineru: DEFAULT_MINERU_CONFIG,
386
+ mineruApi: DEFAULT_MINERU_API_CONFIG,
372
387
  markitdown: DEFAULT_MARKITDOWN_CONFIG,
373
388
  marker: DEFAULT_MARKER_CONFIG
374
389
  };
@@ -698,7 +713,8 @@ const en = {
698
713
  },
699
714
  pdf: {
700
715
  externalNotConfigured: "External PDF converter is selected but no external command is configured.",
701
- converterRequiresConfig: "PDF converter \"{{name}}\" requires configuration."
716
+ converterRequiresConfig: "PDF converter \"{{name}}\" requires configuration.",
717
+ mineruApiTokenRequired: "Mineru API Token is not configured. Please configure it in AI Settings."
702
718
  },
703
719
  extractionAudit: {
704
720
  recordNotFound: "Extraction audit record not found: {{id}}",
@@ -940,7 +956,7 @@ async function initI18n(lng) {
940
956
  fallbackLng: "en",
941
957
  resources: {
942
958
  "en": { translation: en },
943
- "zh-CN": { translation: await import("./zh-CN-BZihcLn3.mjs").then((m) => m.zhCN) }
959
+ "zh-CN": { translation: await import("./zh-CN-Qcn0DHFh.mjs").then((m) => m.zhCN) }
944
960
  },
945
961
  interpolation: { escapeValue: false },
946
962
  returnNull: false
@@ -1520,4 +1536,4 @@ async function collectDoctorDiagnostics(options = {}) {
1520
1536
  }
1521
1537
 
1522
1538
  //#endregion
1523
- export { formatDoctorDiagnosticsJson as A, seedConfig as C, version as D, package_default as E, buildDoctorDiagnostics as O, createConfig as S, name as T, DEFAULT_MINERU_CONFIG as _, parseJsonSchema as a, PLACEHOLDER_TEXT as b, recognizeImageText as c, t as d, getDefaultAIConfig as f, DEFAULT_MARKITDOWN_CONFIG as g, DEFAULT_MARKER_CONFIG as h, JsonSchemaDefinitionSchema as i, doctorDiagnosticsTableRows as k, shouldUseImageOcrFallback as l, writeAIConfig as m, createMigrationConfig as n, toSnakeCase as o, readAIConfig as p, generateDrizzleConfig as r, generateDrizzleSchema as s, collectDoctorDiagnostics as t, initI18n as u, DEFAULT_PROMPT_CONFIG as v, description as w, AIConfigSchema as x, PLACEHOLDER_SCHEMA as y };
1539
+ export { doctorDiagnosticsTableRows as A, createConfig as C, package_default as D, name as E, version as O, AIConfigSchema as S, description as T, DEFAULT_MINERU_API_CONFIG as _, parseJsonSchema as a, PLACEHOLDER_SCHEMA as b, recognizeImageText as c, t as d, getDefaultAIConfig as f, DEFAULT_MARKITDOWN_CONFIG as g, DEFAULT_MARKER_CONFIG as h, JsonSchemaDefinitionSchema as i, formatDoctorDiagnosticsJson as j, buildDoctorDiagnostics as k, shouldUseImageOcrFallback as l, writeAIConfig as m, createMigrationConfig as n, toSnakeCase as o, readAIConfig as p, generateDrizzleConfig as r, generateDrizzleSchema as s, collectDoctorDiagnostics as t, initI18n as u, DEFAULT_MINERU_CONFIG as v, seedConfig as w, PLACEHOLDER_TEXT as x, DEFAULT_PROMPT_CONFIG as y };
package/dist/index.mjs CHANGED
@@ -1,3 +1,3 @@
1
- import { A as formatDoctorDiagnosticsJson, O as buildDoctorDiagnostics, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, k as doctorDiagnosticsTableRows, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics } from "./doctor-collector-BtEPFDoa.mjs";
1
+ import { A as doctorDiagnosticsTableRows, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, j as formatDoctorDiagnosticsJson, k as buildDoctorDiagnostics, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics } from "./doctor-collector-GXgIY3DH.mjs";
2
2
 
3
3
  export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };