aiex-cli 0.0.4-beta.5 → 0.0.5-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +113 -5
- package/dist/{doctor-collector-BtEPFDoa.mjs → doctor-collector-GXgIY3DH.mjs} +31 -15
- package/dist/index.mjs +1 -1
- package/dist/web/assets/AISettings-Blb8BV7K.js +272 -0
- package/dist/web/assets/Dashboard-DFlCyVls.js +1 -0
- package/dist/web/assets/{DataBrowser-IlgTMGi0.js → DataBrowser-GAA-pGq0.js} +1 -1
- package/dist/web/assets/ExtractionViewer-DqIrBGNK.js +1 -0
- package/dist/web/assets/{JsonSchemaEditor-Dyl391lX.js → JsonSchemaEditor-CfPzcMKJ.js} +11 -11
- package/dist/web/assets/{cssMode-BM5FOYIl.js → cssMode-CPThwItX.js} +1 -1
- package/dist/web/assets/{editor.main-C2Q97Dkk.js → editor.main-DQ658ZNP.js} +2 -2
- package/dist/web/assets/{freemarker2-BqyJTCTn.js → freemarker2-DWDTYVJR.js} +1 -1
- package/dist/web/assets/{handlebars-DxRJTefg.js → handlebars-D4DzjGQ7.js} +1 -1
- package/dist/web/assets/{html-gyvgrapw.js → html-DnzhKSoD.js} +1 -1
- package/dist/web/assets/{htmlMode-CNjCRwdY.js → htmlMode-CR7UKfEH.js} +1 -1
- package/dist/web/assets/index-BisY0WU-.js +882 -0
- package/dist/web/assets/index-CL1hhIbO.css +2 -0
- package/dist/web/assets/{javascript-BK6ufvq6.js → javascript-D2srszZ8.js} +1 -1
- package/dist/web/assets/{jsonMode-m2trGjkO.js → jsonMode-B4jaPYEr.js} +1 -1
- package/dist/web/assets/{liquid-BtyuYqQQ.js → liquid-CIT2Wl_l.js} +1 -1
- package/dist/web/assets/{mdx-C8K4EvCQ.js → mdx-CWLaEOFy.js} +1 -1
- package/dist/web/assets/{monaco.contribution-BTr-G8hO.js → monaco.contribution-BJhODGkt.js} +2 -2
- package/dist/web/assets/{python-8dyH1nS_.js → python-6CGfpCNq.js} +1 -1
- package/dist/web/assets/{razor-DtWMI74k.js → razor-DEMMh3TD.js} +1 -1
- package/dist/web/assets/{tsMode-Dv8YG-YK.js → tsMode-D6_qJNEr.js} +1 -1
- package/dist/web/assets/{typescript-DbClKYS3.js → typescript-BM9aPEFg.js} +1 -1
- package/dist/web/assets/{xml-Bb59gjP6.js → xml-CoSbvcg5.js} +1 -1
- package/dist/web/assets/{yaml-DVMb_IfV.js → yaml-56GOgy8k.js} +1 -1
- package/dist/web/index.html +2 -2
- package/dist/{zh-CN-BZihcLn3.mjs → zh-CN-Qcn0DHFh.mjs} +2 -1
- package/package.json +3 -1
- package/dist/web/assets/AISettings-DfoDfxk9.js +0 -272
- package/dist/web/assets/ExtractionViewer-0F4C26V5.js +0 -1
- package/dist/web/assets/index-DcTjZeUT.css +0 -2
- package/dist/web/assets/index-sK43vSj1.js +0 -882
- /package/dist/web/assets/{abap-DiwvWnMr.js → abap-Bgec7Keq.js} +0 -0
- /package/dist/web/assets/{apex-CmtZjKlf.js → apex-VBlPwEoQ.js} +0 -0
- /package/dist/web/assets/{azcli-DL2My_i-.js → azcli-DKqrEFBx.js} +0 -0
- /package/dist/web/assets/{bat-B-nC98wG.js → bat-DdgQWy_0.js} +0 -0
- /package/dist/web/assets/{bicep-Ju5MwOgh.js → bicep-CRMM43EB.js} +0 -0
- /package/dist/web/assets/{cameligo-8Eu1TyBr.js → cameligo-UatALtML.js} +0 -0
- /package/dist/web/assets/{clojure-u-RpMkH3.js → clojure-D8JU08RA.js} +0 -0
- /package/dist/web/assets/{coffee-CdA7bbTe.js → coffee-C56wu358.js} +0 -0
- /package/dist/web/assets/{cpp-CzNFP8ks.js → cpp-CyZLvhJG.js} +0 -0
- /package/dist/web/assets/{csharp-j1LThmcE.js → csharp-BJl3ixva.js} +0 -0
- /package/dist/web/assets/{csp-CLRC61y6.js → csp-CxEKxmO-.js} +0 -0
- /package/dist/web/assets/{css-r6rC_7P2.js → css-B0t_muXd.js} +0 -0
- /package/dist/web/assets/{cypher-CW08XVUh.js → cypher-D1hqiMFD.js} +0 -0
- /package/dist/web/assets/{dart-Cs9aL5T_.js → dart-Bz550Pyv.js} +0 -0
- /package/dist/web/assets/{dockerfile-BWM0M184.js → dockerfile-CIXgVAuA.js} +0 -0
- /package/dist/web/assets/{ecl-MJJuer5P.js → ecl-D9qbvZoA.js} +0 -0
- /package/dist/web/assets/{editor.api-nsOUOZde.js → editor.api-C8BHpRhn.js} +0 -0
- /package/dist/web/assets/{elixir-D2AIuXqn.js → elixir-b2M38fAy.js} +0 -0
- /package/dist/web/assets/{flow9-B2H24giC.js → flow9-Dq1UYMkt.js} +0 -0
- /package/dist/web/assets/{fsharp-CFNadkg7.js → fsharp-BaeLhgfq.js} +0 -0
- /package/dist/web/assets/{go-dSur1iB2.js → go-Bd-NFKIC.js} +0 -0
- /package/dist/web/assets/{graphql-qyhAo11d.js → graphql-DZVerJfy.js} +0 -0
- /package/dist/web/assets/{hcl-DFzjMyzm.js → hcl-CAVzrZfH.js} +0 -0
- /package/dist/web/assets/{ini-TdzA8TIl.js → ini-CyXdX58t.js} +0 -0
- /package/dist/web/assets/{java-CSGA9pkE.js → java-B5pNgvhy.js} +0 -0
- /package/dist/web/assets/{julia-9izz5OsY.js → julia-XRhmV3AN.js} +0 -0
- /package/dist/web/assets/{kotlin-DuPK7AtF.js → kotlin-DOd3J5vr.js} +0 -0
- /package/dist/web/assets/{less-B8d93iCg.js → less-veZSnyw6.js} +0 -0
- /package/dist/web/assets/{lexon-DWtEIyu7.js → lexon-QWGkuK0H.js} +0 -0
- /package/dist/web/assets/{lua-Ciq0OGgt.js → lua-CYGpjuO5.js} +0 -0
- /package/dist/web/assets/{m3-Cki6JWj_.js → m3-yNnrZkdc.js} +0 -0
- /package/dist/web/assets/{markdown-Cu47xwU0.js → markdown-BCSWEPSX.js} +0 -0
- /package/dist/web/assets/{mips-BM8ui995.js → mips-OpYmcC30.js} +0 -0
- /package/dist/web/assets/{msdax-DqLio0_c.js → msdax-2oxoTO9Z.js} +0 -0
- /package/dist/web/assets/{mysql-v1wbjJOq.js → mysql-5KlC-K_9.js} +0 -0
- /package/dist/web/assets/{objective-c-CQl3PGSB.js → objective-c-CcDCgtLx.js} +0 -0
- /package/dist/web/assets/{pascal-D4iW0ZtD.js → pascal-BZGsbaEV.js} +0 -0
- /package/dist/web/assets/{pascaligo-BdC9CZdj.js → pascaligo-DtD5qU3G.js} +0 -0
- /package/dist/web/assets/{perl-BL10m4XD.js → perl-C1jNNS3E.js} +0 -0
- /package/dist/web/assets/{pgsql-Be_oqVo3.js → pgsql-CT0fhiZa.js} +0 -0
- /package/dist/web/assets/{php-BtvXSFRI.js → php-D6DrXoPM.js} +0 -0
- /package/dist/web/assets/{pla-B2vUy15C.js → pla-b3-HN2pF.js} +0 -0
- /package/dist/web/assets/{postiats-CbmTTfXr.js → postiats-Bin2ApVS.js} +0 -0
- /package/dist/web/assets/{powerquery-DszLhJGx.js → powerquery-7ASnn-ZG.js} +0 -0
- /package/dist/web/assets/{powershell-B0dYktF6.js → powershell-t4p7sU1H.js} +0 -0
- /package/dist/web/assets/{protobuf-CZvaj1VX.js → protobuf-BUGeWa_j.js} +0 -0
- /package/dist/web/assets/{pug-CPDx1B3S.js → pug-BuKcgC9s.js} +0 -0
- /package/dist/web/assets/{qsharp-CAxMZVjw.js → qsharp-DxLLX8mo.js} +0 -0
- /package/dist/web/assets/{r-8DbbFX2l.js → r-DMlFgn7A.js} +0 -0
- /package/dist/web/assets/{redis-DRWj9MtJ.js → redis-cXItkC5u.js} +0 -0
- /package/dist/web/assets/{redshift-C6cElE_5.js → redshift-BZVbW7HE.js} +0 -0
- /package/dist/web/assets/{restructuredtext-W9pS9n3m.js → restructuredtext-BzjxwS8h.js} +0 -0
- /package/dist/web/assets/{ruby-BKnzWnk-.js → ruby-C5nyLV4l.js} +0 -0
- /package/dist/web/assets/{rust-YPCclWwe.js → rust-BcmMsHdf.js} +0 -0
- /package/dist/web/assets/{sb-BgM4DTFb.js → sb-Dnb1iy6B.js} +0 -0
- /package/dist/web/assets/{scala-fz1OPLMl.js → scala-anMIFYpA.js} +0 -0
- /package/dist/web/assets/{scheme-8Uz1RIbu.js → scheme-BItQTe08.js} +0 -0
- /package/dist/web/assets/{scss-Djo3IYXr.js → scss-BOv51BJ5.js} +0 -0
- /package/dist/web/assets/{shell-CINF5Tx_.js → shell-BsRYRTNN.js} +0 -0
- /package/dist/web/assets/{solidity-GgiNEuUm.js → solidity-BtuLgGDx.js} +0 -0
- /package/dist/web/assets/{sophia-Culj97P9.js → sophia-B0Vkc5MF.js} +0 -0
- /package/dist/web/assets/{sparql-C2ZlpxOY.js → sparql-B7lvkZQM.js} +0 -0
- /package/dist/web/assets/{sql-BEf5Pg7Y.js → sql-DvP5MpA3.js} +0 -0
- /package/dist/web/assets/{st-CT6UUoeH.js → st-GVUeyB3U.js} +0 -0
- /package/dist/web/assets/{swift-B5g0xTG3.js → swift-DSPIoCjm.js} +0 -0
- /package/dist/web/assets/{systemverilog-CEgQz9DR.js → systemverilog-Icj2-k23.js} +0 -0
- /package/dist/web/assets/{tcl-D0qL2L0I.js → tcl-Cd8KQcm-.js} +0 -0
- /package/dist/web/assets/{twig-BFUAVf1E.js → twig-CBHmt8z3.js} +0 -0
- /package/dist/web/assets/{typespec-CjVVcNKm.js → typespec-Ckc037mq.js} +0 -0
- /package/dist/web/assets/{vb-CZJr-DQz.js → vb-B97GW9Wb.js} +0 -0
- /package/dist/web/assets/{wgsl-ivoXUo2e.js → wgsl-DIKmb3YH.js} +0 -0
package/dist/cli.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as
|
|
1
|
+
import { A as doctorDiagnosticsTableRows, C as createConfig, D as package_default, E as name, O as version, S as AIConfigSchema, T as description, _ as DEFAULT_MINERU_API_CONFIG, a as parseJsonSchema, b as PLACEHOLDER_SCHEMA, c as recognizeImageText, d as t, f as getDefaultAIConfig, g as DEFAULT_MARKITDOWN_CONFIG, h as DEFAULT_MARKER_CONFIG, i as JsonSchemaDefinitionSchema, j as formatDoctorDiagnosticsJson, l as shouldUseImageOcrFallback, m as writeAIConfig, n as createMigrationConfig, o as toSnakeCase, p as readAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as initI18n, v as DEFAULT_MINERU_CONFIG, w as seedConfig, x as PLACEHOLDER_TEXT, y as DEFAULT_PROMPT_CONFIG } from "./doctor-collector-GXgIY3DH.mjs";
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
4
|
import os from "node:os";
|
|
@@ -29,6 +29,7 @@ import { Client, extractNotionId } from "@notionhq/client";
|
|
|
29
29
|
import { execa } from "execa";
|
|
30
30
|
import { glob, globSync } from "tinyglobby";
|
|
31
31
|
import { extractText, getDocumentProxy, getMeta } from "unpdf";
|
|
32
|
+
import AdmZip from "adm-zip";
|
|
32
33
|
import { execFile } from "node:child_process";
|
|
33
34
|
import { promisify } from "node:util";
|
|
34
35
|
import * as chokidar from "chokidar";
|
|
@@ -14099,14 +14100,13 @@ var ExternalCommandPdfConverter = class {
|
|
|
14099
14100
|
pageCount,
|
|
14100
14101
|
metadata: {
|
|
14101
14102
|
converter: this.name,
|
|
14102
|
-
outputPath
|
|
14103
|
-
...this.config.keepOutput ? { outputDir } : {}
|
|
14103
|
+
outputPath
|
|
14104
14104
|
}
|
|
14105
14105
|
};
|
|
14106
14106
|
} catch (error) {
|
|
14107
14107
|
throw formatCommandError(error, `${this.config.command} ${args.join(" ")}`);
|
|
14108
14108
|
} finally {
|
|
14109
|
-
|
|
14109
|
+
await fs.rm(tempRoot, {
|
|
14110
14110
|
recursive: true,
|
|
14111
14111
|
force: true
|
|
14112
14112
|
}).catch(() => {});
|
|
@@ -14114,6 +14114,113 @@ var ExternalCommandPdfConverter = class {
|
|
|
14114
14114
|
}
|
|
14115
14115
|
};
|
|
14116
14116
|
|
|
14117
|
+
//#endregion
|
|
14118
|
+
//#region src/core/pdf-converter/mineru-api.ts
|
|
14119
|
+
const TRAILING_SLASH_REGEXP = /\/+$/;
|
|
14120
|
+
var MineruApiPdfConverter = class {
|
|
14121
|
+
name = "mineru_api";
|
|
14122
|
+
constructor(config) {
|
|
14123
|
+
this.config = config;
|
|
14124
|
+
}
|
|
14125
|
+
async convert(input, filePath) {
|
|
14126
|
+
const token = this.config.token?.trim();
|
|
14127
|
+
if (!token) throw new Error(t("errors.pdf.mineruApiTokenRequired"));
|
|
14128
|
+
const baseURL = (this.config.baseURL || "https://mineru.net/api/v4").replace(TRAILING_SLASH_REGEXP, "");
|
|
14129
|
+
const modelVersion = this.config.modelVersion || "vlm";
|
|
14130
|
+
const isOcr = this.config.isOcr ?? true;
|
|
14131
|
+
const enableFormula = this.config.enableFormula ?? true;
|
|
14132
|
+
const enableTable = this.config.enableTable ?? true;
|
|
14133
|
+
const fileName = filePath ? path.basename(filePath) : "document.pdf";
|
|
14134
|
+
consola.info("Requesting Mineru upload URL...");
|
|
14135
|
+
const requestUrl = `${baseURL}/file-urls/batch`;
|
|
14136
|
+
const requestPayload = {
|
|
14137
|
+
files: [{
|
|
14138
|
+
name: fileName,
|
|
14139
|
+
data_id: `aiex_${Date.now()}`
|
|
14140
|
+
}],
|
|
14141
|
+
model_version: modelVersion,
|
|
14142
|
+
is_ocr: isOcr,
|
|
14143
|
+
enable_formula: enableFormula,
|
|
14144
|
+
enable_table: enableTable
|
|
14145
|
+
};
|
|
14146
|
+
const response = await fetch(requestUrl, {
|
|
14147
|
+
method: "POST",
|
|
14148
|
+
headers: {
|
|
14149
|
+
"Authorization": `Bearer ${token}`,
|
|
14150
|
+
"Content-Type": "application/json"
|
|
14151
|
+
},
|
|
14152
|
+
body: JSON.stringify(requestPayload)
|
|
14153
|
+
});
|
|
14154
|
+
if (!response.ok) {
|
|
14155
|
+
const text$1 = await response.text().catch(() => "");
|
|
14156
|
+
throw new Error(`Failed to request upload URL: ${response.status} ${response.statusText} ${text$1}`);
|
|
14157
|
+
}
|
|
14158
|
+
const resJson = await response.json();
|
|
14159
|
+
if (resJson.code !== 0) throw new Error(`Mineru API error (file-urls/batch): ${resJson.msg || JSON.stringify(resJson)}`);
|
|
14160
|
+
const batchId = resJson.data?.batch_id;
|
|
14161
|
+
let uploadUrl = "";
|
|
14162
|
+
if (resJson.data?.file_urls && resJson.data.file_urls.length > 0) uploadUrl = resJson.data.file_urls[0];
|
|
14163
|
+
else if (resJson.data?.file_upload_urls && resJson.data.file_upload_urls.length > 0) uploadUrl = resJson.data.file_upload_urls[0].upload_url;
|
|
14164
|
+
if (!uploadUrl || !batchId) throw new Error(`Mineru API did not return upload URLs or batch ID: ${JSON.stringify(resJson)}`);
|
|
14165
|
+
consola.info(`Uploading file to Mineru storage (${(input.byteLength / 1024 / 1024).toFixed(2)} MB)...`);
|
|
14166
|
+
const uploadResponse = await fetch(uploadUrl, {
|
|
14167
|
+
method: "PUT",
|
|
14168
|
+
body: input
|
|
14169
|
+
});
|
|
14170
|
+
if (!uploadResponse.ok) {
|
|
14171
|
+
const text$1 = await uploadResponse.text().catch(() => "");
|
|
14172
|
+
throw new Error(`Failed to upload file to OSS: ${uploadResponse.status} ${uploadResponse.statusText} ${text$1}`);
|
|
14173
|
+
}
|
|
14174
|
+
consola.info(`Mineru task started, polling results (batch_id: ${batchId})...`);
|
|
14175
|
+
const statusUrl = `${baseURL}/extract-results/batch/${batchId}`;
|
|
14176
|
+
const maxPollAttempts = 120;
|
|
14177
|
+
const pollIntervalMs = process.env.NODE_ENV === "test" ? 1 : 5e3;
|
|
14178
|
+
let attempts = 0;
|
|
14179
|
+
let zipUrl = "";
|
|
14180
|
+
let totalPages = 1;
|
|
14181
|
+
while (attempts < maxPollAttempts) {
|
|
14182
|
+
attempts++;
|
|
14183
|
+
await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
|
|
14184
|
+
const pollResponse = await fetch(statusUrl, {
|
|
14185
|
+
method: "GET",
|
|
14186
|
+
headers: { Authorization: `Bearer ${token}` }
|
|
14187
|
+
});
|
|
14188
|
+
if (!pollResponse.ok) {
|
|
14189
|
+
consola.warn(`Poll request failed: ${pollResponse.statusText}. Retrying...`);
|
|
14190
|
+
continue;
|
|
14191
|
+
}
|
|
14192
|
+
const pollJson = await pollResponse.json();
|
|
14193
|
+
if (pollJson.code !== 0) throw new Error(`Mineru API poll error: ${pollJson.msg || JSON.stringify(pollJson)}`);
|
|
14194
|
+
const extractResultList = pollJson.data?.extract_result;
|
|
14195
|
+
if (!extractResultList || !extractResultList.length) throw new Error(`Mineru API did not return extraction results: ${JSON.stringify(pollJson)}`);
|
|
14196
|
+
const result = extractResultList[0];
|
|
14197
|
+
const state = result.state;
|
|
14198
|
+
consola.info(`Mineru parsing state: ${state} (attempt ${attempts}/${maxPollAttempts})`);
|
|
14199
|
+
if (state === "done") {
|
|
14200
|
+
zipUrl = result.full_zip_url;
|
|
14201
|
+
if (result.extract_progress?.total_pages) totalPages = result.extract_progress.total_pages;
|
|
14202
|
+
break;
|
|
14203
|
+
}
|
|
14204
|
+
if (state === "failed") throw new Error(`Mineru extraction failed: ${result.err_msg || "Unknown error"}`);
|
|
14205
|
+
}
|
|
14206
|
+
if (!zipUrl) throw new Error(`Mineru extraction timed out after ${maxPollAttempts * pollIntervalMs / 1e3} seconds`);
|
|
14207
|
+
consola.info("Downloading result ZIP from Mineru...");
|
|
14208
|
+
const zipResponse = await fetch(zipUrl);
|
|
14209
|
+
if (!zipResponse.ok) throw new Error(`Failed to download result zip: ${zipResponse.statusText}`);
|
|
14210
|
+
const arrayBuffer = await zipResponse.arrayBuffer();
|
|
14211
|
+
const zipBuffer = Buffer.from(arrayBuffer);
|
|
14212
|
+
consola.info("Extracting Markdown content...");
|
|
14213
|
+
const entries = new AdmZip(zipBuffer).getEntries();
|
|
14214
|
+
let mdEntry = entries.find((e) => e.entryName === "full.md" || e.entryName.endsWith("full.md"));
|
|
14215
|
+
if (!mdEntry) mdEntry = entries.find((e) => e.entryName.endsWith(".md"));
|
|
14216
|
+
if (!mdEntry) throw new Error("Could not find any Markdown (.md) file inside the Mineru result zip");
|
|
14217
|
+
return {
|
|
14218
|
+
text: mdEntry.getData().toString("utf8"),
|
|
14219
|
+
pageCount: totalPages
|
|
14220
|
+
};
|
|
14221
|
+
}
|
|
14222
|
+
};
|
|
14223
|
+
|
|
14117
14224
|
//#endregion
|
|
14118
14225
|
//#region src/core/pdf-converter/unpdf.ts
|
|
14119
14226
|
var UnpdfConverter = class {
|
|
@@ -14172,6 +14279,7 @@ function createPdfConverter(config) {
|
|
|
14172
14279
|
const mineruConfig = config.mineru ?? DEFAULT_MINERU_CONFIG;
|
|
14173
14280
|
return withFallback(new ExternalCommandPdfConverter("mineru", mineruConfig), mineruConfig);
|
|
14174
14281
|
}
|
|
14282
|
+
if (config.converter === "mineru_api") return new MineruApiPdfConverter(config.mineruApi ?? DEFAULT_MINERU_API_CONFIG);
|
|
14175
14283
|
if (config.converter === "markitdown") {
|
|
14176
14284
|
const markitdownConfig = config.markitdown ?? DEFAULT_MARKITDOWN_CONFIG;
|
|
14177
14285
|
return withFallback(new ExternalCommandPdfConverter("markitdown", markitdownConfig), markitdownConfig);
|
|
@@ -14182,7 +14290,7 @@ function createPdfConverter(config) {
|
|
|
14182
14290
|
}
|
|
14183
14291
|
if (config.converter === "external") {
|
|
14184
14292
|
if (!config.external) throw new Error(t("errors.pdf.externalNotConfigured"));
|
|
14185
|
-
return
|
|
14293
|
+
return new ExternalCommandPdfConverter("external", config.external);
|
|
14186
14294
|
}
|
|
14187
14295
|
}
|
|
14188
14296
|
const key = typeof config === "string" ? config : "unpdf";
|
|
@@ -74,7 +74,7 @@ function doctorDiagnosticsTableRows(d) {
|
|
|
74
74
|
//#endregion
|
|
75
75
|
//#region package.json
|
|
76
76
|
var name = "aiex-cli";
|
|
77
|
-
var version = "0.0.
|
|
77
|
+
var version = "0.0.5-beta.1";
|
|
78
78
|
var description = "JSON Schema → SQLite with AI-powered data extraction";
|
|
79
79
|
var package_default = {
|
|
80
80
|
name,
|
|
@@ -141,6 +141,7 @@ var package_default = {
|
|
|
141
141
|
"@langfuse/otel": "catalog:",
|
|
142
142
|
"@notionhq/client": "catalog:",
|
|
143
143
|
"@opentelemetry/sdk-trace-node": "catalog:",
|
|
144
|
+
"adm-zip": "catalog:",
|
|
144
145
|
"ai": "catalog:",
|
|
145
146
|
"better-sqlite3": "catalog:",
|
|
146
147
|
"chokidar": "catalog:",
|
|
@@ -176,6 +177,7 @@ var package_default = {
|
|
|
176
177
|
devDependencies: {
|
|
177
178
|
"@antfu/eslint-config": "catalog:cli",
|
|
178
179
|
"@antfu/ni": "catalog:cli",
|
|
180
|
+
"@types/adm-zip": "catalog:",
|
|
179
181
|
"@types/better-sqlite3": "catalog:types",
|
|
180
182
|
"@types/jsonfile": "catalog:",
|
|
181
183
|
"@types/node": "catalog:types",
|
|
@@ -238,21 +240,30 @@ const ImageOcrConfigSchema = z.object({
|
|
|
238
240
|
});
|
|
239
241
|
const ExternalPdfConverterConfigSchema = z.object({
|
|
240
242
|
command: z.string().min(1),
|
|
241
|
-
args: z.array(z.string()),
|
|
243
|
+
args: z.array(z.string()).min(1).refine((args) => args.some((arg) => arg.includes("{input}")), { message: "args must contain {input} template variable" }),
|
|
242
244
|
outputFile: z.string().min(1).optional(),
|
|
243
245
|
timeout: z.number().int().positive().default(600).optional(),
|
|
244
|
-
fallbackToUnpdf: z.boolean().optional()
|
|
245
|
-
|
|
246
|
+
fallbackToUnpdf: z.boolean().optional()
|
|
247
|
+
});
|
|
248
|
+
const MineruApiPdfConverterConfigSchema = z.object({
|
|
249
|
+
token: z.string(),
|
|
250
|
+
baseURL: z.string().url().optional(),
|
|
251
|
+
modelVersion: z.string().optional(),
|
|
252
|
+
isOcr: z.boolean().optional(),
|
|
253
|
+
enableFormula: z.boolean().optional(),
|
|
254
|
+
enableTable: z.boolean().optional()
|
|
246
255
|
});
|
|
247
256
|
const PdfConfigSchema = z.object({
|
|
248
257
|
converter: z.enum([
|
|
249
258
|
"unpdf",
|
|
250
259
|
"mineru",
|
|
260
|
+
"mineru_api",
|
|
251
261
|
"markitdown",
|
|
252
262
|
"marker",
|
|
253
263
|
"external"
|
|
254
264
|
]),
|
|
255
265
|
mineru: ExternalPdfConverterConfigSchema.optional(),
|
|
266
|
+
mineruApi: MineruApiPdfConverterConfigSchema.optional(),
|
|
256
267
|
markitdown: ExternalPdfConverterConfigSchema.optional(),
|
|
257
268
|
marker: ExternalPdfConverterConfigSchema.optional(),
|
|
258
269
|
external: ExternalPdfConverterConfigSchema.optional()
|
|
@@ -339,8 +350,7 @@ const DEFAULT_MINERU_CONFIG = {
|
|
|
339
350
|
"{outputDir}"
|
|
340
351
|
],
|
|
341
352
|
timeout: 600,
|
|
342
|
-
fallbackToUnpdf: true
|
|
343
|
-
keepOutput: true
|
|
353
|
+
fallbackToUnpdf: true
|
|
344
354
|
};
|
|
345
355
|
const DEFAULT_MARKITDOWN_CONFIG = {
|
|
346
356
|
command: "markitdown",
|
|
@@ -349,10 +359,8 @@ const DEFAULT_MARKITDOWN_CONFIG = {
|
|
|
349
359
|
"-o",
|
|
350
360
|
"{outputDir}/{basename}.md"
|
|
351
361
|
],
|
|
352
|
-
outputFile: "{outputDir}/{basename}.md",
|
|
353
362
|
timeout: 600,
|
|
354
|
-
fallbackToUnpdf: true
|
|
355
|
-
keepOutput: true
|
|
363
|
+
fallbackToUnpdf: true
|
|
356
364
|
};
|
|
357
365
|
const DEFAULT_MARKER_CONFIG = {
|
|
358
366
|
command: "marker_single",
|
|
@@ -361,14 +369,21 @@ const DEFAULT_MARKER_CONFIG = {
|
|
|
361
369
|
"--output_dir",
|
|
362
370
|
"{outputDir}"
|
|
363
371
|
],
|
|
364
|
-
outputFile: "{outputDir}/{basename}/{basename}.md",
|
|
365
372
|
timeout: 600,
|
|
366
|
-
fallbackToUnpdf: true
|
|
367
|
-
|
|
373
|
+
fallbackToUnpdf: true
|
|
374
|
+
};
|
|
375
|
+
const DEFAULT_MINERU_API_CONFIG = {
|
|
376
|
+
token: "",
|
|
377
|
+
baseURL: "https://mineru.net/api/v4",
|
|
378
|
+
modelVersion: "vlm",
|
|
379
|
+
isOcr: true,
|
|
380
|
+
enableFormula: true,
|
|
381
|
+
enableTable: true
|
|
368
382
|
};
|
|
369
383
|
const DEFAULT_PDF_CONFIG = {
|
|
370
384
|
converter: "unpdf",
|
|
371
385
|
mineru: DEFAULT_MINERU_CONFIG,
|
|
386
|
+
mineruApi: DEFAULT_MINERU_API_CONFIG,
|
|
372
387
|
markitdown: DEFAULT_MARKITDOWN_CONFIG,
|
|
373
388
|
marker: DEFAULT_MARKER_CONFIG
|
|
374
389
|
};
|
|
@@ -698,7 +713,8 @@ const en = {
|
|
|
698
713
|
},
|
|
699
714
|
pdf: {
|
|
700
715
|
externalNotConfigured: "External PDF converter is selected but no external command is configured.",
|
|
701
|
-
converterRequiresConfig: "PDF converter \"{{name}}\" requires configuration."
|
|
716
|
+
converterRequiresConfig: "PDF converter \"{{name}}\" requires configuration.",
|
|
717
|
+
mineruApiTokenRequired: "Mineru API Token is not configured. Please configure it in AI Settings."
|
|
702
718
|
},
|
|
703
719
|
extractionAudit: {
|
|
704
720
|
recordNotFound: "Extraction audit record not found: {{id}}",
|
|
@@ -940,7 +956,7 @@ async function initI18n(lng) {
|
|
|
940
956
|
fallbackLng: "en",
|
|
941
957
|
resources: {
|
|
942
958
|
"en": { translation: en },
|
|
943
|
-
"zh-CN": { translation: await import("./zh-CN-
|
|
959
|
+
"zh-CN": { translation: await import("./zh-CN-Qcn0DHFh.mjs").then((m) => m.zhCN) }
|
|
944
960
|
},
|
|
945
961
|
interpolation: { escapeValue: false },
|
|
946
962
|
returnNull: false
|
|
@@ -1520,4 +1536,4 @@ async function collectDoctorDiagnostics(options = {}) {
|
|
|
1520
1536
|
}
|
|
1521
1537
|
|
|
1522
1538
|
//#endregion
|
|
1523
|
-
export {
|
|
1539
|
+
export { doctorDiagnosticsTableRows as A, createConfig as C, package_default as D, name as E, version as O, AIConfigSchema as S, description as T, DEFAULT_MINERU_API_CONFIG as _, parseJsonSchema as a, PLACEHOLDER_SCHEMA as b, recognizeImageText as c, t as d, getDefaultAIConfig as f, DEFAULT_MARKITDOWN_CONFIG as g, DEFAULT_MARKER_CONFIG as h, JsonSchemaDefinitionSchema as i, formatDoctorDiagnosticsJson as j, buildDoctorDiagnostics as k, shouldUseImageOcrFallback as l, writeAIConfig as m, createMigrationConfig as n, toSnakeCase as o, readAIConfig as p, generateDrizzleConfig as r, generateDrizzleSchema as s, collectDoctorDiagnostics as t, initI18n as u, DEFAULT_MINERU_CONFIG as v, seedConfig as w, PLACEHOLDER_TEXT as x, DEFAULT_PROMPT_CONFIG as y };
|
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { A as
|
|
1
|
+
import { A as doctorDiagnosticsTableRows, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, j as formatDoctorDiagnosticsJson, k as buildDoctorDiagnostics, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics } from "./doctor-collector-GXgIY3DH.mjs";
|
|
2
2
|
|
|
3
3
|
export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
|