aiex-cli 0.1.1-beta.5 → 0.1.1-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -1,11 +1,10 @@
1
- import { A as package_default, C as DEFAULT_PROMPT_CONFIG, D as seedConfig, E as createConfig, O as description, S as DEFAULT_MINERU_CONFIG, T as PLACEHOLDER_TEXT, _ as doctorDiagnosticsSeverityRows, a as recognizeImageText, b as DEFAULT_LITEPARSE_CONFIG, c as t, d as writeAIConfig, f as AIConfigSchema, h as toSnakeCase, j as version, k as name, l as getDefaultAIConfig, m as parseJsonSchema, n as collectDoctorDiagnostics, o as shouldUseImageOcrFallback, p as JsonSchemaDefinitionSchema, r as createMigrationConfig, s as initI18n, t as generateDrizzleSchema, u as readAIConfig, v as doctorDiagnosticsTableRows, w as PLACEHOLDER_SCHEMA, x as DEFAULT_MINERU_API_CONFIG, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-DpHYeu2z.mjs";
1
+ import { A as seedConfig, C as CORRECTION_SYSTEM_PROMPT, D as PLACEHOLDER_TEXT, E as PLACEHOLDER_SCHEMA, M as name, N as package_default, O as buildCorrectionUserPrompt, P as version, S as DEFAULT_MINERU_CONFIG, T as EVIDENCE_INSTRUCTIONS, _ as doctorDiagnosticsSeverityRows, a as recognizeImageText, b as DEFAULT_LITEPARSE_CONFIG, c as t, d as readAIConfig, f as writeAIConfig, h as parseJsonSchema, j as description, k as createConfig, l as createProjectDatabase, m as JsonSchemaDefinitionSchema, n as collectDoctorDiagnostics, o as shouldUseImageOcrFallback, p as AIConfigSchema, r as createMigrationConfig, s as initI18n, t as generateDrizzleSchema, u as getDefaultAIConfig, v as doctorDiagnosticsTableRows, w as DEFAULT_PROMPT_CONFIG, x as DEFAULT_MINERU_API_CONFIG, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-B6ocPcWd.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
5
5
  import path from "node:path";
6
6
  import process from "node:process";
7
7
  import { fileURLToPath } from "node:url";
8
- import Database from "better-sqlite3";
9
8
  import { execa } from "execa";
10
9
  import { readFile, writeFile } from "jsonfile";
11
10
  import { ZodError, z } from "zod";
@@ -39,7 +38,6 @@ import { serveStatic } from "@hono/node-server/serve-static";
39
38
  import { Hono } from "hono";
40
39
  import { cors } from "hono/cors";
41
40
  import { zValidator } from "@hono/zod-validator";
42
- import { Kysely, SqliteDialect, sql } from "kysely";
43
41
 
44
42
  //#region src/infrastructure/completion/completion-scripts.ts
45
43
  function bashScript(name$1) {
@@ -164,6 +162,11 @@ function classifyInputError(error, inputProcessing) {
164
162
  if (message.includes("pdf") || message.includes("converter")) return "file_conversion";
165
163
  return "input_detection";
166
164
  }
165
+ function qualityGateError(quality) {
166
+ const invalidEvidenceFields = quality?.ai?.evidence?.invalidFields ?? [];
167
+ if (invalidEvidenceFields.length > 0) return `Evidence mismatch for field(s): ${invalidEvidenceFields.join(", ")}`;
168
+ return null;
169
+ }
167
170
 
168
171
  //#endregion
169
172
  //#region src/infrastructure/input/detect-file-kind.ts
@@ -619,26 +622,43 @@ const FILE_PART_EXTENSIONS = new Set([
619
622
  "webp"
620
623
  ]);
621
624
  const PDF_EXT_RE = /\.pdf$/i;
625
+ function textStatus(text$1) {
626
+ return text$1.trim().length > 0 ? "parsed" : "partially_parsed";
627
+ }
628
+ function withProcessingResult(inputProcessing, status, diagnostics = {}, warnings = []) {
629
+ return {
630
+ ...inputProcessing,
631
+ status,
632
+ diagnostics,
633
+ warnings: warnings.length ? warnings : void 0
634
+ };
635
+ }
622
636
  async function describeExtractFileInput(filePath, aiConfig, modelOverride) {
623
637
  const detected = await detectInputFileKind(filePath);
624
- if (detected.kind === "image") return {
625
- kind: "image",
626
- mime: detected.mime,
627
- handler: shouldUseImageOcrFallback(aiConfig, modelOverride) ? "image_local_ocr" : "image_vision"
628
- };
638
+ if (detected.kind === "image") {
639
+ const useLocalOcr = shouldUseImageOcrFallback(aiConfig, modelOverride);
640
+ return {
641
+ kind: "image",
642
+ mime: detected.mime,
643
+ handler: useLocalOcr ? "image_local_ocr" : "image_vision",
644
+ parser: useLocalOcr ? "system_ocr" : "vision_model"
645
+ };
646
+ }
629
647
  if (detected.kind === "pdf") {
630
648
  const converter = createPdfConverter(aiConfig?.pdf);
631
649
  return {
632
650
  kind: "pdf",
633
651
  mime: detected.mime,
634
652
  handler: "pdf_converter",
635
- converter: converter.name
653
+ converter: converter.name,
654
+ parser: converter.name
636
655
  };
637
656
  }
638
657
  if (detected.kind === "text") return {
639
658
  kind: "text",
640
659
  mime: detected.mime,
641
- handler: "text"
660
+ handler: "text",
661
+ parser: "utf8_text"
642
662
  };
643
663
  throw new Error(unsupportedFileTypeMessage(detected.mime ?? "application/octet-stream"));
644
664
  }
@@ -656,7 +676,11 @@ async function readExtractFileInput(filePath, aiConfig, modelOverride) {
656
676
  consola.info(t("command.extract.file.ocrText", { confidence: (result.confidence * 100).toFixed(1) }));
657
677
  return {
658
678
  text: result.text,
659
- inputProcessing,
679
+ inputProcessing: withProcessingResult(inputProcessing, textStatus(result.text), {
680
+ confidence: result.confidence,
681
+ textLength: result.text.length,
682
+ platform: process.platform
683
+ }),
660
684
  quality: { input: {
661
685
  kind: "image",
662
686
  textLength: result.text.length,
@@ -672,7 +696,7 @@ async function readExtractFileInput(filePath, aiConfig, modelOverride) {
672
696
  return {
673
697
  text: "",
674
698
  filePath,
675
- inputProcessing,
699
+ inputProcessing: withProcessingResult(inputProcessing, "parsed", { filePart: true }),
676
700
  quality: { input: { kind: "image" } }
677
701
  };
678
702
  }
@@ -695,17 +719,23 @@ async function readExtractFileInput(filePath, aiConfig, modelOverride) {
695
719
  consola.info(t("command.extract.file.markdownSaved", { path: fallbackMd }));
696
720
  }
697
721
  const textLength = result.text.length;
722
+ const emptyText = result.text.trim().length === 0;
698
723
  return {
699
724
  text: result.text,
700
- inputProcessing,
725
+ inputProcessing: withProcessingResult(inputProcessing, emptyText ? "partially_parsed" : "parsed", {
726
+ pageCount: result.pageCount,
727
+ textLength,
728
+ emptyText,
729
+ fallbackUsed: result.metadata?.fallback === "true"
730
+ }, result.warnings),
701
731
  quality: { input: {
702
732
  kind: "pdf",
703
733
  textLength,
704
- emptyText: result.text.trim().length === 0,
734
+ emptyText,
705
735
  pdf: {
706
736
  pageCount: result.pageCount,
707
737
  textLength,
708
- emptyText: result.text.trim().length === 0,
738
+ emptyText,
709
739
  fallbackUsed: result.metadata?.fallback === "true",
710
740
  converter: result.metadata?.converter ?? converter.name
711
741
  }
@@ -716,7 +746,10 @@ async function readExtractFileInput(filePath, aiConfig, modelOverride) {
716
746
  const text$1 = await fs.readFile(filePath, "utf-8");
717
747
  return {
718
748
  text: text$1,
719
- inputProcessing,
749
+ inputProcessing: withProcessingResult(inputProcessing, textStatus(text$1), {
750
+ textLength: text$1.length,
751
+ emptyText: text$1.trim().length === 0
752
+ }),
720
753
  quality: { input: {
721
754
  kind: "text",
722
755
  textLength: text$1.length,
@@ -1223,6 +1256,46 @@ function verifyFieldEvidence(input) {
1223
1256
  }
1224
1257
  return Object.keys(verified).length > 0 ? verified : void 0;
1225
1258
  }
1259
+ function isEvidenceEligibleProperty(property) {
1260
+ return property.type === "string" || property.type === "number" || property.type === "integer";
1261
+ }
1262
+ function buildFieldEvidenceQuality(input) {
1263
+ if (!isRecord$3(input.data)) return void 0;
1264
+ const fieldStatus = {};
1265
+ const supportedFields = [];
1266
+ const unsupportedFields = [];
1267
+ const missingFields = [];
1268
+ const invalidFields = [];
1269
+ for (const [field, property] of Object.entries(input.schema.properties)) {
1270
+ if (!isEvidenceEligibleProperty(property)) continue;
1271
+ const value = input.data[field];
1272
+ let status;
1273
+ if (value === null || value === void 0) {
1274
+ status = "missing";
1275
+ missingFields.push(field);
1276
+ } else if (input.verifiedEvidence?.[field]) {
1277
+ status = "supported";
1278
+ supportedFields.push(field);
1279
+ } else if (input.rawEvidence?.[field]) {
1280
+ status = "invalid";
1281
+ invalidFields.push(field);
1282
+ } else {
1283
+ status = "unsupported";
1284
+ unsupportedFields.push(field);
1285
+ }
1286
+ fieldStatus[field] = status;
1287
+ }
1288
+ const total = Object.keys(fieldStatus).length;
1289
+ if (total === 0) return void 0;
1290
+ return {
1291
+ fieldStatus,
1292
+ supportedFields,
1293
+ unsupportedFields,
1294
+ missingFields,
1295
+ invalidFields,
1296
+ supportedRate: supportedFields.length / total
1297
+ };
1298
+ }
1226
1299
 
1227
1300
  //#endregion
1228
1301
  //#region src/domain/ai-extraction/json-utils.ts
@@ -1513,12 +1586,6 @@ function validateExtractedData(schema, data) {
1513
1586
 
1514
1587
  //#endregion
1515
1588
  //#region src/domain/extraction/evidence-schema.ts
1516
- const EVIDENCE_INSTRUCTIONS = `Evidence requirements:
1517
- - Also return a top-level "_evidence" object.
1518
- - For each top-level scalar field you extracted from the text, include "_evidence.<field>.quote".
1519
- - The quote must be an exact contiguous substring copied from the input text.
1520
- - Do not invent offsets. Only provide quotes.
1521
- - If no exact quote supports a field, omit that field from "_evidence".`;
1522
1589
  function isRecord$1(value) {
1523
1590
  return typeof value === "object" && value !== null && !Array.isArray(value);
1524
1591
  }
@@ -1833,6 +1900,12 @@ async function extractStructuredData(input) {
1833
1900
  data: businessData,
1834
1901
  rawEvidence: stripped.rawEvidence
1835
1902
  }) : void 0;
1903
+ const evidenceQuality = canLocateEvidence ? buildFieldEvidenceQuality({
1904
+ schema,
1905
+ data: businessData,
1906
+ rawEvidence: stripped.rawEvidence,
1907
+ verifiedEvidence: evidence
1908
+ }) : void 0;
1836
1909
  return {
1837
1910
  success: true,
1838
1911
  outputPath: await writeExtractionOutput({
@@ -1854,7 +1927,8 @@ async function extractStructuredData(input) {
1854
1927
  selfCorrectionCount: attempt - 1,
1855
1928
  apiRetryCount,
1856
1929
  missingFields: missing.fields,
1857
- missingFieldRate: missing.rate
1930
+ missingFieldRate: missing.rate,
1931
+ evidence: evidenceQuality
1858
1932
  } }
1859
1933
  };
1860
1934
  } else validationError = validation.error;
@@ -1863,29 +1937,14 @@ async function extractStructuredData(input) {
1863
1937
  lastError = errorMsg;
1864
1938
  if (attempt < maxAttempts) {
1865
1939
  const invalidJson = data !== void 0 ? JSON.stringify(canLocateEvidence ? stripEvidence(data).data : data, null, 2) : result ? result.text : "";
1866
- systemPrompt = `You are a precise data correction assistant. Your task is to correct validation errors in a previously generated JSON object to make it comply with the provided JSON Schema.
1867
-
1868
- CRITICAL RULES:
1869
- 1. Only correct the fields that failed validation.
1870
- 2. Preserve all other correctly extracted fields and their values exactly.
1871
- 3. Return ONLY the corrected JSON object. No explanations, no markdown blocks other than JSON.`;
1872
- userPrompt = `The JSON data you generated previously failed validation. Please correct it.
1873
-
1874
- [Original Text]
1875
- ${text$1 || "Data is contained in the attached file."}
1876
-
1877
- [JSON Schema Definition]
1878
- ${JSON.stringify(schemaToExtractionOutputSchema(schema), null, 2)}
1879
-
1880
- [Previously Generated Invalid JSON]
1881
- ${invalidJson}
1882
-
1883
- [Validation Error Details]
1884
- ${errorMsg}
1885
-
1886
- ${canLocateEvidence ? EVIDENCE_INSTRUCTIONS : ""}
1887
-
1888
- Please output the corrected JSON object now:`;
1940
+ systemPrompt = CORRECTION_SYSTEM_PROMPT;
1941
+ userPrompt = buildCorrectionUserPrompt({
1942
+ text: text$1,
1943
+ schema: schemaToExtractionOutputSchema(schema),
1944
+ invalidJson,
1945
+ error: errorMsg,
1946
+ includeEvidenceInstructions: canLocateEvidence
1947
+ });
1889
1948
  }
1890
1949
  }
1891
1950
  return {
@@ -1953,161 +2012,23 @@ async function listSchemas(aiexDir) {
1953
2012
  }
1954
2013
  }
1955
2014
 
1956
- //#endregion
1957
- //#region src/infrastructure/extraction/insert-extracted-data.ts
1958
- function convertValue(value, column) {
1959
- if (value === null || value === void 0) return null;
1960
- const mode = column.columnType.class !== "real" ? column.columnType.mode : void 0;
1961
- if (mode === "json") return typeof value === "string" ? value : JSON.stringify(value);
1962
- if (mode === "boolean") return value ? 1 : 0;
1963
- if (mode === "timestamp" || mode === "timestamp_ms") {
1964
- if (typeof value === "string") {
1965
- const ms = Date.parse(value);
1966
- if (Number.isNaN(ms)) return value;
1967
- return mode === "timestamp_ms" ? ms : Math.floor(ms / 1e3);
1968
- }
1969
- return value;
1970
- }
1971
- return value;
1972
- }
1973
- function buildInsertSql(table, data) {
1974
- const columns = [];
1975
- const values = [];
1976
- for (const col of table.columns) {
1977
- if (col.isAutoIncrement) continue;
1978
- const value = data[col.name];
1979
- if (value === void 0) {
1980
- if (col.default !== void 0) {
1981
- columns.push(col.name);
1982
- values.push(convertValue(col.default, col));
1983
- }
1984
- continue;
1985
- }
1986
- columns.push(col.name);
1987
- values.push(convertValue(value, col));
1988
- }
1989
- const placeholders = values.map(() => "?").join(", ");
1990
- return {
1991
- sql: `INSERT INTO ${table.name} (${columns.join(", ")}) VALUES (${placeholders})`,
1992
- values
1993
- };
1994
- }
1995
- function insertTableRow({ db, table, data, parentRowId, foreignKeyColumn }) {
1996
- const rowData = { ...data };
1997
- if (parentRowId !== void 0 && foreignKeyColumn) rowData[foreignKeyColumn] = parentRowId;
1998
- const { sql: sql$1, values } = buildInsertSql(table, rowData);
1999
- const info = db.prepare(sql$1).run(...values);
2000
- return Number(info.lastInsertRowid);
2001
- }
2002
- function parseDataByColumns(data, schema, table) {
2003
- const result = {};
2004
- if ("properties" in schema) {
2005
- const s = schema;
2006
- for (const [propName, prop] of Object.entries(s.properties)) {
2007
- if (prop.nested?.enabled) continue;
2008
- if (prop.type === "array" && prop.items?.nested?.enabled) continue;
2009
- const colName = toSnakeCase(propName);
2010
- if (table.columns.some((c) => c.name === colName && c.isAutoIncrement)) continue;
2011
- if (propName in data) result[colName] = data[propName];
2012
- }
2013
- }
2014
- if (schema.table?.timestamps) {
2015
- if (!("created_at" in result)) result.created_at = Math.floor(Date.now() / 1e3);
2016
- if (!("updated_at" in result)) result.updated_at = Math.floor(Date.now() / 1e3);
2017
- }
2018
- return result;
2019
- }
2020
- function insertExtractedData(db, schema, data) {
2021
- const inserted = [];
2022
- try {
2023
- const parseResult = parseJsonSchema(schema);
2024
- const mainTable = parseResult.tables[0];
2025
- db.transaction(() => {
2026
- const mainRowId = insertTableRow({
2027
- db,
2028
- table: mainTable,
2029
- data: parseDataByColumns(data, schema, mainTable)
2030
- });
2031
- inserted.push({
2032
- table: mainTable.name,
2033
- rowId: mainRowId
2034
- });
2035
- for (const revRel of parseResult.reverseRelations) {
2036
- const rel = parseResult.relations.find((r) => r.fromTable === revRel.toTable && r.toTable === revRel.fromTable);
2037
- if (!rel) continue;
2038
- const propEntry = Object.entries(schema.properties).find(([key]) => toSnakeCase(key) === revRel.name && key in data);
2039
- if (!propEntry) continue;
2040
- const [propName] = propEntry;
2041
- const nestedValue = data[propName];
2042
- if (nestedValue === null || nestedValue === void 0) continue;
2043
- const nestedTable = parseResult.tables.find((t$1) => t$1.name === revRel.toTable);
2044
- if (!nestedTable) continue;
2045
- if (revRel.type === "has-one") {
2046
- const rowId = insertTableRow({
2047
- db,
2048
- table: nestedTable,
2049
- data: parseDataByColumns(nestedValue, schema.properties[propName], nestedTable),
2050
- parentRowId: mainRowId,
2051
- foreignKeyColumn: rel.fromColumn
2052
- });
2053
- inserted.push({
2054
- table: revRel.toTable,
2055
- rowId
2056
- });
2057
- } else if (revRel.type === "has-many") {
2058
- const items = nestedValue;
2059
- for (const item of items) {
2060
- const rowId = insertTableRow({
2061
- db,
2062
- table: nestedTable,
2063
- data: parseDataByColumns(item, schema.properties[propName].items, nestedTable),
2064
- parentRowId: mainRowId,
2065
- foreignKeyColumn: rel.fromColumn
2066
- });
2067
- inserted.push({
2068
- table: revRel.toTable,
2069
- rowId
2070
- });
2071
- }
2072
- }
2073
- }
2074
- return mainRowId;
2075
- })();
2076
- return {
2077
- success: true,
2078
- tablesInserted: inserted
2079
- };
2080
- } catch (e) {
2081
- return {
2082
- success: false,
2083
- tablesInserted: inserted,
2084
- error: e instanceof Error ? e.message : String(e)
2085
- };
2086
- }
2087
- }
2088
-
2089
2015
  //#endregion
2090
2016
  //#region src/application/extraction/run-extraction.ts
2091
- async function ensureDatabaseReady(dbPath, schema) {
2017
+ async function ensureDatabaseReady(config, schema) {
2018
+ const database = createProjectDatabase(config);
2019
+ if (!await database.exists()) return t("errors.db.notFound", {
2020
+ path: pc.cyan(".aiex/database.db"),
2021
+ cmd: pc.cyan("aiex schema")
2022
+ });
2092
2023
  try {
2093
- await fs.access(dbPath);
2094
- } catch {
2095
- return t("errors.db.notFound", {
2096
- path: pc.cyan(".aiex/database.db"),
2024
+ const result = parseJsonSchema(schema);
2025
+ const tableCheck = await database.verifyTables(result.tables.map((table) => table.name));
2026
+ if (tableCheck.error) return t("errors.db.cannotVerify", { error: tableCheck.error });
2027
+ const missing = tableCheck.missing[0];
2028
+ if (missing) return t("errors.db.tableNotFound", {
2029
+ name: missing,
2097
2030
  cmd: pc.cyan("aiex schema")
2098
2031
  });
2099
- }
2100
- try {
2101
- const result = parseJsonSchema(schema);
2102
- const db = new Database(dbPath);
2103
- try {
2104
- for (const table of result.tables) if (!db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name=?`).get(table.name)) return t("errors.db.tableNotFound", {
2105
- name: table.name,
2106
- cmd: pc.cyan("aiex schema")
2107
- });
2108
- } finally {
2109
- db.close();
2110
- }
2111
2032
  } catch (e) {
2112
2033
  return t("errors.db.cannotVerify", { error: e instanceof Error ? e.message : String(e) });
2113
2034
  }
@@ -2160,55 +2081,75 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
2160
2081
  total: result.tokensUsed.total
2161
2082
  })));
2162
2083
  if (result.data && options?.insert !== false) {
2084
+ const qualityError = qualityGateError(result.quality);
2085
+ if (qualityError) {
2086
+ consola.error(qualityError);
2087
+ return {
2088
+ success: false,
2089
+ error: qualityError,
2090
+ outputPath: result.outputPath,
2091
+ data: result.data,
2092
+ tokensUsed: result.tokensUsed,
2093
+ quality: result.quality,
2094
+ failureStage: "ai_extraction",
2095
+ evidence: result.evidence
2096
+ };
2097
+ }
2163
2098
  const s2 = spinner();
2164
2099
  if (!options?.quiet) s2.start(t("command.extract.file.insertingDb"));
2165
- const dbError = await ensureDatabaseReady(config.databasePath, schemaLoad.schema);
2100
+ const dbError = await ensureDatabaseReady(config, schemaLoad.schema);
2166
2101
  if (dbError) {
2167
2102
  if (!options?.quiet) s2.stop(t("command.extract.file.dbNotReady"));
2168
2103
  consola.error(dbError);
2169
2104
  return {
2170
2105
  success: false,
2171
2106
  error: dbError,
2107
+ outputPath: result.outputPath,
2108
+ data: result.data,
2109
+ tokensUsed: result.tokensUsed,
2172
2110
  quality: result.quality,
2173
- failureStage: "db_insert"
2111
+ failureStage: "db_insert",
2112
+ evidence: result.evidence
2174
2113
  };
2175
2114
  }
2176
2115
  try {
2177
- const db = new Database(config.databasePath);
2178
- try {
2179
- const insertResult = insertExtractedData(db, schemaLoad.schema, result.data);
2180
- if (insertResult.success) {
2181
- if (!options?.quiet) s2.stop(t("command.extract.file.insertedTables", { count: insertResult.tablesInserted.length }));
2182
- return {
2183
- success: true,
2184
- outputPath: result.outputPath,
2185
- data: result.data,
2186
- tablesInserted: insertResult.tablesInserted,
2187
- tokensUsed: result.tokensUsed,
2188
- quality: result.quality,
2189
- evidence: result.evidence
2190
- };
2191
- } else {
2192
- if (!options?.quiet) s2.stop(t("command.extract.file.dbInsertFail"));
2193
- consola.error(insertResult.error || t("common.unknownError"));
2194
- return {
2195
- success: false,
2196
- error: insertResult.error,
2197
- quality: result.quality,
2198
- failureStage: "db_insert"
2199
- };
2200
- }
2201
- } finally {
2202
- db.close();
2116
+ const insertResult = createProjectDatabase(config).insertExtracted(schemaLoad.schema, result.data);
2117
+ if (insertResult.success) {
2118
+ if (!options?.quiet) s2.stop(t("command.extract.file.insertedTables", { count: insertResult.tablesInserted.length }));
2119
+ return {
2120
+ success: true,
2121
+ outputPath: result.outputPath,
2122
+ data: result.data,
2123
+ tablesInserted: insertResult.tablesInserted,
2124
+ tokensUsed: result.tokensUsed,
2125
+ quality: result.quality,
2126
+ evidence: result.evidence
2127
+ };
2203
2128
  }
2129
+ if (!options?.quiet) s2.stop(t("command.extract.file.dbInsertFail"));
2130
+ consola.error(insertResult.error || t("common.unknownError"));
2131
+ return {
2132
+ success: false,
2133
+ error: insertResult.error,
2134
+ outputPath: result.outputPath,
2135
+ data: result.data,
2136
+ tokensUsed: result.tokensUsed,
2137
+ quality: result.quality,
2138
+ failureStage: "db_insert",
2139
+ evidence: result.evidence
2140
+ };
2204
2141
  } catch (e) {
2205
2142
  if (!options?.quiet) s2.stop(t("command.extract.file.dbInsertFail"));
2206
2143
  consola.error(e instanceof Error ? e.message : String(e));
2207
2144
  return {
2208
2145
  success: false,
2209
2146
  error: String(e),
2147
+ outputPath: result.outputPath,
2148
+ data: result.data,
2149
+ tokensUsed: result.tokensUsed,
2210
2150
  quality: result.quality,
2211
- failureStage: "db_insert"
2151
+ failureStage: "db_insert",
2152
+ evidence: result.evidence
2212
2153
  };
2213
2154
  }
2214
2155
  }
@@ -2227,19 +2168,7 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
2227
2168
  async function runAuditedExtraction(options) {
2228
2169
  const { aiexDir, config, aiConfig, schemaName, source, modelOverride, retryOf, insert, force, quiet = false } = options;
2229
2170
  let fileHash;
2230
- let isPlainTextFile = false;
2231
2171
  if (source.type === "file") {
2232
- const ext = path.extname(source.filePath).toLowerCase().replace(".", "");
2233
- isPlainTextFile = [
2234
- "txt",
2235
- "md",
2236
- "csv",
2237
- "json",
2238
- "html",
2239
- "xml",
2240
- "yaml",
2241
- "yml"
2242
- ].includes(ext);
2243
2172
  try {
2244
2173
  fileHash = await getFileHash(source.filePath);
2245
2174
  } catch (e) {
@@ -2248,7 +2177,7 @@ async function runAuditedExtraction(options) {
2248
2177
  error: e instanceof Error ? e.message : String(e)
2249
2178
  }));
2250
2179
  }
2251
- if (fileHash && !isPlainTextFile && !force) {
2180
+ if (fileHash && !force) {
2252
2181
  const existing = await findSucceededAuditByHash(aiexDir, schemaName, fileHash);
2253
2182
  if (existing) {
2254
2183
  if (!quiet) consola.info(t("command.extract.file.alreadyProcessed", {
@@ -2367,17 +2296,23 @@ async function runAuditedExtraction(options) {
2367
2296
  await updateExtractionAuditRecord(aiexDir, audit.id, {
2368
2297
  status: "failed",
2369
2298
  error: r.error || "Extraction failed",
2299
+ outputPath: r.outputPath,
2300
+ outputName: r.outputPath ? path.basename(r.outputPath) : void 0,
2301
+ tokensUsed: r.tokensUsed,
2370
2302
  quality: mergeQuality(inputQuality, r.quality),
2371
2303
  failureStage: r.failureStage ?? "ai_extraction",
2372
2304
  evidence: r.evidence
2373
2305
  });
2374
2306
  if (!quiet) consola.error(t("command.extract.file.extractionFailed", { error: r.error }));
2375
- await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.failed", source, void 0, r.error || "Extraction failed", void 0, quiet);
2307
+ await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.failed", source, r.data, r.error || "Extraction failed", r.tokensUsed, quiet);
2376
2308
  return {
2377
2309
  success: false,
2378
2310
  error: r.error,
2311
+ outputPath: r.outputPath,
2312
+ outputName: r.outputPath ? path.basename(r.outputPath) : void 0,
2379
2313
  auditId: audit.id,
2380
2314
  fileHash,
2315
+ tokensUsed: r.tokensUsed,
2381
2316
  inputProcessing,
2382
2317
  quality: mergeQuality(inputQuality, r.quality),
2383
2318
  failureStage: r.failureStage ?? "ai_extraction",
@@ -2798,7 +2733,7 @@ function analyzeMigrationRisk(previousEntries, nextEntries) {
2798
2733
  addRisk(items, "high", "column_removed", previous.table, previous.column, `Column "${key}" will be removed.`);
2799
2734
  continue;
2800
2735
  }
2801
- if (previous.sqliteType !== next.sqliteType || previous.drizzleType !== next.drizzleType) addRisk(items, "high", "column_type_changed", previous.table, previous.column, `Column "${key}" type changes from ${previous.drizzleType} to ${next.drizzleType}.`);
2736
+ if (previous.databaseType !== next.databaseType || previous.drizzleType !== next.drizzleType) addRisk(items, "high", "column_type_changed", previous.table, previous.column, `Column "${key}" type changes from ${previous.drizzleType} to ${next.drizzleType}.`);
2802
2737
  if (previous.nullable && !next.nullable) addRisk(items, "high", "nullable_tightened", previous.table, previous.column, `Column "${key}" changes from nullable to not null.`);
2803
2738
  else if (!previous.nullable && next.nullable) addRisk(items, "medium", "nullable_relaxed", previous.table, previous.column, `Column "${key}" changes from not null to nullable.`);
2804
2739
  if (!previous.unique && next.unique) addRisk(items, "high", "unique_added", previous.table, previous.column, `Column "${key}" adds a unique constraint.`);
@@ -2839,6 +2774,51 @@ function resolveHelperPath() {
2839
2774
  }
2840
2775
  }
2841
2776
 
2777
+ //#endregion
2778
+ //#region src/infrastructure/database/migration-runner.ts
2779
+ const execFileAsync = promisify(execFile);
2780
+ function parseMigrationOutput(stdout, stderr) {
2781
+ try {
2782
+ const jsonLine = stdout.trim().split("\n").find((l) => l.startsWith("{") && l.endsWith("}"));
2783
+ if (!jsonLine) return {
2784
+ success: false,
2785
+ error: t("errors.schema.migrationHelperInvalidOutput")
2786
+ };
2787
+ const result = JSON.parse(jsonLine);
2788
+ if (!result.success) return {
2789
+ success: false,
2790
+ error: result.error || t("errors.schema.migrationFailed")
2791
+ };
2792
+ return result;
2793
+ } catch {
2794
+ return {
2795
+ success: false,
2796
+ error: stderr || stdout || t("errors.schema.migrationHelperFailed")
2797
+ };
2798
+ }
2799
+ }
2800
+ async function runDatabaseMigration(config, migrationName) {
2801
+ const helperPath = resolveHelperPath();
2802
+ const helperArgs = [
2803
+ resolveTsxPath(),
2804
+ helperPath,
2805
+ config.drizzleSchemaPath,
2806
+ config.migrationsPath,
2807
+ config.databasePath
2808
+ ];
2809
+ if (migrationName) helperArgs.push(migrationName);
2810
+ try {
2811
+ const { stdout, stderr } = await execFileAsync(process.execPath, helperArgs, { cwd: process.cwd() });
2812
+ return parseMigrationOutput(stdout, stderr);
2813
+ } catch (error) {
2814
+ const execError = error;
2815
+ return {
2816
+ success: false,
2817
+ error: execError.stderr || execError.stdout || execError.message || String(error)
2818
+ };
2819
+ }
2820
+ }
2821
+
2842
2822
  //#endregion
2843
2823
  //#region src/domain/schema/dialect.ts
2844
2824
  const TOP_LEVEL_KEYS = new Set([
@@ -2935,6 +2915,14 @@ function collectDialectWarnings(schema, filePath) {
2935
2915
  return warnings;
2936
2916
  }
2937
2917
 
2918
+ //#endregion
2919
+ //#region src/infrastructure/database/schema-generator.ts
2920
+ function generateDatabaseSchema(result, dialect = "sqlite") {
2921
+ switch (dialect) {
2922
+ case "sqlite": return generateDrizzleSchema(result);
2923
+ }
2924
+ }
2925
+
2938
2926
  //#endregion
2939
2927
  //#region src/application/schema/parse-all-schemas.ts
2940
2928
  function formatZodError(error, filePath) {
@@ -2983,7 +2971,7 @@ function parseAllSchemas(entries) {
2983
2971
  reverseRelations,
2984
2972
  warnings,
2985
2973
  mapping,
2986
- drizzleCode: generateDrizzleSchema({
2974
+ drizzleCode: generateDatabaseSchema({
2987
2975
  tables,
2988
2976
  relations,
2989
2977
  reverseRelations,
@@ -2995,7 +2983,6 @@ function parseAllSchemas(entries) {
2995
2983
 
2996
2984
  //#endregion
2997
2985
  //#region src/application/schema/schema-sync.ts
2998
- const execFileAsync = promisify(execFile);
2999
2986
  const NO_RISK_REPORT = {
3000
2987
  level: "none",
3001
2988
  items: [],
@@ -3045,7 +3032,8 @@ async function generateSchemaFromFiles(schemaFiles, config, options = {}) {
3045
3032
  await fs.writeFile(config.drizzleSchemaPath, drizzleCode);
3046
3033
  await fs.writeFile(schemaMapPath(config), `${JSON.stringify({
3047
3034
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
3048
- dialect: "aiex-drizzle-sqlite",
3035
+ dialect: "aiex-drizzle",
3036
+ databaseDialect: config.databaseDialect,
3049
3037
  entries: mapping,
3050
3038
  baselineEntries: riskReport.hasHighRisk && !options.force ? previousMapping : void 0,
3051
3039
  warnings,
@@ -3061,46 +3049,8 @@ async function generateSchemaFromFiles(schemaFiles, config, options = {}) {
3061
3049
  riskReport
3062
3050
  };
3063
3051
  }
3064
- function parseMigrationOutput(stdout, stderr) {
3065
- try {
3066
- const jsonLine = stdout.trim().split("\n").find((l) => l.startsWith("{") && l.endsWith("}"));
3067
- if (!jsonLine) return {
3068
- success: false,
3069
- error: t("errors.schema.migrationHelperInvalidOutput")
3070
- };
3071
- const result = JSON.parse(jsonLine);
3072
- if (!result.success) return {
3073
- success: false,
3074
- error: result.error || t("errors.schema.migrationFailed")
3075
- };
3076
- return result;
3077
- } catch {
3078
- return {
3079
- success: false,
3080
- error: stderr || stdout || t("errors.schema.migrationHelperFailed")
3081
- };
3082
- }
3083
- }
3084
3052
  async function runSchemaMigration(config, migrationName) {
3085
- const helperPath = resolveHelperPath();
3086
- const helperArgs = [
3087
- resolveTsxPath(),
3088
- helperPath,
3089
- config.drizzleSchemaPath,
3090
- config.migrationsPath,
3091
- config.databasePath
3092
- ];
3093
- if (migrationName) helperArgs.push(migrationName);
3094
- try {
3095
- const { stdout, stderr } = await execFileAsync(process.execPath, helperArgs, { cwd: process.cwd() });
3096
- return parseMigrationOutput(stdout, stderr);
3097
- } catch (error) {
3098
- const execError = error;
3099
- return {
3100
- success: false,
3101
- error: execError.stderr || execError.stdout || execError.message || String(error)
3102
- };
3103
- }
3053
+ return runDatabaseMigration(config, migrationName);
3104
3054
  }
3105
3055
  async function runSchemaSync(config, options = {}) {
3106
3056
  const schemaFiles = await listSchemaFiles(config.schemaPath);
@@ -16255,7 +16205,6 @@ function aiRoutes(config) {
16255
16205
  //#region src/application/data/data-service.ts
16256
16206
  const FILE_REGEX = /\.json$/;
16257
16207
  const EXTRACTION_TIMESTAMP_RE = /-\d{4}-\d{2}-\d{2}T/;
16258
- const INTERNAL_ROWID_COLUMN = "__aiex_rowid";
16259
16208
  const TIMESTAMP_CLEANUP = /(\d{2})-(\d{2})-(\d{2})/;
16260
16209
  const TIMESTAMP_TZ = /(\d{3})Z/;
16261
16210
  function schemaNameFromExtractionFile(name$1) {
@@ -16289,9 +16238,6 @@ async function getRowExtractionActions(aiexDir, tableName) {
16289
16238
  }
16290
16239
  return actions;
16291
16240
  }
16292
- function createReadonlyQueryDb(databasePath) {
16293
- return new Kysely({ dialect: new SqliteDialect({ database: new Database(databasePath, { readonly: true }) }) });
16294
- }
16295
16241
  async function listExtractions(config) {
16296
16242
  const aiexDir = path.dirname(config.schemaPath);
16297
16243
  const extractedDir = path.join(aiexDir, "extracted");
@@ -16337,19 +16283,10 @@ async function listTables(config) {
16337
16283
  } catch {
16338
16284
  schemaFiles = [];
16339
16285
  }
16340
- let db = null;
16341
16286
  let dbTables = [];
16342
16287
  try {
16343
- db = createReadonlyQueryDb(config.databasePath);
16344
- dbTables = (await sql`
16345
- select name
16346
- from sqlite_master
16347
- where type = 'table' and name not like 'sqlite_%' and name not like '_%'
16348
- order by name
16349
- `.execute(db)).rows.map((row) => row.name);
16350
- } catch {} finally {
16351
- await db?.destroy();
16352
- }
16288
+ dbTables = await createProjectDatabase(config).listTableNames();
16289
+ } catch {}
16353
16290
  const tables = [];
16354
16291
  for (const file of schemaFiles) try {
16355
16292
  const schema = await readFile(path.join(schemaDir, file));
@@ -16368,57 +16305,26 @@ async function listTables(config) {
16368
16305
  async function getTableData(config, tableName, query) {
16369
16306
  const { page, pageSize, search, sortField, sortOrder, all } = query;
16370
16307
  const aiexDir = path.dirname(config.schemaPath);
16371
- let db;
16308
+ const database = createProjectDatabase(config);
16309
+ if (!await database.exists()) throw new Error(t("server.dbNotFound"));
16372
16310
  try {
16373
- db = createReadonlyQueryDb(config.databasePath);
16374
- } catch {
16375
- throw new Error(t("server.dbNotFound"));
16376
- }
16377
- try {
16378
- if ((await sql`
16379
- select name
16380
- from sqlite_master
16381
- where type = 'table' and name = ${tableName}
16382
- `.execute(db)).rows.length === 0) throw new Error(t("server.tableNotFound", { name: tableName }));
16383
- const columns = (await sql`
16384
- pragma table_info(${sql.table(tableName)})
16385
- `.execute(db)).rows.map((col) => ({
16386
- name: col.name,
16387
- type: col.type,
16388
- notNull: !!col.notnull,
16389
- pk: !!col.pk
16390
- }));
16391
- const searchConditions = columns.map((col) => sql`${sql.ref(col.name)} like ${`%${search}%`}`);
16392
- const searchCondition = search ? sql`where ${sql.join(searchConditions, sql` or `)}` : sql``;
16393
- const sortColumn = columns.find((col) => col.name === sortField);
16394
- const orderBy = sortColumn ? sql`order by ${sql.ref(sortColumn.name)} ${sql.raw(sortOrder === "desc" ? "desc" : "asc")}` : sql``;
16395
- const total = (await sql`
16396
- select count(*) as count
16397
- from ${sql.table(tableName)}
16398
- ${searchCondition}
16399
- `.execute(db)).rows[0]?.count ?? 0;
16400
- const offset = (page - 1) * pageSize;
16401
- const totalPages = all ? 1 : Math.max(1, Math.ceil(total / pageSize));
16402
- const result = all ? await sql`
16403
- select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
16404
- from ${sql.table(tableName)}
16405
- ${searchCondition}
16406
- ${orderBy}
16407
- `.execute(db) : await sql`
16408
- select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
16409
- from ${sql.table(tableName)}
16410
- ${searchCondition}
16411
- ${orderBy}
16412
- limit ${pageSize}
16413
- offset ${offset}
16414
- `.execute(db);
16311
+ const tableRows = await database.readTableRows({
16312
+ tableName,
16313
+ page,
16314
+ pageSize,
16315
+ search,
16316
+ sortField,
16317
+ sortOrder,
16318
+ all
16319
+ }).catch((error) => {
16320
+ if (error instanceof Error && error.message === `Table not found: ${tableName}`) throw new Error(t("server.tableNotFound", { name: tableName }));
16321
+ throw error;
16322
+ });
16415
16323
  const actionsByRowId = await getRowExtractionActions(aiexDir, tableName);
16416
- const rowActions = Object.fromEntries(result.rows.map((row, index) => {
16417
- const rowId = row[INTERNAL_ROWID_COLUMN];
16418
- const action = rowId === null || rowId === void 0 ? void 0 : actionsByRowId.get(String(rowId));
16324
+ const rowActions = Object.fromEntries(tableRows.rowIds.map((rowId, index) => {
16325
+ const action = rowId === void 0 ? void 0 : actionsByRowId.get(rowId);
16419
16326
  return action ? [String(index), action] : null;
16420
16327
  }).filter((entry) => !!entry));
16421
- const rows = result.rows.map(({ [INTERNAL_ROWID_COLUMN]: _rowid, ...row }) => row);
16422
16328
  const schemaDir = config.schemaPath;
16423
16329
  let schema = null;
16424
16330
  try {
@@ -16432,17 +16338,18 @@ async function getTableData(config, tableName, query) {
16432
16338
  }
16433
16339
  } catch {}
16434
16340
  return {
16435
- columns,
16436
- rows,
16341
+ columns: tableRows.columns,
16342
+ rows: tableRows.rows,
16437
16343
  rowActions,
16438
- total,
16439
- page: all ? 1 : page,
16440
- pageSize: all ? total : pageSize,
16441
- totalPages,
16344
+ total: tableRows.total,
16345
+ page: tableRows.page,
16346
+ pageSize: all ? tableRows.total : tableRows.pageSize,
16347
+ totalPages: tableRows.totalPages,
16442
16348
  schema
16443
16349
  };
16444
- } finally {
16445
- await db.destroy();
16350
+ } catch (error) {
16351
+ if (error instanceof Error) throw error;
16352
+ throw new Error(String(error));
16446
16353
  }
16447
16354
  }
16448
16355
  async function retryNotionSync(config, fileName) {