aiex-cli 0.1.1-beta.6 → 0.1.1-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -37,10 +37,10 @@ aiex watch -s invoice -d ./watch_folder # watch folder daemon for automatic extr
37
37
  - **AI Extraction** — Extract structured data from files (text, images, PDFs) using any OpenAI-compatible provider (OpenAI, Anthropic, Ollama, DeepSeek, local models, etc.)
38
38
  - **Interactive Mode** — Run `aiex extract` without arguments for a guided extraction workflow
39
39
  - **Batch Mode** — `aiex extract -d <dir>` processes entire directories with optional glob filtering
40
- - **Incremental Extraction** — File hash deduplication skips already-processed files; use `--force` to override
40
+ - **Incremental Extraction** — File hash deduplication skips already-processed file inputs; use `--force` to override
41
41
  - **Web Data Export** — Export SQLite table data to CSV, Excel (.xlsx), or JSON from the Web UI
42
42
  - **Notion Sync** — Optionally sync CLI extraction results to configured Notion data sources
43
- - **Extraction Audit Trail** — Every extraction is recorded with status, input source, output path, token usage, database inserts, Notion pages, and errors
43
+ - **Extraction Audit Trail** — Every extraction is recorded with status, input source, parser diagnostics, evidence quality, output path, token usage, database inserts, Notion pages, and errors
44
44
  - **Built-in Model Registry** — Knows capabilities of 2000+ models (vision, structured output) so you don't have to guess
45
45
 
46
46
  <br>
@@ -90,7 +90,7 @@ Saves the extracted result to `.aiex/extracted/<schema-name>-<timestamp>.json` w
90
90
 
91
91
  By default, aiex automatically selects a model based on your input type (vision-capable for images, structured output for text). Use `--model` / `-m` to override and specify any model from your AI configuration.
92
92
 
93
- Every extraction is also recorded under `.aiex/extracted/_audit/`. Audit records include the run status (`running`, `succeeded`, `failed`, or `stale`), schema name, input source, output file, token usage, inserted table rows, synced Notion pages, retry lineage, and error message. Use the Web UI to inspect, retry, or delete extraction records.
93
+ Every extraction is also recorded under `.aiex/extracted/_audit/`. Audit records include the run status (`running`, `succeeded`, `failed`, or `stale`), schema name, input source, parser status, parser diagnostics, input quality, field evidence quality, output file, token usage, inserted table rows, synced Notion pages, retry lineage, failure stage, and error message. Use the Web UI to inspect, retry, or delete extraction records.
94
94
 
95
95
  ### 4. Watch Folder Daemon (Auto-Extraction)
96
96
 
package/dist/cli.mjs CHANGED
@@ -1,11 +1,10 @@
1
- import { A as seedConfig, C as CORRECTION_SYSTEM_PROMPT, D as PLACEHOLDER_TEXT, E as PLACEHOLDER_SCHEMA, M as name, N as package_default, O as buildCorrectionUserPrompt, P as version, S as DEFAULT_MINERU_CONFIG, T as EVIDENCE_INSTRUCTIONS, _ as doctorDiagnosticsSeverityRows, a as recognizeImageText, b as DEFAULT_LITEPARSE_CONFIG, c as t, d as writeAIConfig, f as AIConfigSchema, h as toSnakeCase, j as description, k as createConfig, l as getDefaultAIConfig, m as parseJsonSchema, n as collectDoctorDiagnostics, o as shouldUseImageOcrFallback, p as JsonSchemaDefinitionSchema, r as createMigrationConfig, s as initI18n, t as generateDrizzleSchema, u as readAIConfig, v as doctorDiagnosticsTableRows, w as DEFAULT_PROMPT_CONFIG, x as DEFAULT_MINERU_API_CONFIG, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-DAeXmyrZ.mjs";
1
+ import { A as seedConfig, C as CORRECTION_SYSTEM_PROMPT, D as PLACEHOLDER_TEXT, E as PLACEHOLDER_SCHEMA, M as name, N as package_default, O as buildCorrectionUserPrompt, P as version, S as DEFAULT_MINERU_CONFIG, T as EVIDENCE_INSTRUCTIONS, _ as doctorDiagnosticsSeverityRows, a as recognizeImageText, b as DEFAULT_LITEPARSE_CONFIG, c as t, d as readAIConfig, f as writeAIConfig, h as parseJsonSchema, j as description, k as createConfig, l as createProjectDatabase, m as JsonSchemaDefinitionSchema, n as collectDoctorDiagnostics, o as shouldUseImageOcrFallback, p as AIConfigSchema, r as createMigrationConfig, s as initI18n, t as generateDrizzleSchema, u as getDefaultAIConfig, v as doctorDiagnosticsTableRows, w as DEFAULT_PROMPT_CONFIG, x as DEFAULT_MINERU_API_CONFIG, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-B6ocPcWd.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
5
5
  import path from "node:path";
6
6
  import process from "node:process";
7
7
  import { fileURLToPath } from "node:url";
8
- import Database from "better-sqlite3";
9
8
  import { execa } from "execa";
10
9
  import { readFile, writeFile } from "jsonfile";
11
10
  import { ZodError, z } from "zod";
@@ -39,7 +38,6 @@ import { serveStatic } from "@hono/node-server/serve-static";
39
38
  import { Hono } from "hono";
40
39
  import { cors } from "hono/cors";
41
40
  import { zValidator } from "@hono/zod-validator";
42
- import { Kysely, SqliteDialect, sql } from "kysely";
43
41
 
44
42
  //#region src/infrastructure/completion/completion-scripts.ts
45
43
  function bashScript(name$1) {
@@ -164,6 +162,11 @@ function classifyInputError(error, inputProcessing) {
164
162
  if (message.includes("pdf") || message.includes("converter")) return "file_conversion";
165
163
  return "input_detection";
166
164
  }
165
+ function qualityGateError(quality) {
166
+ const invalidEvidenceFields = quality?.ai?.evidence?.invalidFields ?? [];
167
+ if (invalidEvidenceFields.length > 0) return `Evidence mismatch for field(s): ${invalidEvidenceFields.join(", ")}`;
168
+ return null;
169
+ }
167
170
 
168
171
  //#endregion
169
172
  //#region src/infrastructure/input/detect-file-kind.ts
@@ -619,26 +622,43 @@ const FILE_PART_EXTENSIONS = new Set([
619
622
  "webp"
620
623
  ]);
621
624
  const PDF_EXT_RE = /\.pdf$/i;
625
+ function textStatus(text$1) {
626
+ return text$1.trim().length > 0 ? "parsed" : "partially_parsed";
627
+ }
628
+ function withProcessingResult(inputProcessing, status, diagnostics = {}, warnings = []) {
629
+ return {
630
+ ...inputProcessing,
631
+ status,
632
+ diagnostics,
633
+ warnings: warnings.length ? warnings : void 0
634
+ };
635
+ }
622
636
  async function describeExtractFileInput(filePath, aiConfig, modelOverride) {
623
637
  const detected = await detectInputFileKind(filePath);
624
- if (detected.kind === "image") return {
625
- kind: "image",
626
- mime: detected.mime,
627
- handler: shouldUseImageOcrFallback(aiConfig, modelOverride) ? "image_local_ocr" : "image_vision"
628
- };
638
+ if (detected.kind === "image") {
639
+ const useLocalOcr = shouldUseImageOcrFallback(aiConfig, modelOverride);
640
+ return {
641
+ kind: "image",
642
+ mime: detected.mime,
643
+ handler: useLocalOcr ? "image_local_ocr" : "image_vision",
644
+ parser: useLocalOcr ? "system_ocr" : "vision_model"
645
+ };
646
+ }
629
647
  if (detected.kind === "pdf") {
630
648
  const converter = createPdfConverter(aiConfig?.pdf);
631
649
  return {
632
650
  kind: "pdf",
633
651
  mime: detected.mime,
634
652
  handler: "pdf_converter",
635
- converter: converter.name
653
+ converter: converter.name,
654
+ parser: converter.name
636
655
  };
637
656
  }
638
657
  if (detected.kind === "text") return {
639
658
  kind: "text",
640
659
  mime: detected.mime,
641
- handler: "text"
660
+ handler: "text",
661
+ parser: "utf8_text"
642
662
  };
643
663
  throw new Error(unsupportedFileTypeMessage(detected.mime ?? "application/octet-stream"));
644
664
  }
@@ -656,7 +676,11 @@ async function readExtractFileInput(filePath, aiConfig, modelOverride) {
656
676
  consola.info(t("command.extract.file.ocrText", { confidence: (result.confidence * 100).toFixed(1) }));
657
677
  return {
658
678
  text: result.text,
659
- inputProcessing,
679
+ inputProcessing: withProcessingResult(inputProcessing, textStatus(result.text), {
680
+ confidence: result.confidence,
681
+ textLength: result.text.length,
682
+ platform: process.platform
683
+ }),
660
684
  quality: { input: {
661
685
  kind: "image",
662
686
  textLength: result.text.length,
@@ -672,7 +696,7 @@ async function readExtractFileInput(filePath, aiConfig, modelOverride) {
672
696
  return {
673
697
  text: "",
674
698
  filePath,
675
- inputProcessing,
699
+ inputProcessing: withProcessingResult(inputProcessing, "parsed", { filePart: true }),
676
700
  quality: { input: { kind: "image" } }
677
701
  };
678
702
  }
@@ -695,17 +719,23 @@ async function readExtractFileInput(filePath, aiConfig, modelOverride) {
695
719
  consola.info(t("command.extract.file.markdownSaved", { path: fallbackMd }));
696
720
  }
697
721
  const textLength = result.text.length;
722
+ const emptyText = result.text.trim().length === 0;
698
723
  return {
699
724
  text: result.text,
700
- inputProcessing,
725
+ inputProcessing: withProcessingResult(inputProcessing, emptyText ? "partially_parsed" : "parsed", {
726
+ pageCount: result.pageCount,
727
+ textLength,
728
+ emptyText,
729
+ fallbackUsed: result.metadata?.fallback === "true"
730
+ }, result.warnings),
701
731
  quality: { input: {
702
732
  kind: "pdf",
703
733
  textLength,
704
- emptyText: result.text.trim().length === 0,
734
+ emptyText,
705
735
  pdf: {
706
736
  pageCount: result.pageCount,
707
737
  textLength,
708
- emptyText: result.text.trim().length === 0,
738
+ emptyText,
709
739
  fallbackUsed: result.metadata?.fallback === "true",
710
740
  converter: result.metadata?.converter ?? converter.name
711
741
  }
@@ -716,7 +746,10 @@ async function readExtractFileInput(filePath, aiConfig, modelOverride) {
716
746
  const text$1 = await fs.readFile(filePath, "utf-8");
717
747
  return {
718
748
  text: text$1,
719
- inputProcessing,
749
+ inputProcessing: withProcessingResult(inputProcessing, textStatus(text$1), {
750
+ textLength: text$1.length,
751
+ emptyText: text$1.trim().length === 0
752
+ }),
720
753
  quality: { input: {
721
754
  kind: "text",
722
755
  textLength: text$1.length,
@@ -1223,6 +1256,46 @@ function verifyFieldEvidence(input) {
1223
1256
  }
1224
1257
  return Object.keys(verified).length > 0 ? verified : void 0;
1225
1258
  }
1259
+ function isEvidenceEligibleProperty(property) {
1260
+ return property.type === "string" || property.type === "number" || property.type === "integer";
1261
+ }
1262
+ function buildFieldEvidenceQuality(input) {
1263
+ if (!isRecord$3(input.data)) return void 0;
1264
+ const fieldStatus = {};
1265
+ const supportedFields = [];
1266
+ const unsupportedFields = [];
1267
+ const missingFields = [];
1268
+ const invalidFields = [];
1269
+ for (const [field, property] of Object.entries(input.schema.properties)) {
1270
+ if (!isEvidenceEligibleProperty(property)) continue;
1271
+ const value = input.data[field];
1272
+ let status;
1273
+ if (value === null || value === void 0) {
1274
+ status = "missing";
1275
+ missingFields.push(field);
1276
+ } else if (input.verifiedEvidence?.[field]) {
1277
+ status = "supported";
1278
+ supportedFields.push(field);
1279
+ } else if (input.rawEvidence?.[field]) {
1280
+ status = "invalid";
1281
+ invalidFields.push(field);
1282
+ } else {
1283
+ status = "unsupported";
1284
+ unsupportedFields.push(field);
1285
+ }
1286
+ fieldStatus[field] = status;
1287
+ }
1288
+ const total = Object.keys(fieldStatus).length;
1289
+ if (total === 0) return void 0;
1290
+ return {
1291
+ fieldStatus,
1292
+ supportedFields,
1293
+ unsupportedFields,
1294
+ missingFields,
1295
+ invalidFields,
1296
+ supportedRate: supportedFields.length / total
1297
+ };
1298
+ }
1226
1299
 
1227
1300
  //#endregion
1228
1301
  //#region src/domain/ai-extraction/json-utils.ts
@@ -1827,6 +1900,12 @@ async function extractStructuredData(input) {
1827
1900
  data: businessData,
1828
1901
  rawEvidence: stripped.rawEvidence
1829
1902
  }) : void 0;
1903
+ const evidenceQuality = canLocateEvidence ? buildFieldEvidenceQuality({
1904
+ schema,
1905
+ data: businessData,
1906
+ rawEvidence: stripped.rawEvidence,
1907
+ verifiedEvidence: evidence
1908
+ }) : void 0;
1830
1909
  return {
1831
1910
  success: true,
1832
1911
  outputPath: await writeExtractionOutput({
@@ -1848,7 +1927,8 @@ async function extractStructuredData(input) {
1848
1927
  selfCorrectionCount: attempt - 1,
1849
1928
  apiRetryCount,
1850
1929
  missingFields: missing.fields,
1851
- missingFieldRate: missing.rate
1930
+ missingFieldRate: missing.rate,
1931
+ evidence: evidenceQuality
1852
1932
  } }
1853
1933
  };
1854
1934
  } else validationError = validation.error;
@@ -1932,161 +2012,23 @@ async function listSchemas(aiexDir) {
1932
2012
  }
1933
2013
  }
1934
2014
 
1935
- //#endregion
1936
- //#region src/infrastructure/extraction/insert-extracted-data.ts
1937
- function convertValue(value, column) {
1938
- if (value === null || value === void 0) return null;
1939
- const mode = column.columnType.class !== "real" ? column.columnType.mode : void 0;
1940
- if (mode === "json") return typeof value === "string" ? value : JSON.stringify(value);
1941
- if (mode === "boolean") return value ? 1 : 0;
1942
- if (mode === "timestamp" || mode === "timestamp_ms") {
1943
- if (typeof value === "string") {
1944
- const ms = Date.parse(value);
1945
- if (Number.isNaN(ms)) return value;
1946
- return mode === "timestamp_ms" ? ms : Math.floor(ms / 1e3);
1947
- }
1948
- return value;
1949
- }
1950
- return value;
1951
- }
1952
- function buildInsertSql(table, data) {
1953
- const columns = [];
1954
- const values = [];
1955
- for (const col of table.columns) {
1956
- if (col.isAutoIncrement) continue;
1957
- const value = data[col.name];
1958
- if (value === void 0) {
1959
- if (col.default !== void 0) {
1960
- columns.push(col.name);
1961
- values.push(convertValue(col.default, col));
1962
- }
1963
- continue;
1964
- }
1965
- columns.push(col.name);
1966
- values.push(convertValue(value, col));
1967
- }
1968
- const placeholders = values.map(() => "?").join(", ");
1969
- return {
1970
- sql: `INSERT INTO ${table.name} (${columns.join(", ")}) VALUES (${placeholders})`,
1971
- values
1972
- };
1973
- }
1974
- function insertTableRow({ db, table, data, parentRowId, foreignKeyColumn }) {
1975
- const rowData = { ...data };
1976
- if (parentRowId !== void 0 && foreignKeyColumn) rowData[foreignKeyColumn] = parentRowId;
1977
- const { sql: sql$1, values } = buildInsertSql(table, rowData);
1978
- const info = db.prepare(sql$1).run(...values);
1979
- return Number(info.lastInsertRowid);
1980
- }
1981
- function parseDataByColumns(data, schema, table) {
1982
- const result = {};
1983
- if ("properties" in schema) {
1984
- const s = schema;
1985
- for (const [propName, prop] of Object.entries(s.properties)) {
1986
- if (prop.nested?.enabled) continue;
1987
- if (prop.type === "array" && prop.items?.nested?.enabled) continue;
1988
- const colName = toSnakeCase(propName);
1989
- if (table.columns.some((c) => c.name === colName && c.isAutoIncrement)) continue;
1990
- if (propName in data) result[colName] = data[propName];
1991
- }
1992
- }
1993
- if (schema.table?.timestamps) {
1994
- if (!("created_at" in result)) result.created_at = Math.floor(Date.now() / 1e3);
1995
- if (!("updated_at" in result)) result.updated_at = Math.floor(Date.now() / 1e3);
1996
- }
1997
- return result;
1998
- }
1999
- function insertExtractedData(db, schema, data) {
2000
- const inserted = [];
2001
- try {
2002
- const parseResult = parseJsonSchema(schema);
2003
- const mainTable = parseResult.tables[0];
2004
- db.transaction(() => {
2005
- const mainRowId = insertTableRow({
2006
- db,
2007
- table: mainTable,
2008
- data: parseDataByColumns(data, schema, mainTable)
2009
- });
2010
- inserted.push({
2011
- table: mainTable.name,
2012
- rowId: mainRowId
2013
- });
2014
- for (const revRel of parseResult.reverseRelations) {
2015
- const rel = parseResult.relations.find((r) => r.fromTable === revRel.toTable && r.toTable === revRel.fromTable);
2016
- if (!rel) continue;
2017
- const propEntry = Object.entries(schema.properties).find(([key]) => toSnakeCase(key) === revRel.name && key in data);
2018
- if (!propEntry) continue;
2019
- const [propName] = propEntry;
2020
- const nestedValue = data[propName];
2021
- if (nestedValue === null || nestedValue === void 0) continue;
2022
- const nestedTable = parseResult.tables.find((t$1) => t$1.name === revRel.toTable);
2023
- if (!nestedTable) continue;
2024
- if (revRel.type === "has-one") {
2025
- const rowId = insertTableRow({
2026
- db,
2027
- table: nestedTable,
2028
- data: parseDataByColumns(nestedValue, schema.properties[propName], nestedTable),
2029
- parentRowId: mainRowId,
2030
- foreignKeyColumn: rel.fromColumn
2031
- });
2032
- inserted.push({
2033
- table: revRel.toTable,
2034
- rowId
2035
- });
2036
- } else if (revRel.type === "has-many") {
2037
- const items = nestedValue;
2038
- for (const item of items) {
2039
- const rowId = insertTableRow({
2040
- db,
2041
- table: nestedTable,
2042
- data: parseDataByColumns(item, schema.properties[propName].items, nestedTable),
2043
- parentRowId: mainRowId,
2044
- foreignKeyColumn: rel.fromColumn
2045
- });
2046
- inserted.push({
2047
- table: revRel.toTable,
2048
- rowId
2049
- });
2050
- }
2051
- }
2052
- }
2053
- return mainRowId;
2054
- })();
2055
- return {
2056
- success: true,
2057
- tablesInserted: inserted
2058
- };
2059
- } catch (e) {
2060
- return {
2061
- success: false,
2062
- tablesInserted: inserted,
2063
- error: e instanceof Error ? e.message : String(e)
2064
- };
2065
- }
2066
- }
2067
-
2068
2015
  //#endregion
2069
2016
  //#region src/application/extraction/run-extraction.ts
2070
- async function ensureDatabaseReady(dbPath, schema) {
2017
+ async function ensureDatabaseReady(config, schema) {
2018
+ const database = createProjectDatabase(config);
2019
+ if (!await database.exists()) return t("errors.db.notFound", {
2020
+ path: pc.cyan(".aiex/database.db"),
2021
+ cmd: pc.cyan("aiex schema")
2022
+ });
2071
2023
  try {
2072
- await fs.access(dbPath);
2073
- } catch {
2074
- return t("errors.db.notFound", {
2075
- path: pc.cyan(".aiex/database.db"),
2024
+ const result = parseJsonSchema(schema);
2025
+ const tableCheck = await database.verifyTables(result.tables.map((table) => table.name));
2026
+ if (tableCheck.error) return t("errors.db.cannotVerify", { error: tableCheck.error });
2027
+ const missing = tableCheck.missing[0];
2028
+ if (missing) return t("errors.db.tableNotFound", {
2029
+ name: missing,
2076
2030
  cmd: pc.cyan("aiex schema")
2077
2031
  });
2078
- }
2079
- try {
2080
- const result = parseJsonSchema(schema);
2081
- const db = new Database(dbPath);
2082
- try {
2083
- for (const table of result.tables) if (!db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name=?`).get(table.name)) return t("errors.db.tableNotFound", {
2084
- name: table.name,
2085
- cmd: pc.cyan("aiex schema")
2086
- });
2087
- } finally {
2088
- db.close();
2089
- }
2090
2032
  } catch (e) {
2091
2033
  return t("errors.db.cannotVerify", { error: e instanceof Error ? e.message : String(e) });
2092
2034
  }
@@ -2139,55 +2081,75 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
2139
2081
  total: result.tokensUsed.total
2140
2082
  })));
2141
2083
  if (result.data && options?.insert !== false) {
2084
+ const qualityError = qualityGateError(result.quality);
2085
+ if (qualityError) {
2086
+ consola.error(qualityError);
2087
+ return {
2088
+ success: false,
2089
+ error: qualityError,
2090
+ outputPath: result.outputPath,
2091
+ data: result.data,
2092
+ tokensUsed: result.tokensUsed,
2093
+ quality: result.quality,
2094
+ failureStage: "ai_extraction",
2095
+ evidence: result.evidence
2096
+ };
2097
+ }
2142
2098
  const s2 = spinner();
2143
2099
  if (!options?.quiet) s2.start(t("command.extract.file.insertingDb"));
2144
- const dbError = await ensureDatabaseReady(config.databasePath, schemaLoad.schema);
2100
+ const dbError = await ensureDatabaseReady(config, schemaLoad.schema);
2145
2101
  if (dbError) {
2146
2102
  if (!options?.quiet) s2.stop(t("command.extract.file.dbNotReady"));
2147
2103
  consola.error(dbError);
2148
2104
  return {
2149
2105
  success: false,
2150
2106
  error: dbError,
2107
+ outputPath: result.outputPath,
2108
+ data: result.data,
2109
+ tokensUsed: result.tokensUsed,
2151
2110
  quality: result.quality,
2152
- failureStage: "db_insert"
2111
+ failureStage: "db_insert",
2112
+ evidence: result.evidence
2153
2113
  };
2154
2114
  }
2155
2115
  try {
2156
- const db = new Database(config.databasePath);
2157
- try {
2158
- const insertResult = insertExtractedData(db, schemaLoad.schema, result.data);
2159
- if (insertResult.success) {
2160
- if (!options?.quiet) s2.stop(t("command.extract.file.insertedTables", { count: insertResult.tablesInserted.length }));
2161
- return {
2162
- success: true,
2163
- outputPath: result.outputPath,
2164
- data: result.data,
2165
- tablesInserted: insertResult.tablesInserted,
2166
- tokensUsed: result.tokensUsed,
2167
- quality: result.quality,
2168
- evidence: result.evidence
2169
- };
2170
- } else {
2171
- if (!options?.quiet) s2.stop(t("command.extract.file.dbInsertFail"));
2172
- consola.error(insertResult.error || t("common.unknownError"));
2173
- return {
2174
- success: false,
2175
- error: insertResult.error,
2176
- quality: result.quality,
2177
- failureStage: "db_insert"
2178
- };
2179
- }
2180
- } finally {
2181
- db.close();
2116
+ const insertResult = createProjectDatabase(config).insertExtracted(schemaLoad.schema, result.data);
2117
+ if (insertResult.success) {
2118
+ if (!options?.quiet) s2.stop(t("command.extract.file.insertedTables", { count: insertResult.tablesInserted.length }));
2119
+ return {
2120
+ success: true,
2121
+ outputPath: result.outputPath,
2122
+ data: result.data,
2123
+ tablesInserted: insertResult.tablesInserted,
2124
+ tokensUsed: result.tokensUsed,
2125
+ quality: result.quality,
2126
+ evidence: result.evidence
2127
+ };
2182
2128
  }
2129
+ if (!options?.quiet) s2.stop(t("command.extract.file.dbInsertFail"));
2130
+ consola.error(insertResult.error || t("common.unknownError"));
2131
+ return {
2132
+ success: false,
2133
+ error: insertResult.error,
2134
+ outputPath: result.outputPath,
2135
+ data: result.data,
2136
+ tokensUsed: result.tokensUsed,
2137
+ quality: result.quality,
2138
+ failureStage: "db_insert",
2139
+ evidence: result.evidence
2140
+ };
2183
2141
  } catch (e) {
2184
2142
  if (!options?.quiet) s2.stop(t("command.extract.file.dbInsertFail"));
2185
2143
  consola.error(e instanceof Error ? e.message : String(e));
2186
2144
  return {
2187
2145
  success: false,
2188
2146
  error: String(e),
2147
+ outputPath: result.outputPath,
2148
+ data: result.data,
2149
+ tokensUsed: result.tokensUsed,
2189
2150
  quality: result.quality,
2190
- failureStage: "db_insert"
2151
+ failureStage: "db_insert",
2152
+ evidence: result.evidence
2191
2153
  };
2192
2154
  }
2193
2155
  }
@@ -2206,19 +2168,7 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
2206
2168
  async function runAuditedExtraction(options) {
2207
2169
  const { aiexDir, config, aiConfig, schemaName, source, modelOverride, retryOf, insert, force, quiet = false } = options;
2208
2170
  let fileHash;
2209
- let isPlainTextFile = false;
2210
2171
  if (source.type === "file") {
2211
- const ext = path.extname(source.filePath).toLowerCase().replace(".", "");
2212
- isPlainTextFile = [
2213
- "txt",
2214
- "md",
2215
- "csv",
2216
- "json",
2217
- "html",
2218
- "xml",
2219
- "yaml",
2220
- "yml"
2221
- ].includes(ext);
2222
2172
  try {
2223
2173
  fileHash = await getFileHash(source.filePath);
2224
2174
  } catch (e) {
@@ -2227,7 +2177,7 @@ async function runAuditedExtraction(options) {
2227
2177
  error: e instanceof Error ? e.message : String(e)
2228
2178
  }));
2229
2179
  }
2230
- if (fileHash && !isPlainTextFile && !force) {
2180
+ if (fileHash && !force) {
2231
2181
  const existing = await findSucceededAuditByHash(aiexDir, schemaName, fileHash);
2232
2182
  if (existing) {
2233
2183
  if (!quiet) consola.info(t("command.extract.file.alreadyProcessed", {
@@ -2346,17 +2296,23 @@ async function runAuditedExtraction(options) {
2346
2296
  await updateExtractionAuditRecord(aiexDir, audit.id, {
2347
2297
  status: "failed",
2348
2298
  error: r.error || "Extraction failed",
2299
+ outputPath: r.outputPath,
2300
+ outputName: r.outputPath ? path.basename(r.outputPath) : void 0,
2301
+ tokensUsed: r.tokensUsed,
2349
2302
  quality: mergeQuality(inputQuality, r.quality),
2350
2303
  failureStage: r.failureStage ?? "ai_extraction",
2351
2304
  evidence: r.evidence
2352
2305
  });
2353
2306
  if (!quiet) consola.error(t("command.extract.file.extractionFailed", { error: r.error }));
2354
- await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.failed", source, void 0, r.error || "Extraction failed", void 0, quiet);
2307
+ await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.failed", source, r.data, r.error || "Extraction failed", r.tokensUsed, quiet);
2355
2308
  return {
2356
2309
  success: false,
2357
2310
  error: r.error,
2311
+ outputPath: r.outputPath,
2312
+ outputName: r.outputPath ? path.basename(r.outputPath) : void 0,
2358
2313
  auditId: audit.id,
2359
2314
  fileHash,
2315
+ tokensUsed: r.tokensUsed,
2360
2316
  inputProcessing,
2361
2317
  quality: mergeQuality(inputQuality, r.quality),
2362
2318
  failureStage: r.failureStage ?? "ai_extraction",
@@ -2777,7 +2733,7 @@ function analyzeMigrationRisk(previousEntries, nextEntries) {
2777
2733
  addRisk(items, "high", "column_removed", previous.table, previous.column, `Column "${key}" will be removed.`);
2778
2734
  continue;
2779
2735
  }
2780
- if (previous.sqliteType !== next.sqliteType || previous.drizzleType !== next.drizzleType) addRisk(items, "high", "column_type_changed", previous.table, previous.column, `Column "${key}" type changes from ${previous.drizzleType} to ${next.drizzleType}.`);
2736
+ if (previous.databaseType !== next.databaseType || previous.drizzleType !== next.drizzleType) addRisk(items, "high", "column_type_changed", previous.table, previous.column, `Column "${key}" type changes from ${previous.drizzleType} to ${next.drizzleType}.`);
2781
2737
  if (previous.nullable && !next.nullable) addRisk(items, "high", "nullable_tightened", previous.table, previous.column, `Column "${key}" changes from nullable to not null.`);
2782
2738
  else if (!previous.nullable && next.nullable) addRisk(items, "medium", "nullable_relaxed", previous.table, previous.column, `Column "${key}" changes from not null to nullable.`);
2783
2739
  if (!previous.unique && next.unique) addRisk(items, "high", "unique_added", previous.table, previous.column, `Column "${key}" adds a unique constraint.`);
@@ -2818,6 +2774,51 @@ function resolveHelperPath() {
2818
2774
  }
2819
2775
  }
2820
2776
 
2777
+ //#endregion
2778
+ //#region src/infrastructure/database/migration-runner.ts
2779
+ const execFileAsync = promisify(execFile);
2780
+ function parseMigrationOutput(stdout, stderr) {
2781
+ try {
2782
+ const jsonLine = stdout.trim().split("\n").find((l) => l.startsWith("{") && l.endsWith("}"));
2783
+ if (!jsonLine) return {
2784
+ success: false,
2785
+ error: t("errors.schema.migrationHelperInvalidOutput")
2786
+ };
2787
+ const result = JSON.parse(jsonLine);
2788
+ if (!result.success) return {
2789
+ success: false,
2790
+ error: result.error || t("errors.schema.migrationFailed")
2791
+ };
2792
+ return result;
2793
+ } catch {
2794
+ return {
2795
+ success: false,
2796
+ error: stderr || stdout || t("errors.schema.migrationHelperFailed")
2797
+ };
2798
+ }
2799
+ }
2800
+ async function runDatabaseMigration(config, migrationName) {
2801
+ const helperPath = resolveHelperPath();
2802
+ const helperArgs = [
2803
+ resolveTsxPath(),
2804
+ helperPath,
2805
+ config.drizzleSchemaPath,
2806
+ config.migrationsPath,
2807
+ config.databasePath
2808
+ ];
2809
+ if (migrationName) helperArgs.push(migrationName);
2810
+ try {
2811
+ const { stdout, stderr } = await execFileAsync(process.execPath, helperArgs, { cwd: process.cwd() });
2812
+ return parseMigrationOutput(stdout, stderr);
2813
+ } catch (error) {
2814
+ const execError = error;
2815
+ return {
2816
+ success: false,
2817
+ error: execError.stderr || execError.stdout || execError.message || String(error)
2818
+ };
2819
+ }
2820
+ }
2821
+
2821
2822
  //#endregion
2822
2823
  //#region src/domain/schema/dialect.ts
2823
2824
  const TOP_LEVEL_KEYS = new Set([
@@ -2914,6 +2915,14 @@ function collectDialectWarnings(schema, filePath) {
2914
2915
  return warnings;
2915
2916
  }
2916
2917
 
2918
+ //#endregion
2919
+ //#region src/infrastructure/database/schema-generator.ts
2920
+ function generateDatabaseSchema(result, dialect = "sqlite") {
2921
+ switch (dialect) {
2922
+ case "sqlite": return generateDrizzleSchema(result);
2923
+ }
2924
+ }
2925
+
2917
2926
  //#endregion
2918
2927
  //#region src/application/schema/parse-all-schemas.ts
2919
2928
  function formatZodError(error, filePath) {
@@ -2962,7 +2971,7 @@ function parseAllSchemas(entries) {
2962
2971
  reverseRelations,
2963
2972
  warnings,
2964
2973
  mapping,
2965
- drizzleCode: generateDrizzleSchema({
2974
+ drizzleCode: generateDatabaseSchema({
2966
2975
  tables,
2967
2976
  relations,
2968
2977
  reverseRelations,
@@ -2974,7 +2983,6 @@ function parseAllSchemas(entries) {
2974
2983
 
2975
2984
  //#endregion
2976
2985
  //#region src/application/schema/schema-sync.ts
2977
- const execFileAsync = promisify(execFile);
2978
2986
  const NO_RISK_REPORT = {
2979
2987
  level: "none",
2980
2988
  items: [],
@@ -3024,7 +3032,8 @@ async function generateSchemaFromFiles(schemaFiles, config, options = {}) {
3024
3032
  await fs.writeFile(config.drizzleSchemaPath, drizzleCode);
3025
3033
  await fs.writeFile(schemaMapPath(config), `${JSON.stringify({
3026
3034
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
3027
- dialect: "aiex-drizzle-sqlite",
3035
+ dialect: "aiex-drizzle",
3036
+ databaseDialect: config.databaseDialect,
3028
3037
  entries: mapping,
3029
3038
  baselineEntries: riskReport.hasHighRisk && !options.force ? previousMapping : void 0,
3030
3039
  warnings,
@@ -3040,46 +3049,8 @@ async function generateSchemaFromFiles(schemaFiles, config, options = {}) {
3040
3049
  riskReport
3041
3050
  };
3042
3051
  }
3043
- function parseMigrationOutput(stdout, stderr) {
3044
- try {
3045
- const jsonLine = stdout.trim().split("\n").find((l) => l.startsWith("{") && l.endsWith("}"));
3046
- if (!jsonLine) return {
3047
- success: false,
3048
- error: t("errors.schema.migrationHelperInvalidOutput")
3049
- };
3050
- const result = JSON.parse(jsonLine);
3051
- if (!result.success) return {
3052
- success: false,
3053
- error: result.error || t("errors.schema.migrationFailed")
3054
- };
3055
- return result;
3056
- } catch {
3057
- return {
3058
- success: false,
3059
- error: stderr || stdout || t("errors.schema.migrationHelperFailed")
3060
- };
3061
- }
3062
- }
3063
3052
  async function runSchemaMigration(config, migrationName) {
3064
- const helperPath = resolveHelperPath();
3065
- const helperArgs = [
3066
- resolveTsxPath(),
3067
- helperPath,
3068
- config.drizzleSchemaPath,
3069
- config.migrationsPath,
3070
- config.databasePath
3071
- ];
3072
- if (migrationName) helperArgs.push(migrationName);
3073
- try {
3074
- const { stdout, stderr } = await execFileAsync(process.execPath, helperArgs, { cwd: process.cwd() });
3075
- return parseMigrationOutput(stdout, stderr);
3076
- } catch (error) {
3077
- const execError = error;
3078
- return {
3079
- success: false,
3080
- error: execError.stderr || execError.stdout || execError.message || String(error)
3081
- };
3082
- }
3053
+ return runDatabaseMigration(config, migrationName);
3083
3054
  }
3084
3055
  async function runSchemaSync(config, options = {}) {
3085
3056
  const schemaFiles = await listSchemaFiles(config.schemaPath);
@@ -16234,7 +16205,6 @@ function aiRoutes(config) {
16234
16205
  //#region src/application/data/data-service.ts
16235
16206
  const FILE_REGEX = /\.json$/;
16236
16207
  const EXTRACTION_TIMESTAMP_RE = /-\d{4}-\d{2}-\d{2}T/;
16237
- const INTERNAL_ROWID_COLUMN = "__aiex_rowid";
16238
16208
  const TIMESTAMP_CLEANUP = /(\d{2})-(\d{2})-(\d{2})/;
16239
16209
  const TIMESTAMP_TZ = /(\d{3})Z/;
16240
16210
  function schemaNameFromExtractionFile(name$1) {
@@ -16268,9 +16238,6 @@ async function getRowExtractionActions(aiexDir, tableName) {
16268
16238
  }
16269
16239
  return actions;
16270
16240
  }
16271
- function createReadonlyQueryDb(databasePath) {
16272
- return new Kysely({ dialect: new SqliteDialect({ database: new Database(databasePath, { readonly: true }) }) });
16273
- }
16274
16241
  async function listExtractions(config) {
16275
16242
  const aiexDir = path.dirname(config.schemaPath);
16276
16243
  const extractedDir = path.join(aiexDir, "extracted");
@@ -16316,19 +16283,10 @@ async function listTables(config) {
16316
16283
  } catch {
16317
16284
  schemaFiles = [];
16318
16285
  }
16319
- let db = null;
16320
16286
  let dbTables = [];
16321
16287
  try {
16322
- db = createReadonlyQueryDb(config.databasePath);
16323
- dbTables = (await sql`
16324
- select name
16325
- from sqlite_master
16326
- where type = 'table' and name not like 'sqlite_%' and name not like '_%'
16327
- order by name
16328
- `.execute(db)).rows.map((row) => row.name);
16329
- } catch {} finally {
16330
- await db?.destroy();
16331
- }
16288
+ dbTables = await createProjectDatabase(config).listTableNames();
16289
+ } catch {}
16332
16290
  const tables = [];
16333
16291
  for (const file of schemaFiles) try {
16334
16292
  const schema = await readFile(path.join(schemaDir, file));
@@ -16347,57 +16305,26 @@ async function listTables(config) {
16347
16305
  async function getTableData(config, tableName, query) {
16348
16306
  const { page, pageSize, search, sortField, sortOrder, all } = query;
16349
16307
  const aiexDir = path.dirname(config.schemaPath);
16350
- let db;
16308
+ const database = createProjectDatabase(config);
16309
+ if (!await database.exists()) throw new Error(t("server.dbNotFound"));
16351
16310
  try {
16352
- db = createReadonlyQueryDb(config.databasePath);
16353
- } catch {
16354
- throw new Error(t("server.dbNotFound"));
16355
- }
16356
- try {
16357
- if ((await sql`
16358
- select name
16359
- from sqlite_master
16360
- where type = 'table' and name = ${tableName}
16361
- `.execute(db)).rows.length === 0) throw new Error(t("server.tableNotFound", { name: tableName }));
16362
- const columns = (await sql`
16363
- pragma table_info(${sql.table(tableName)})
16364
- `.execute(db)).rows.map((col) => ({
16365
- name: col.name,
16366
- type: col.type,
16367
- notNull: !!col.notnull,
16368
- pk: !!col.pk
16369
- }));
16370
- const searchConditions = columns.map((col) => sql`${sql.ref(col.name)} like ${`%${search}%`}`);
16371
- const searchCondition = search ? sql`where ${sql.join(searchConditions, sql` or `)}` : sql``;
16372
- const sortColumn = columns.find((col) => col.name === sortField);
16373
- const orderBy = sortColumn ? sql`order by ${sql.ref(sortColumn.name)} ${sql.raw(sortOrder === "desc" ? "desc" : "asc")}` : sql``;
16374
- const total = (await sql`
16375
- select count(*) as count
16376
- from ${sql.table(tableName)}
16377
- ${searchCondition}
16378
- `.execute(db)).rows[0]?.count ?? 0;
16379
- const offset = (page - 1) * pageSize;
16380
- const totalPages = all ? 1 : Math.max(1, Math.ceil(total / pageSize));
16381
- const result = all ? await sql`
16382
- select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
16383
- from ${sql.table(tableName)}
16384
- ${searchCondition}
16385
- ${orderBy}
16386
- `.execute(db) : await sql`
16387
- select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
16388
- from ${sql.table(tableName)}
16389
- ${searchCondition}
16390
- ${orderBy}
16391
- limit ${pageSize}
16392
- offset ${offset}
16393
- `.execute(db);
16311
+ const tableRows = await database.readTableRows({
16312
+ tableName,
16313
+ page,
16314
+ pageSize,
16315
+ search,
16316
+ sortField,
16317
+ sortOrder,
16318
+ all
16319
+ }).catch((error) => {
16320
+ if (error instanceof Error && error.message === `Table not found: ${tableName}`) throw new Error(t("server.tableNotFound", { name: tableName }));
16321
+ throw error;
16322
+ });
16394
16323
  const actionsByRowId = await getRowExtractionActions(aiexDir, tableName);
16395
- const rowActions = Object.fromEntries(result.rows.map((row, index) => {
16396
- const rowId = row[INTERNAL_ROWID_COLUMN];
16397
- const action = rowId === null || rowId === void 0 ? void 0 : actionsByRowId.get(String(rowId));
16324
+ const rowActions = Object.fromEntries(tableRows.rowIds.map((rowId, index) => {
16325
+ const action = rowId === void 0 ? void 0 : actionsByRowId.get(rowId);
16398
16326
  return action ? [String(index), action] : null;
16399
16327
  }).filter((entry) => !!entry));
16400
- const rows = result.rows.map(({ [INTERNAL_ROWID_COLUMN]: _rowid, ...row }) => row);
16401
16328
  const schemaDir = config.schemaPath;
16402
16329
  let schema = null;
16403
16330
  try {
@@ -16411,17 +16338,18 @@ async function getTableData(config, tableName, query) {
16411
16338
  }
16412
16339
  } catch {}
16413
16340
  return {
16414
- columns,
16415
- rows,
16341
+ columns: tableRows.columns,
16342
+ rows: tableRows.rows,
16416
16343
  rowActions,
16417
- total,
16418
- page: all ? 1 : page,
16419
- pageSize: all ? total : pageSize,
16420
- totalPages,
16344
+ total: tableRows.total,
16345
+ page: tableRows.page,
16346
+ pageSize: all ? tableRows.total : tableRows.pageSize,
16347
+ totalPages: tableRows.totalPages,
16421
16348
  schema
16422
16349
  };
16423
- } finally {
16424
- await db.destroy();
16350
+ } catch (error) {
16351
+ if (error instanceof Error) throw error;
16352
+ throw new Error(String(error));
16425
16353
  }
16426
16354
  }
16427
16355
  async function retryNotionSync(config, fileName) {
@@ -3,15 +3,16 @@ import os from "node:os";
3
3
  import path from "node:path";
4
4
  import process from "node:process";
5
5
  import { fileURLToPath } from "node:url";
6
- import Database from "better-sqlite3";
7
6
  import { execa } from "execa";
8
7
  import { readFile, writeFile } from "jsonfile";
9
8
  import Conf from "conf";
10
9
  import { z } from "zod";
10
+ import Database from "better-sqlite3";
11
+ import { Kysely, SqliteDialect, sql } from "kysely";
11
12
 
12
13
  //#region package.json
13
14
  var name = "aiex-cli";
14
- var version = "0.1.1-beta.6";
15
+ var version = "0.1.1-beta.7";
15
16
  var description = "JSON Schema → SQLite with AI-powered data extraction";
16
17
  var package_default = {
17
18
  name,
@@ -452,15 +453,15 @@ function describeColumnType(columnType) {
452
453
  switch (columnType.class) {
453
454
  case "text": return {
454
455
  drizzleType: columnType.mode === "json" ? `text({ mode: 'json' })` : "text()",
455
- sqliteType: "text"
456
+ databaseType: "text"
456
457
  };
457
458
  case "integer": return {
458
459
  drizzleType: columnType.mode ? `integer({ mode: '${columnType.mode}' })` : "integer()",
459
- sqliteType: "integer"
460
+ databaseType: "integer"
460
461
  };
461
462
  case "real": return {
462
463
  drizzleType: "real()",
463
- sqliteType: "real"
464
+ databaseType: "real"
464
465
  };
465
466
  }
466
467
  }
@@ -480,7 +481,7 @@ function mapColumnToReport(schemaPath, table, property, column, relation) {
480
481
  table,
481
482
  column: column.name,
482
483
  drizzleType: columnType.drizzleType,
483
- sqliteType: columnType.sqliteType,
484
+ databaseType: columnType.databaseType,
484
485
  nullable: column.isNullable,
485
486
  primary: column.isPrimary,
486
487
  unique: column.isUnique,
@@ -586,7 +587,7 @@ function parseNestedObject(propName, property, parentTableName, warnings, mappin
586
587
  table: nestedTableName,
587
588
  column: "id",
588
589
  drizzleType: "integer().primaryKey({ autoIncrement: true })",
589
- sqliteType: "integer",
590
+ databaseType: "integer",
590
591
  nullable: false,
591
592
  primary: true,
592
593
  unique: false,
@@ -611,7 +612,7 @@ function parseNestedObject(propName, property, parentTableName, warnings, mappin
611
612
  table: nestedTableName,
612
613
  column: `${parentTableName}_id`,
613
614
  drizzleType: "integer().references(...)",
614
- sqliteType: "integer",
615
+ databaseType: "integer",
615
616
  nullable: false,
616
617
  primary: false,
617
618
  unique: false,
@@ -916,6 +917,262 @@ async function addToGitignore(aiexDir, fileName) {
916
917
  }
917
918
  }
918
919
 
920
+ //#endregion
921
+ //#region src/infrastructure/extraction/insert-extracted-data.ts
922
+ function convertValue(value, column) {
923
+ if (value === null || value === void 0) return null;
924
+ const mode = column.columnType.class !== "real" ? column.columnType.mode : void 0;
925
+ if (mode === "json") return typeof value === "string" ? value : JSON.stringify(value);
926
+ if (mode === "boolean") return value ? 1 : 0;
927
+ if (mode === "timestamp" || mode === "timestamp_ms") {
928
+ if (typeof value === "string") {
929
+ const ms = Date.parse(value);
930
+ if (Number.isNaN(ms)) return value;
931
+ return mode === "timestamp_ms" ? ms : Math.floor(ms / 1e3);
932
+ }
933
+ return value;
934
+ }
935
+ return value;
936
+ }
937
+ function buildInsertSql(table, data) {
938
+ const columns = [];
939
+ const values = [];
940
+ for (const col of table.columns) {
941
+ if (col.isAutoIncrement) continue;
942
+ const value = data[col.name];
943
+ if (value === void 0) {
944
+ if (col.default !== void 0) {
945
+ columns.push(col.name);
946
+ values.push(convertValue(col.default, col));
947
+ }
948
+ continue;
949
+ }
950
+ columns.push(col.name);
951
+ values.push(convertValue(value, col));
952
+ }
953
+ const placeholders = values.map(() => "?").join(", ");
954
+ return {
955
+ sql: `INSERT INTO ${table.name} (${columns.join(", ")}) VALUES (${placeholders})`,
956
+ values
957
+ };
958
+ }
959
+ function insertTableRow({ db, table, data, parentRowId, foreignKeyColumn }) {
960
+ const rowData = { ...data };
961
+ if (parentRowId !== void 0 && foreignKeyColumn) rowData[foreignKeyColumn] = parentRowId;
962
+ const { sql: sql$1, values } = buildInsertSql(table, rowData);
963
+ const info = db.prepare(sql$1).run(...values);
964
+ return Number(info.lastInsertRowid);
965
+ }
966
+ function parseDataByColumns(data, schema, table) {
967
+ const result = {};
968
+ if ("properties" in schema) {
969
+ const s = schema;
970
+ for (const [propName, prop] of Object.entries(s.properties)) {
971
+ if (prop.nested?.enabled) continue;
972
+ if (prop.type === "array" && prop.items?.nested?.enabled) continue;
973
+ const colName = toSnakeCase(propName);
974
+ if (table.columns.some((c) => c.name === colName && c.isAutoIncrement)) continue;
975
+ if (propName in data) result[colName] = data[propName];
976
+ }
977
+ }
978
+ if (schema.table?.timestamps) {
979
+ if (!("created_at" in result)) result.created_at = Math.floor(Date.now() / 1e3);
980
+ if (!("updated_at" in result)) result.updated_at = Math.floor(Date.now() / 1e3);
981
+ }
982
+ return result;
983
+ }
984
+ function insertExtractedData(db, schema, data) {
985
+ const inserted = [];
986
+ try {
987
+ const parseResult = parseJsonSchema(schema);
988
+ const mainTable = parseResult.tables[0];
989
+ db.transaction(() => {
990
+ const mainRowId = insertTableRow({
991
+ db,
992
+ table: mainTable,
993
+ data: parseDataByColumns(data, schema, mainTable)
994
+ });
995
+ inserted.push({
996
+ table: mainTable.name,
997
+ rowId: mainRowId
998
+ });
999
+ for (const revRel of parseResult.reverseRelations) {
1000
+ const rel = parseResult.relations.find((r) => r.fromTable === revRel.toTable && r.toTable === revRel.fromTable);
1001
+ if (!rel) continue;
1002
+ const propEntry = Object.entries(schema.properties).find(([key]) => toSnakeCase(key) === revRel.name && key in data);
1003
+ if (!propEntry) continue;
1004
+ const [propName] = propEntry;
1005
+ const nestedValue = data[propName];
1006
+ if (nestedValue === null || nestedValue === void 0) continue;
1007
+ const nestedTable = parseResult.tables.find((t$1) => t$1.name === revRel.toTable);
1008
+ if (!nestedTable) continue;
1009
+ if (revRel.type === "has-one") {
1010
+ const rowId = insertTableRow({
1011
+ db,
1012
+ table: nestedTable,
1013
+ data: parseDataByColumns(nestedValue, schema.properties[propName], nestedTable),
1014
+ parentRowId: mainRowId,
1015
+ foreignKeyColumn: rel.fromColumn
1016
+ });
1017
+ inserted.push({
1018
+ table: revRel.toTable,
1019
+ rowId
1020
+ });
1021
+ } else if (revRel.type === "has-many") {
1022
+ const items = nestedValue;
1023
+ for (const item of items) {
1024
+ const rowId = insertTableRow({
1025
+ db,
1026
+ table: nestedTable,
1027
+ data: parseDataByColumns(item, schema.properties[propName].items, nestedTable),
1028
+ parentRowId: mainRowId,
1029
+ foreignKeyColumn: rel.fromColumn
1030
+ });
1031
+ inserted.push({
1032
+ table: revRel.toTable,
1033
+ rowId
1034
+ });
1035
+ }
1036
+ }
1037
+ }
1038
+ return mainRowId;
1039
+ })();
1040
+ return {
1041
+ success: true,
1042
+ tablesInserted: inserted
1043
+ };
1044
+ } catch (e) {
1045
+ return {
1046
+ success: false,
1047
+ tablesInserted: inserted,
1048
+ error: e instanceof Error ? e.message : String(e)
1049
+ };
1050
+ }
1051
+ }
1052
+
1053
+ //#endregion
1054
+ //#region src/infrastructure/database/sqlite-database.ts
1055
+ const INTERNAL_ROWID_COLUMN = "__aiex_rowid";
1056
+ function createReadonlyQueryDb(databasePath) {
1057
+ return new Kysely({ dialect: new SqliteDialect({ database: new Database(databasePath, { readonly: true }) }) });
1058
+ }
1059
+ var SqliteProjectDatabase = class {
1060
+ dialect = "sqlite";
1061
+ constructor(databasePath) {
1062
+ this.databasePath = databasePath;
1063
+ }
1064
+ async exists() {
1065
+ try {
1066
+ return (await fs.stat(this.databasePath)).isFile();
1067
+ } catch {
1068
+ return false;
1069
+ }
1070
+ }
1071
+ async listTableNames() {
1072
+ let db = null;
1073
+ try {
1074
+ db = createReadonlyQueryDb(this.databasePath);
1075
+ return (await sql`
1076
+ select name
1077
+ from sqlite_master
1078
+ where type = 'table' and name not like 'sqlite_%' and name not like '_%'
1079
+ order by name
1080
+ `.execute(db)).rows.map((row) => row.name);
1081
+ } finally {
1082
+ await db?.destroy();
1083
+ }
1084
+ }
1085
+ async verifyTables(tableNames) {
1086
+ const db = new Database(this.databasePath, { readonly: true });
1087
+ try {
1088
+ const missing = tableNames.filter((table) => {
1089
+ return !db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name=?`).get(table);
1090
+ });
1091
+ return {
1092
+ ok: missing.length === 0,
1093
+ missing
1094
+ };
1095
+ } catch (error) {
1096
+ return {
1097
+ ok: false,
1098
+ missing: [],
1099
+ error: error instanceof Error ? error.message : String(error)
1100
+ };
1101
+ } finally {
1102
+ db.close();
1103
+ }
1104
+ }
1105
+ insertExtracted(schema, data) {
1106
+ const db = new Database(this.databasePath);
1107
+ try {
1108
+ return insertExtractedData(db, schema, data);
1109
+ } finally {
1110
+ db.close();
1111
+ }
1112
+ }
1113
+ async readTableRows(query) {
1114
+ const { tableName, page, pageSize, search, sortField, sortOrder, all } = query;
1115
+ const db = createReadonlyQueryDb(this.databasePath);
1116
+ try {
1117
+ if ((await sql`
1118
+ select name
1119
+ from sqlite_master
1120
+ where type = 'table' and name = ${tableName}
1121
+ `.execute(db)).rows.length === 0) throw new Error(`Table not found: ${tableName}`);
1122
+ const columns = (await sql`
1123
+ pragma table_info(${sql.table(tableName)})
1124
+ `.execute(db)).rows.map((col) => ({
1125
+ name: col.name,
1126
+ type: col.type,
1127
+ notNull: !!col.notnull,
1128
+ pk: !!col.pk
1129
+ }));
1130
+ const searchConditions = columns.map((col) => sql`${sql.ref(col.name)} like ${`%${search}%`}`);
1131
+ const searchCondition = search ? sql`where ${sql.join(searchConditions, sql` or `)}` : sql``;
1132
+ const sortColumn = columns.find((col) => col.name === sortField);
1133
+ const orderBy = sortColumn ? sql`order by ${sql.ref(sortColumn.name)} ${sql.raw(sortOrder === "desc" ? "desc" : "asc")}` : sql``;
1134
+ const total = (await sql`
1135
+ select count(*) as count
1136
+ from ${sql.table(tableName)}
1137
+ ${searchCondition}
1138
+ `.execute(db)).rows[0]?.count ?? 0;
1139
+ const offset = (page - 1) * pageSize;
1140
+ const totalPages = all ? 1 : Math.max(1, Math.ceil(total / pageSize));
1141
+ const result = all ? await sql`
1142
+ select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
1143
+ from ${sql.table(tableName)}
1144
+ ${searchCondition}
1145
+ ${orderBy}
1146
+ `.execute(db) : await sql`
1147
+ select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
1148
+ from ${sql.table(tableName)}
1149
+ ${searchCondition}
1150
+ ${orderBy}
1151
+ limit ${pageSize}
1152
+ offset ${offset}
1153
+ `.execute(db);
1154
+ const rowIds = result.rows.map((row) => {
1155
+ const rowId = row[INTERNAL_ROWID_COLUMN];
1156
+ return rowId === null || rowId === void 0 ? void 0 : String(rowId);
1157
+ });
1158
+ return {
1159
+ columns,
1160
+ rows: result.rows.map(({ [INTERNAL_ROWID_COLUMN]: _rowid, ...row }) => row),
1161
+ rowIds,
1162
+ total,
1163
+ page,
1164
+ pageSize,
1165
+ totalPages
1166
+ };
1167
+ } finally {
1168
+ await db.destroy();
1169
+ }
1170
+ }
1171
+ };
1172
+ function createProjectDatabase(config) {
1173
+ return new SqliteProjectDatabase(config.databasePath);
1174
+ }
1175
+
919
1176
  //#endregion
920
1177
  //#region src/locales/en.ts
921
1178
  const en = {
@@ -1498,6 +1755,7 @@ async function checkImageOcrAvailability(imagePath, runtime = defaultRuntime) {
1498
1755
  //#region src/infrastructure/schema/migration-config.ts
1499
1756
  function createMigrationConfig(cwd) {
1500
1757
  return {
1758
+ databaseDialect: "sqlite",
1501
1759
  schemaPath: `${cwd}/.aiex/schema`,
1502
1760
  drizzleSchemaPath: `${cwd}/.aiex/drizzle/schema.ts`,
1503
1761
  migrationsPath: `${cwd}/.aiex/migrations`,
@@ -1636,26 +1894,18 @@ async function collectDoctorDiagnostics(options = {}) {
1636
1894
  error: error instanceof Error ? error.message : String(error)
1637
1895
  });
1638
1896
  }
1639
- let dbExists = false;
1640
- if (dirExists) try {
1641
- dbExists = (await fs.stat(migConfig.databasePath)).isFile();
1642
- } catch {
1643
- dbExists = false;
1644
- }
1897
+ const database = createProjectDatabase(migConfig);
1898
+ const dbExists = dirExists ? await database.exists() : false;
1645
1899
  let databaseTablesOk = null;
1646
1900
  let missingDatabaseTables = [];
1647
1901
  if (dbExists && expectedTables.size > 0) {
1648
- const db = new Database(migConfig.databasePath, { readonly: true });
1649
- try {
1650
- missingDatabaseTables = [...expectedTables].filter((table) => {
1651
- return !db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name=?`).get(table);
1652
- });
1653
- databaseTablesOk = missingDatabaseTables.length === 0;
1654
- } catch (error) {
1902
+ const tableCheck = await database.verifyTables([...expectedTables]);
1903
+ if (tableCheck.error) {
1655
1904
  databaseTablesOk = false;
1656
- errors.push(`Could not inspect database tables: ${error instanceof Error ? error.message : String(error)}`);
1657
- } finally {
1658
- db.close();
1905
+ errors.push(`Could not inspect database tables: ${tableCheck.error}`);
1906
+ } else {
1907
+ missingDatabaseTables = tableCheck.missing;
1908
+ databaseTablesOk = tableCheck.ok;
1659
1909
  }
1660
1910
  } else if (dbExists) databaseTablesOk = true;
1661
1911
  let migrationCount = 0;
@@ -1807,4 +2057,4 @@ function generateDrizzleSchema(result) {
1807
2057
  }
1808
2058
 
1809
2059
  //#endregion
1810
- export { seedConfig as A, CORRECTION_SYSTEM_PROMPT as C, PLACEHOLDER_TEXT as D, PLACEHOLDER_SCHEMA as E, name as M, package_default as N, buildCorrectionUserPrompt as O, version as P, DEFAULT_MINERU_CONFIG as S, EVIDENCE_INSTRUCTIONS as T, doctorDiagnosticsSeverityRows as _, recognizeImageText as a, DEFAULT_LITEPARSE_CONFIG as b, t as c, writeAIConfig as d, AIConfigSchema as f, buildDoctorDiagnostics as g, toSnakeCase as h, generateDrizzleConfig as i, description as j, createConfig as k, getDefaultAIConfig as l, parseJsonSchema as m, collectDoctorDiagnostics as n, shouldUseImageOcrFallback as o, JsonSchemaDefinitionSchema as p, createMigrationConfig as r, initI18n as s, generateDrizzleSchema as t, readAIConfig as u, doctorDiagnosticsTableRows as v, DEFAULT_PROMPT_CONFIG as w, DEFAULT_MINERU_API_CONFIG as x, formatDoctorDiagnosticsJson as y };
2060
+ export { seedConfig as A, CORRECTION_SYSTEM_PROMPT as C, PLACEHOLDER_TEXT as D, PLACEHOLDER_SCHEMA as E, name as M, package_default as N, buildCorrectionUserPrompt as O, version as P, DEFAULT_MINERU_CONFIG as S, EVIDENCE_INSTRUCTIONS as T, doctorDiagnosticsSeverityRows as _, recognizeImageText as a, DEFAULT_LITEPARSE_CONFIG as b, t as c, readAIConfig as d, writeAIConfig as f, buildDoctorDiagnostics as g, parseJsonSchema as h, generateDrizzleConfig as i, description as j, createConfig as k, createProjectDatabase as l, JsonSchemaDefinitionSchema as m, collectDoctorDiagnostics as n, shouldUseImageOcrFallback as o, AIConfigSchema as p, createMigrationConfig as r, initI18n as s, generateDrizzleSchema as t, getDefaultAIConfig as u, doctorDiagnosticsTableRows as v, DEFAULT_PROMPT_CONFIG as w, DEFAULT_MINERU_API_CONFIG as x, formatDoctorDiagnosticsJson as y };
package/dist/index.d.mts CHANGED
@@ -326,7 +326,7 @@ interface SchemaMappingEntry {
326
326
  table: string;
327
327
  column: string;
328
328
  drizzleType: string;
329
- sqliteType: 'text' | 'integer' | 'real';
329
+ databaseType: 'text' | 'integer' | 'real';
330
330
  nullable: boolean;
331
331
  primary: boolean;
332
332
  unique: boolean;
@@ -362,6 +362,7 @@ interface ParseResult {
362
362
  mapping?: SchemaMappingEntry[];
363
363
  }
364
364
  interface MigrationConfig {
365
+ databaseDialect: 'sqlite';
365
366
  schemaPath: string;
366
367
  drizzleSchemaPath: string;
367
368
  migrationsPath: string;
package/dist/index.mjs CHANGED
@@ -1,3 +1,3 @@
1
- import { _ as doctorDiagnosticsSeverityRows, g as buildDoctorDiagnostics, i as generateDrizzleConfig, m as parseJsonSchema, n as collectDoctorDiagnostics, p as JsonSchemaDefinitionSchema, r as createMigrationConfig, t as generateDrizzleSchema, v as doctorDiagnosticsTableRows, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-DAeXmyrZ.mjs";
1
+ import { _ as doctorDiagnosticsSeverityRows, g as buildDoctorDiagnostics, h as parseJsonSchema, i as generateDrizzleConfig, m as JsonSchemaDefinitionSchema, n as collectDoctorDiagnostics, r as createMigrationConfig, t as generateDrizzleSchema, v as doctorDiagnosticsTableRows, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-B6ocPcWd.mjs";
2
2
 
3
3
  export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsSeverityRows, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
@@ -3,8 +3,8 @@ import fs from "node:fs/promises";
3
3
  import path from "node:path";
4
4
  import process from "node:process";
5
5
  import { fileURLToPath } from "node:url";
6
- import Database from "better-sqlite3";
7
6
  import { readFile, writeFile } from "jsonfile";
7
+ import Database from "better-sqlite3";
8
8
  import * as esbuild from "esbuild";
9
9
  import lockfile from "proper-lockfile";
10
10
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "aiex-cli",
3
3
  "type": "module",
4
- "version": "0.1.1-beta.6",
4
+ "version": "0.1.1-beta.7",
5
5
  "description": "JSON Schema → SQLite with AI-powered data extraction",
6
6
  "author": "OSpoon <zxin088@gmail.com>",
7
7
  "license": "MIT",