aiex-cli 0.1.1-beta.5 → 0.1.1-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/cli.mjs +259 -352
- package/dist/default-prompts.json +4 -0
- package/dist/{generate-drizzle-schema-DpHYeu2z.mjs → generate-drizzle-schema-B6ocPcWd.mjs} +323 -54
- package/dist/index.d.mts +2 -1
- package/dist/index.mjs +1 -1
- package/dist/infrastructure/schema/migrate-helper.mjs +1 -1
- package/dist/web/assets/{AISettings-CRumcTjo.js → AISettings-DI8JKzb4.js} +23 -23
- package/dist/web/assets/{index-BcNvqIR3.js → index-CuCVFoOf.js} +2 -2
- package/dist/web/index.html +1 -1
- package/dist/{zh-CN-BAGJklRp.mjs → zh-CN-CJiDMnGe.mjs} +0 -13
- package/package.json +1 -1
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
{
|
|
2
|
+
"systemTemplate": "You are a professional data extraction assistant. Your task is to extract structured data from text and return a JSON object based on the data structure definition provided below.\n\n{schema}\n\nExtraction requirements:\n1. Extract strictly according to the field names and types defined in the structure\n2. If the text lacks information for a field, set that field to null\n3. Do not add fields that do not exist in the structure definition\n4. Maintain data accuracy and completeness",
|
|
3
|
+
"userTemplate": "Please extract data from the following text:\n{text}"
|
|
4
|
+
}
|
|
@@ -3,15 +3,16 @@ import os from "node:os";
|
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import process from "node:process";
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
6
|
-
import Database from "better-sqlite3";
|
|
7
6
|
import { execa } from "execa";
|
|
8
7
|
import { readFile, writeFile } from "jsonfile";
|
|
9
8
|
import Conf from "conf";
|
|
10
9
|
import { z } from "zod";
|
|
10
|
+
import Database from "better-sqlite3";
|
|
11
|
+
import { Kysely, SqliteDialect, sql } from "kysely";
|
|
11
12
|
|
|
12
13
|
//#region package.json
|
|
13
14
|
var name = "aiex-cli";
|
|
14
|
-
var version = "0.1.1-beta.
|
|
15
|
+
var version = "0.1.1-beta.7";
|
|
15
16
|
var description = "JSON Schema → SQLite with AI-powered data extraction";
|
|
16
17
|
var package_default = {
|
|
17
18
|
name,
|
|
@@ -147,6 +148,53 @@ function seedConfig(config = createConfig()) {
|
|
|
147
148
|
if (!config.has("version")) config.set("version", version);
|
|
148
149
|
}
|
|
149
150
|
|
|
151
|
+
//#endregion
|
|
152
|
+
//#region assets/default-prompts.json
|
|
153
|
+
var systemTemplate = "You are a professional data extraction assistant. Your task is to extract structured data from text and return a JSON object based on the data structure definition provided below.\n\n{schema}\n\nExtraction requirements:\n1. Extract strictly according to the field names and types defined in the structure\n2. If the text lacks information for a field, set that field to null\n3. Do not add fields that do not exist in the structure definition\n4. Maintain data accuracy and completeness";
|
|
154
|
+
var userTemplate = "Please extract data from the following text:\n{text}";
|
|
155
|
+
|
|
156
|
+
//#endregion
|
|
157
|
+
//#region src/domain/ai/prompts.ts
|
|
158
|
+
const PLACEHOLDER_SCHEMA = "{schema}";
|
|
159
|
+
const PLACEHOLDER_TEXT = "{text}";
|
|
160
|
+
const DEFAULT_EXTRACTION_SYSTEM_TEMPLATE = systemTemplate;
|
|
161
|
+
const DEFAULT_EXTRACTION_USER_TEMPLATE = userTemplate;
|
|
162
|
+
const DEFAULT_PROMPT_CONFIG = {
|
|
163
|
+
systemTemplate: DEFAULT_EXTRACTION_SYSTEM_TEMPLATE,
|
|
164
|
+
userTemplate: DEFAULT_EXTRACTION_USER_TEMPLATE
|
|
165
|
+
};
|
|
166
|
+
const EVIDENCE_INSTRUCTIONS = `Evidence requirements:
|
|
167
|
+
- Also return a top-level "_evidence" object.
|
|
168
|
+
- For each top-level scalar field you extracted from the text, include "_evidence.<field>.quote".
|
|
169
|
+
- The quote must be an exact contiguous substring copied from the input text.
|
|
170
|
+
- Do not invent offsets. Only provide quotes.
|
|
171
|
+
- If no exact quote supports a field, omit that field from "_evidence".`;
|
|
172
|
+
const CORRECTION_SYSTEM_PROMPT = `You are a precise data correction assistant. Your task is to correct validation errors in a previously generated JSON object to make it comply with the provided JSON Schema.
|
|
173
|
+
|
|
174
|
+
CRITICAL RULES:
|
|
175
|
+
1. Only correct the fields that failed validation.
|
|
176
|
+
2. Preserve all other correctly extracted fields and their values exactly.
|
|
177
|
+
3. Return ONLY the corrected JSON object. No explanations, no markdown blocks other than JSON.`;
|
|
178
|
+
/**
 * Builds the user message for a correction round-trip: the original text,
 * the JSON schema, the JSON that failed validation and the validation error,
 * optionally followed by the evidence instructions.
 *
 * @param input Object with `text`, `schema`, `invalidJson`, `error` and
 *   `includeEvidenceInstructions`.
 * @returns The prompt as a single newline-separated string.
 */
function buildCorrectionUserPrompt(input) {
	// An empty/missing text means the data was supplied as an attachment instead.
	const originalText = input.text || "Data is contained in the attached file.";
	const schemaJson = JSON.stringify(input.schema, null, 2);
	const evidenceSection = input.includeEvidenceInstructions ? EVIDENCE_INSTRUCTIONS : "";
	// Each entry is one output line; template strings keep the same
	// string coercion the single template literal would apply.
	const sections = [
		"The JSON data you generated previously failed validation. Please correct it.",
		"",
		"[Original Text]",
		`${originalText}`,
		"",
		"[JSON Schema Definition]",
		`${schemaJson}`,
		"",
		"[Previously Generated Invalid JSON]",
		`${input.invalidJson}`,
		"",
		"[Validation Error Details]",
		`${input.error}`,
		"",
		`${evidenceSection}`,
		"",
		"Please output the corrected JSON object now:"
	];
	return sections.join("\n");
}
|
|
197
|
+
|
|
150
198
|
//#endregion
|
|
151
199
|
//#region src/domain/ai/types.ts
|
|
152
200
|
const PDF_CONVERTER_KINDS = [
|
|
@@ -156,8 +204,6 @@ const PDF_CONVERTER_KINDS = [
|
|
|
156
204
|
"mineru_api",
|
|
157
205
|
"external"
|
|
158
206
|
];
|
|
159
|
-
const PLACEHOLDER_SCHEMA = "{schema}";
|
|
160
|
-
const PLACEHOLDER_TEXT = "{text}";
|
|
161
207
|
const DEFAULT_MODELS = [{
|
|
162
208
|
name: "qwen-plus",
|
|
163
209
|
capabilities: {
|
|
@@ -177,19 +223,6 @@ const DEFAULT_PROVIDER_CONFIG = {
|
|
|
177
223
|
models: [...DEFAULT_MODELS],
|
|
178
224
|
timeout: 300
|
|
179
225
|
};
|
|
180
|
-
const DEFAULT_PROMPT_CONFIG = {
|
|
181
|
-
systemTemplate: `You are a professional data extraction assistant. Your task is to extract structured data from text and return a JSON object based on the data structure definition provided below.
|
|
182
|
-
|
|
183
|
-
{schema}
|
|
184
|
-
|
|
185
|
-
Extraction requirements:
|
|
186
|
-
1. Extract strictly according to the field names and types defined in the structure
|
|
187
|
-
2. If the text lacks information for a field, set that field to null
|
|
188
|
-
3. Do not add fields that do not exist in the structure definition
|
|
189
|
-
4. Maintain data accuracy and completeness`,
|
|
190
|
-
userTemplate: `Please extract data from the following text:
|
|
191
|
-
{text}`
|
|
192
|
-
};
|
|
193
226
|
const DEFAULT_EXTRACTION_CONFIG = { outputDir: ".aiex/extracted" };
|
|
194
227
|
const DEFAULT_MINERU_CONFIG = {
|
|
195
228
|
command: "mineru",
|
|
@@ -420,15 +453,15 @@ function describeColumnType(columnType) {
|
|
|
420
453
|
switch (columnType.class) {
|
|
421
454
|
case "text": return {
|
|
422
455
|
drizzleType: columnType.mode === "json" ? `text({ mode: 'json' })` : "text()",
|
|
423
|
-
|
|
456
|
+
databaseType: "text"
|
|
424
457
|
};
|
|
425
458
|
case "integer": return {
|
|
426
459
|
drizzleType: columnType.mode ? `integer({ mode: '${columnType.mode}' })` : "integer()",
|
|
427
|
-
|
|
460
|
+
databaseType: "integer"
|
|
428
461
|
};
|
|
429
462
|
case "real": return {
|
|
430
463
|
drizzleType: "real()",
|
|
431
|
-
|
|
464
|
+
databaseType: "real"
|
|
432
465
|
};
|
|
433
466
|
}
|
|
434
467
|
}
|
|
@@ -448,7 +481,7 @@ function mapColumnToReport(schemaPath, table, property, column, relation) {
|
|
|
448
481
|
table,
|
|
449
482
|
column: column.name,
|
|
450
483
|
drizzleType: columnType.drizzleType,
|
|
451
|
-
|
|
484
|
+
databaseType: columnType.databaseType,
|
|
452
485
|
nullable: column.isNullable,
|
|
453
486
|
primary: column.isPrimary,
|
|
454
487
|
unique: column.isUnique,
|
|
@@ -554,7 +587,7 @@ function parseNestedObject(propName, property, parentTableName, warnings, mappin
|
|
|
554
587
|
table: nestedTableName,
|
|
555
588
|
column: "id",
|
|
556
589
|
drizzleType: "integer().primaryKey({ autoIncrement: true })",
|
|
557
|
-
|
|
590
|
+
databaseType: "integer",
|
|
558
591
|
nullable: false,
|
|
559
592
|
primary: true,
|
|
560
593
|
unique: false,
|
|
@@ -579,7 +612,7 @@ function parseNestedObject(propName, property, parentTableName, warnings, mappin
|
|
|
579
612
|
table: nestedTableName,
|
|
580
613
|
column: `${parentTableName}_id`,
|
|
581
614
|
drizzleType: "integer().references(...)",
|
|
582
|
-
|
|
615
|
+
databaseType: "integer",
|
|
583
616
|
nullable: false,
|
|
584
617
|
primary: false,
|
|
585
618
|
unique: false,
|
|
@@ -884,6 +917,262 @@ async function addToGitignore(aiexDir, fileName) {
|
|
|
884
917
|
}
|
|
885
918
|
}
|
|
886
919
|
|
|
920
|
+
//#endregion
|
|
921
|
+
//#region src/infrastructure/extraction/insert-extracted-data.ts
|
|
922
|
+
/**
 * Converts an extracted value into the representation stored in SQLite for
 * the given column.
 *
 * - `null` / `undefined` become `null`.
 * - `json` mode: strings pass through, everything else is JSON-stringified.
 * - `boolean` mode: stored as 1/0. Textual falsy spellings ("false", "0",
 *   "no", "off", empty string) are stored as 0 instead of being treated as
 *   truthy non-empty strings.
 * - `timestamp` / `timestamp_ms` mode: date strings and `Date` instances are
 *   converted to epoch seconds / milliseconds; values that cannot be parsed
 *   are passed through unchanged.
 * - Any other raw boolean is stored as 1/0 so it can be bound as a number.
 *
 * @param value  Raw value taken from the extracted data or a column default.
 * @param column Column definition; only `columnType.class` and
 *   `columnType.mode` are read.
 * @returns The value to bind to the INSERT statement.
 */
function convertValue(value, column) {
	if (value === null || value === void 0) return null;
	// "real" columns are treated as having no mode.
	const mode = column.columnType.class !== "real" ? column.columnType.mode : void 0;
	if (mode === "json") return typeof value === "string" ? value : JSON.stringify(value);
	if (mode === "boolean") {
		if (typeof value === "string") {
			const normalized = value.trim().toLowerCase();
			if (normalized === "" || normalized === "false" || normalized === "0" || normalized === "no" || normalized === "off") return 0;
		}
		return value ? 1 : 0;
	}
	if (mode === "timestamp" || mode === "timestamp_ms") {
		let ms;
		if (value instanceof Date) ms = value.getTime();
		else if (typeof value === "string") ms = Date.parse(value);
		else return value;
		// Unparseable input is handed on untouched rather than guessed at.
		if (Number.isNaN(ms)) return value;
		return mode === "timestamp_ms" ? ms : Math.floor(ms / 1e3);
	}
	if (typeof value === "boolean") return value ? 1 : 0;
	return value;
}
|
|
937
|
+
/**
 * Builds a parameterised INSERT statement for one row of `table`.
 *
 * Auto-increment columns are never written. A column missing from `data`
 * falls back to its `default` when one is defined and is otherwise omitted.
 * Identifiers are double-quoted so names that collide with SQL keywords
 * (e.g. `order`, `group`) still produce valid SQL, and a row with no
 * insertable columns uses `DEFAULT VALUES`, because `INSERT INTO t () VALUES ()`
 * is not valid SQLite.
 *
 * @param table Table definition with `name` and `columns`.
 * @param data  Row data keyed by column name.
 * @returns `{ sql, values }` where `values` lines up with the `?` placeholders.
 */
function buildInsertSql(table, data) {
	// Standard SQL identifier quoting: wrap in double quotes, double any embedded quote.
	const quoteIdentifier = (identifier) => `"${String(identifier).replace(/"/g, "\"\"")}"`;
	const columns = [];
	const values = [];
	for (const col of table.columns) {
		if (col.isAutoIncrement) continue;
		const provided = data[col.name];
		const effective = provided === void 0 ? col.default : provided;
		// Neither a value nor a default: leave the column to the database.
		if (effective === void 0) continue;
		columns.push(quoteIdentifier(col.name));
		values.push(convertValue(effective, col));
	}
	const target = quoteIdentifier(table.name);
	if (columns.length === 0) return {
		sql: `INSERT INTO ${target} DEFAULT VALUES`,
		values
	};
	const placeholders = values.map(() => "?").join(", ");
	return {
		sql: `INSERT INTO ${target} (${columns.join(", ")}) VALUES (${placeholders})`,
		values
	};
}
|
|
959
|
+
/**
 * Inserts a single row into `table` and returns the new row id.
 *
 * When both `parentRowId` and `foreignKeyColumn` are supplied, the parent id
 * is written into that column so the row is linked to its parent.
 *
 * @returns The `lastInsertRowid` reported by the statement, as a number.
 */
function insertTableRow({ db, table, data, parentRowId, foreignKeyColumn }) {
	const linkToParent = parentRowId !== void 0 && foreignKeyColumn;
	// Work on a copy so the caller's data object is never mutated.
	const row = linkToParent ? {
		...data,
		[foreignKeyColumn]: parentRowId
	} : { ...data };
	const statement = buildInsertSql(table, row);
	const outcome = db.prepare(statement.sql).run(...statement.values);
	return Number(outcome.lastInsertRowid);
}
|
|
966
|
+
function parseDataByColumns(data, schema, table) {
|
|
967
|
+
const result = {};
|
|
968
|
+
if ("properties" in schema) {
|
|
969
|
+
const s = schema;
|
|
970
|
+
for (const [propName, prop] of Object.entries(s.properties)) {
|
|
971
|
+
if (prop.nested?.enabled) continue;
|
|
972
|
+
if (prop.type === "array" && prop.items?.nested?.enabled) continue;
|
|
973
|
+
const colName = toSnakeCase(propName);
|
|
974
|
+
if (table.columns.some((c) => c.name === colName && c.isAutoIncrement)) continue;
|
|
975
|
+
if (propName in data) result[colName] = data[propName];
|
|
976
|
+
}
|
|
977
|
+
}
|
|
978
|
+
if (schema.table?.timestamps) {
|
|
979
|
+
if (!("created_at" in result)) result.created_at = Math.floor(Date.now() / 1e3);
|
|
980
|
+
if (!("updated_at" in result)) result.updated_at = Math.floor(Date.now() / 1e3);
|
|
981
|
+
}
|
|
982
|
+
return result;
|
|
983
|
+
}
|
|
984
|
+
/**
 * Inserts one extracted record, plus its nested has-one / has-many children,
 * inside a single transaction.
 *
 * The first table produced by `parseJsonSchema` is treated as the main table.
 * For every reverse relation that has a matching forward relation, a schema
 * property of the same snake_case name and a non-null value in `data`, the
 * nested value(s) are inserted into the related table with the main row id
 * as foreign key.
 *
 * @param db     Open database handle exposing `transaction` and `prepare`.
 * @param schema JSON schema definition describing `data`.
 * @param data   Extracted object to persist.
 * @returns `{ success: true, tablesInserted }` on commit, or
 *   `{ success: false, tablesInserted: [], error }` when anything throws.
 *   The list is empty on failure because the transaction rolls back every
 *   row written before the error.
 */
function insertExtractedData(db, schema, data) {
	try {
		const parseResult = parseJsonSchema(schema);
		const mainTable = parseResult.tables[0];
		if (!mainTable) throw new Error("Schema did not produce any table to insert into");
		const tablesInserted = db.transaction(() => {
			// Rows are collected inside the transaction and only escape it on commit.
			const rows = [];
			const mainRowId = insertTableRow({
				db,
				table: mainTable,
				data: parseDataByColumns(data, schema, mainTable)
			});
			rows.push({
				table: mainTable.name,
				rowId: mainRowId
			});
			for (const revRel of parseResult.reverseRelations) {
				// The forward relation carries the foreign-key column on the child table.
				const rel = parseResult.relations.find((r) => r.fromTable === revRel.toTable && r.toTable === revRel.fromTable);
				if (!rel) continue;
				const propEntry = Object.entries(schema.properties).find(([key]) => toSnakeCase(key) === revRel.name && key in data);
				if (!propEntry) continue;
				const [propName] = propEntry;
				const nestedValue = data[propName];
				if (nestedValue === null || nestedValue === void 0) continue;
				const nestedTable = parseResult.tables.find((t$1) => t$1.name === revRel.toTable);
				if (!nestedTable) continue;
				// has-one is handled as a one-element list so both kinds share the insert loop.
				let childSchema;
				let childValues;
				if (revRel.type === "has-one") {
					childSchema = schema.properties[propName];
					childValues = [nestedValue];
				} else if (revRel.type === "has-many") {
					childSchema = schema.properties[propName].items;
					childValues = nestedValue;
				} else continue;
				for (const childValue of childValues) {
					const rowId = insertTableRow({
						db,
						table: nestedTable,
						data: parseDataByColumns(childValue, childSchema, nestedTable),
						parentRowId: mainRowId,
						foreignKeyColumn: rel.fromColumn
					});
					rows.push({
						table: revRel.toTable,
						rowId
					});
				}
			}
			return rows;
		})();
		return {
			success: true,
			tablesInserted
		};
	} catch (e) {
		return {
			success: false,
			tablesInserted: [],
			error: e instanceof Error ? e.message : String(e)
		};
	}
}
|
|
1052
|
+
|
|
1053
|
+
//#endregion
|
|
1054
|
+
//#region src/infrastructure/database/sqlite-database.ts
|
|
1055
|
+
const INTERNAL_ROWID_COLUMN = "__aiex_rowid";
|
|
1056
|
+
/**
 * Opens the SQLite file at `databasePath` in read-only mode and wraps it in
 * a Kysely instance. The caller is responsible for calling `destroy()` on
 * the returned instance.
 */
function createReadonlyQueryDb(databasePath) {
	const connection = new Database(databasePath, { readonly: true });
	const dialect = new SqliteDialect({ database: connection });
	return new Kysely({ dialect });
}
|
|
1059
|
+
/**
 * SQLite-backed project database.
 *
 * Every method opens its own short-lived connection to `databasePath` and
 * closes it before returning; no connection is kept on the instance.
 */
var SqliteProjectDatabase = class {
	dialect = "sqlite";
	/** @param databasePath Path of the SQLite database file. */
	constructor(databasePath) {
		this.databasePath = databasePath;
	}
	/** Resolves to true when `databasePath` exists and is a regular file. */
	async exists() {
		try {
			return (await fs.stat(this.databasePath)).isFile();
		} catch {
			return false;
		}
	}
	/**
	 * Lists user tables in name order, hiding SQLite's own `sqlite_*` tables
	 * and tables whose name starts with an underscore.
	 *
	 * `_` is a single-character wildcard in LIKE, so it has to be escaped:
	 * an unescaped `'_%'` pattern matches every non-empty name and would
	 * filter out all tables.
	 */
	async listTableNames() {
		let db = null;
		try {
			db = createReadonlyQueryDb(this.databasePath);
			return (await sql`
				select name
				from sqlite_master
				where type = 'table'
					and name not like 'sqlite\\_%' escape '\\'
					and name not like '\\_%' escape '\\'
				order by name
			`.execute(db)).rows.map((row) => row.name);
		} finally {
			await db?.destroy();
		}
	}
	/**
	 * Checks that every name in `tableNames` exists as a table.
	 *
	 * @returns `{ ok, missing }`, or `{ ok: false, missing: [], error }` when
	 *   the database cannot be opened or queried.
	 */
	async verifyTables(tableNames) {
		let db = null;
		try {
			// Opened inside the try so an open failure is reported through `error` too.
			db = new Database(this.databasePath, { readonly: true });
			// Prepared once and reused for every table name.
			const lookup = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name=?`);
			const missing = tableNames.filter((table) => !lookup.get(table));
			return {
				ok: missing.length === 0,
				missing
			};
		} catch (error) {
			return {
				ok: false,
				missing: [],
				error: error instanceof Error ? error.message : String(error)
			};
		} finally {
			db?.close();
		}
	}
	/**
	 * Persists one extracted record through `insertExtractedData` using a
	 * writable connection that is closed afterwards.
	 */
	insertExtracted(schema, data) {
		const db = new Database(this.databasePath);
		try {
			return insertExtractedData(db, schema, data);
		} finally {
			db.close();
		}
	}
	/**
	 * Reads rows from one table with optional search, sorting and paging.
	 *
	 * @param query `{ tableName, page, pageSize, search, sortField, sortOrder, all }`.
	 *   `search` is matched with LIKE against every column; `sortField` is
	 *   ignored unless it names an existing column; `all` disables paging.
	 * @returns `{ columns, rows, rowIds, total, page, pageSize, totalPages }`
	 *   where `rowIds[i]` is the stringified SQLite rowid of `rows[i]`.
	 * @throws Error when `tableName` is not a table in the database.
	 */
	async readTableRows(query) {
		const { tableName, page, pageSize, search, sortField, sortOrder, all } = query;
		const db = createReadonlyQueryDb(this.databasePath);
		try {
			if ((await sql`
				select name
				from sqlite_master
				where type = 'table' and name = ${tableName}
			`.execute(db)).rows.length === 0) throw new Error(`Table not found: ${tableName}`);
			const columns = (await sql`
				pragma table_info(${sql.table(tableName)})
			`.execute(db)).rows.map((col) => ({
				name: col.name,
				type: col.type,
				notNull: !!col.notnull,
				pk: !!col.pk
			}));
			// The search term is bound as a parameter; only identifiers are inlined.
			const searchCondition = search ? sql`where ${sql.join(columns.map((col) => sql`${sql.ref(col.name)} like ${`%${search}%`}`), sql` or `)}` : sql``;
			// Sorting is restricted to columns that actually exist on the table.
			const sortColumn = columns.find((col) => col.name === sortField);
			const orderBy = sortColumn ? sql`order by ${sql.ref(sortColumn.name)} ${sql.raw(sortOrder === "desc" ? "desc" : "asc")}` : sql``;
			const total = (await sql`
				select count(*) as count
				from ${sql.table(tableName)}
				${searchCondition}
			`.execute(db)).rows[0]?.count ?? 0;
			const offset = (page - 1) * pageSize;
			const totalPages = all ? 1 : Math.max(1, Math.ceil(total / pageSize));
			// rowid is selected under an internal alias and split off again below.
			const result = all ? await sql`
				select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
				from ${sql.table(tableName)}
				${searchCondition}
				${orderBy}
			`.execute(db) : await sql`
				select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
				from ${sql.table(tableName)}
				${searchCondition}
				${orderBy}
				limit ${pageSize}
				offset ${offset}
			`.execute(db);
			const rowIds = result.rows.map((row) => {
				const rowId = row[INTERNAL_ROWID_COLUMN];
				return rowId === null || rowId === void 0 ? void 0 : String(rowId);
			});
			return {
				columns,
				rows: result.rows.map(({ [INTERNAL_ROWID_COLUMN]: _rowid, ...row }) => row),
				rowIds,
				total,
				page,
				pageSize,
				totalPages
			};
		} finally {
			await db.destroy();
		}
	}
};
|
|
1172
|
+
/**
 * Creates the project database wrapper for the database file named by
 * `config.databasePath`.
 */
function createProjectDatabase(config) {
	const { databasePath } = config;
	return new SqliteProjectDatabase(databasePath);
}
|
|
1175
|
+
|
|
887
1176
|
//#endregion
|
|
888
1177
|
//#region src/locales/en.ts
|
|
889
1178
|
const en = {
|
|
@@ -1288,19 +1577,6 @@ const en = {
|
|
|
1288
1577
|
},
|
|
1289
1578
|
ocrFallbackOptions: { localAuto: "Vision model or local OCR" }
|
|
1290
1579
|
},
|
|
1291
|
-
prompt: {
|
|
1292
|
-
defaultSystem: `You are a professional data extraction assistant. Your task is to extract structured data from text and return a JSON object based on the data structure definition provided below.
|
|
1293
|
-
|
|
1294
|
-
{schema}
|
|
1295
|
-
|
|
1296
|
-
Extraction requirements:
|
|
1297
|
-
1. Extract data strictly according to the field names and types defined in the structure
|
|
1298
|
-
2. If a field's information is missing from the text, set that field to null
|
|
1299
|
-
3. Do not add fields that are not in the structure definition
|
|
1300
|
-
4. Maintain data accuracy and completeness`,
|
|
1301
|
-
defaultUser: `Please extract data from the following text:
|
|
1302
|
-
{text}`
|
|
1303
|
-
},
|
|
1304
1580
|
fields: {
|
|
1305
1581
|
config: "Fields Config",
|
|
1306
1582
|
fewShotExamples: "Few-Shot Examples",
|
|
@@ -1357,7 +1633,7 @@ async function initI18n(lng) {
|
|
|
1357
1633
|
fallbackLng: "en",
|
|
1358
1634
|
resources: {
|
|
1359
1635
|
"en": { translation: en },
|
|
1360
|
-
"zh-CN": { translation: await import("./zh-CN-
|
|
1636
|
+
"zh-CN": { translation: await import("./zh-CN-CJiDMnGe.mjs").then((m) => m.zhCN) }
|
|
1361
1637
|
},
|
|
1362
1638
|
interpolation: { escapeValue: false },
|
|
1363
1639
|
returnNull: false
|
|
@@ -1479,6 +1755,7 @@ async function checkImageOcrAvailability(imagePath, runtime = defaultRuntime) {
|
|
|
1479
1755
|
//#region src/infrastructure/schema/migration-config.ts
|
|
1480
1756
|
function createMigrationConfig(cwd) {
|
|
1481
1757
|
return {
|
|
1758
|
+
databaseDialect: "sqlite",
|
|
1482
1759
|
schemaPath: `${cwd}/.aiex/schema`,
|
|
1483
1760
|
drizzleSchemaPath: `${cwd}/.aiex/drizzle/schema.ts`,
|
|
1484
1761
|
migrationsPath: `${cwd}/.aiex/migrations`,
|
|
@@ -1617,26 +1894,18 @@ async function collectDoctorDiagnostics(options = {}) {
|
|
|
1617
1894
|
error: error instanceof Error ? error.message : String(error)
|
|
1618
1895
|
});
|
|
1619
1896
|
}
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
dbExists = (await fs.stat(migConfig.databasePath)).isFile();
|
|
1623
|
-
} catch {
|
|
1624
|
-
dbExists = false;
|
|
1625
|
-
}
|
|
1897
|
+
const database = createProjectDatabase(migConfig);
|
|
1898
|
+
const dbExists = dirExists ? await database.exists() : false;
|
|
1626
1899
|
let databaseTablesOk = null;
|
|
1627
1900
|
let missingDatabaseTables = [];
|
|
1628
1901
|
if (dbExists && expectedTables.size > 0) {
|
|
1629
|
-
const
|
|
1630
|
-
|
|
1631
|
-
missingDatabaseTables = [...expectedTables].filter((table) => {
|
|
1632
|
-
return !db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name=?`).get(table);
|
|
1633
|
-
});
|
|
1634
|
-
databaseTablesOk = missingDatabaseTables.length === 0;
|
|
1635
|
-
} catch (error) {
|
|
1902
|
+
const tableCheck = await database.verifyTables([...expectedTables]);
|
|
1903
|
+
if (tableCheck.error) {
|
|
1636
1904
|
databaseTablesOk = false;
|
|
1637
|
-
errors.push(`Could not inspect database tables: ${
|
|
1638
|
-
}
|
|
1639
|
-
|
|
1905
|
+
errors.push(`Could not inspect database tables: ${tableCheck.error}`);
|
|
1906
|
+
} else {
|
|
1907
|
+
missingDatabaseTables = tableCheck.missing;
|
|
1908
|
+
databaseTablesOk = tableCheck.ok;
|
|
1640
1909
|
}
|
|
1641
1910
|
} else if (dbExists) databaseTablesOk = true;
|
|
1642
1911
|
let migrationCount = 0;
|
|
@@ -1788,4 +2057,4 @@ function generateDrizzleSchema(result) {
|
|
|
1788
2057
|
}
|
|
1789
2058
|
|
|
1790
2059
|
//#endregion
|
|
1791
|
-
export {
|
|
2060
|
+
export { seedConfig as A, CORRECTION_SYSTEM_PROMPT as C, PLACEHOLDER_TEXT as D, PLACEHOLDER_SCHEMA as E, name as M, package_default as N, buildCorrectionUserPrompt as O, version as P, DEFAULT_MINERU_CONFIG as S, EVIDENCE_INSTRUCTIONS as T, doctorDiagnosticsSeverityRows as _, recognizeImageText as a, DEFAULT_LITEPARSE_CONFIG as b, t as c, readAIConfig as d, writeAIConfig as f, buildDoctorDiagnostics as g, parseJsonSchema as h, generateDrizzleConfig as i, description as j, createConfig as k, createProjectDatabase as l, JsonSchemaDefinitionSchema as m, collectDoctorDiagnostics as n, shouldUseImageOcrFallback as o, AIConfigSchema as p, createMigrationConfig as r, initI18n as s, generateDrizzleSchema as t, getDefaultAIConfig as u, doctorDiagnosticsTableRows as v, DEFAULT_PROMPT_CONFIG as w, DEFAULT_MINERU_API_CONFIG as x, formatDoctorDiagnosticsJson as y };
|
package/dist/index.d.mts
CHANGED
|
@@ -326,7 +326,7 @@ interface SchemaMappingEntry {
|
|
|
326
326
|
table: string;
|
|
327
327
|
column: string;
|
|
328
328
|
drizzleType: string;
|
|
329
|
-
|
|
329
|
+
databaseType: 'text' | 'integer' | 'real';
|
|
330
330
|
nullable: boolean;
|
|
331
331
|
primary: boolean;
|
|
332
332
|
unique: boolean;
|
|
@@ -362,6 +362,7 @@ interface ParseResult {
|
|
|
362
362
|
mapping?: SchemaMappingEntry[];
|
|
363
363
|
}
|
|
364
364
|
interface MigrationConfig {
|
|
365
|
+
databaseDialect: 'sqlite';
|
|
365
366
|
schemaPath: string;
|
|
366
367
|
drizzleSchemaPath: string;
|
|
367
368
|
migrationsPath: string;
|
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { _ as doctorDiagnosticsSeverityRows, g as buildDoctorDiagnostics, i as generateDrizzleConfig, m as
|
|
1
|
+
import { _ as doctorDiagnosticsSeverityRows, g as buildDoctorDiagnostics, h as parseJsonSchema, i as generateDrizzleConfig, m as JsonSchemaDefinitionSchema, n as collectDoctorDiagnostics, r as createMigrationConfig, t as generateDrizzleSchema, v as doctorDiagnosticsTableRows, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-B6ocPcWd.mjs";
|
|
2
2
|
|
|
3
3
|
export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsSeverityRows, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
|
|
@@ -3,8 +3,8 @@ import fs from "node:fs/promises";
|
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import process from "node:process";
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
6
|
-
import Database from "better-sqlite3";
|
|
7
6
|
import { readFile, writeFile } from "jsonfile";
|
|
7
|
+
import Database from "better-sqlite3";
|
|
8
8
|
import * as esbuild from "esbuild";
|
|
9
9
|
import lockfile from "proper-lockfile";
|
|
10
10
|
|