aiex-cli 0.0.1-beta.1 → 0.0.1-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -7
- package/dist/cli.mjs +195 -1
- package/dist/{doctor-Dk4QYsbc.mjs → doctor-BhNd6jt6.mjs} +2 -2
- package/dist/index.mjs +1 -1
- package/dist/web/index.html +2 -1
- package/dist/web/logo.png +0 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
<a href="https://github.com/OSpoon/aiex-cli/blob/main/LICENSE.md"><img src="https://img.shields.io/badge/license-MIT-green?style=flat&colorA=18181B&colorB=green" alt="license"></a>
|
|
5
5
|
</p>
|
|
6
6
|
|
|
7
|
-
<h1 align="center">
|
|
7
|
+
<h1 align="center">AIEX</h1>
|
|
8
8
|
|
|
9
9
|
<p align="center">
|
|
10
10
|
<b>JSON Schema → SQLite — with AI-powered data extraction</b>
|
|
@@ -23,7 +23,8 @@ npm install -g aiex-cli
|
|
|
23
23
|
```bash
|
|
24
24
|
aiex schema --init # set up .aiex/schema/ directory
|
|
25
25
|
aiex schema # generate SQLite from JSON Schema files
|
|
26
|
-
aiex extract -s invoice -f invoice.pdf
|
|
26
|
+
aiex extract -s invoice -f invoice.pdf # extract data with AI
|
|
27
|
+
aiex extract -s invoice -f invoice.pdf --db # extract and insert into database
|
|
27
28
|
```
|
|
28
29
|
|
|
29
30
|
<br>
|
|
@@ -47,6 +48,8 @@ aiex schema --init
|
|
|
47
48
|
|
|
48
49
|
Creates a `.aiex/` directory with example schemas to get you started.
|
|
49
50
|
|
|
51
|
+
Add your own JSON Schema files to `.aiex/schema/` (one file per table), then run `aiex schema` to migrate them into the database.
|
|
52
|
+
|
|
50
53
|
### 2. Visual Editor
|
|
51
54
|
|
|
52
55
|
```bash
|
|
@@ -72,11 +75,12 @@ aiex extract -s <schema> -t <text> # from text
|
|
|
72
75
|
|
|
73
76
|
The AI reads your document and outputs structured JSON matching your schema.
|
|
74
77
|
|
|
75
|
-
**
|
|
78
|
+
**Examples:**
|
|
76
79
|
```bash
|
|
77
|
-
aiex extract -s paper -f research.pdf
|
|
80
|
+
aiex extract -s paper -f research.pdf # save result to .aiex/extracted/
|
|
81
|
+
aiex extract -s paper -f research.pdf --db # also insert into SQLite database
|
|
78
82
|
```
|
|
79
|
-
|
|
83
|
+
Saves the extracted result to `.aiex/extracted/<schema-name>-<timestamp>.json` with fields like `title`, `firstAuthor`, `journal`, `year` — exactly as defined in your schema. Add `--db` to also insert the data directly into the SQLite database.
|
|
80
84
|
|
|
81
85
|
<br>
|
|
82
86
|
|
|
@@ -85,10 +89,11 @@ Creates `output.json` with fields like `title`, `firstAuthor`, `journal`, `year`
|
|
|
85
89
|
| Command | Description |
|
|
86
90
|
| --- | --- |
|
|
87
91
|
| `aiex schema --init` | Scaffold `.aiex/` directory with example schemas |
|
|
88
|
-
| `aiex schema
|
|
92
|
+
| `aiex schema` | Parse JSON Schema files and migrate to SQLite |
|
|
89
93
|
| `aiex schema --generate` | Generate Drizzle schema code only (skip migration) |
|
|
90
94
|
| `aiex web` | Launch visual schema editor in browser |
|
|
91
|
-
| `aiex extract -s <name>` | Extract structured data from documents via AI |
|
|
95
|
+
| `aiex extract -s <name> -f <file>` | Extract structured data from documents via AI |
|
|
96
|
+
| `aiex extract -s <name> -f <file> --db` | Extract and insert into SQLite database |
|
|
92
97
|
| `aiex doctor` | System and configuration diagnostics |
|
|
93
98
|
|
|
94
99
|
<br>
|
package/dist/cli.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { C as
|
|
1
|
+
import { C as doctorDiagnosticsTableRows, a as writeAIConfig, b as toSnakeCase, c as PLACEHOLDER_TEXT, d as seedConfig, f as description$1, g as createMigrationConfig, h as version, i as readAIConfig, l as AIConfigSchema, m as package_default, n as getDefaultAIConfig, o as DEFAULT_PROMPT_CONFIG, p as name, r as maskApiKey, s as PLACEHOLDER_SCHEMA, t as collectDoctorDiagnostics, u as createConfig, v as JsonSchemaDefinitionSchema, w as formatDoctorDiagnosticsJson, x as generateDrizzleSchema, y as parseJsonSchema } from "./doctor-BhNd6jt6.mjs";
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import process from "node:process";
|
|
@@ -10,6 +10,7 @@ import updateNotifier from "update-notifier";
|
|
|
10
10
|
import CliTable3 from "cli-table3";
|
|
11
11
|
import { consola } from "consola";
|
|
12
12
|
import { intro, outro, spinner } from "@clack/prompts";
|
|
13
|
+
import Database from "better-sqlite3";
|
|
13
14
|
import pc from "picocolors";
|
|
14
15
|
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
15
16
|
import { Output, generateText, jsonSchema } from "ai";
|
|
@@ -9174,6 +9175,143 @@ async function extractStructuredData(input) {
|
|
|
9174
9175
|
}
|
|
9175
9176
|
}
|
|
9176
9177
|
|
|
9178
|
+
//#endregion
|
|
9179
|
+
//#region src/core/ai-extraction/inserter.ts
|
|
9180
|
+
/**
 * Captures the name given to a `mode:` option inside a column's `drizzleType`
 * string (presumably generated Drizzle column code — confirm against the
 * schema generator). Capture group 1 is the mode name; single quotes, double
 * quotes and backticks are all accepted around it.
 */
const DRIZZLE_MODE_RE = /mode:\s*['"`](\w+)['"`]/;

/**
 * Reads the `mode` option out of a column's `drizzleType` string.
 *
 * @param {{ drizzleType?: string } | null | undefined} column - Parsed column description.
 * @returns {string | undefined} The mode name, or `undefined` when the column
 *   has no string `drizzleType` or the string carries no `mode:` option.
 */
function extractDrizzleMode(column) {
  const drizzleType = column?.drizzleType;
  // A column without a (string) drizzleType simply has no mode; do not throw.
  if (typeof drizzleType !== "string") return void 0;
  return drizzleType.match(DRIZZLE_MODE_RE)?.[1];
}
|
|
9184
|
+
/**
 * Last-resort normalisation for values that reach the database without a
 * dedicated column mode. The SQLite driver imported by this file
 * (better-sqlite3) only binds numbers, strings, bigints, buffers and null, so
 * anything else is mapped onto one of those.
 *
 * @param {unknown} value - A non-null, non-undefined value.
 * @returns {unknown} A value that can be bound as a statement parameter.
 */
function toSqliteBindable(value) {
  if (typeof value === "boolean") return value ? 1 : 0;
  if (value instanceof Date) return Number.isNaN(value.getTime()) ? null : value.toISOString();
  // Buffers / typed arrays are bindable as-is; other objects and arrays are stored as JSON text.
  if (typeof value === "object" && !ArrayBuffer.isView(value)) return JSON.stringify(value);
  return value;
}

/**
 * Converts one extracted value into the representation stored for `column`.
 *
 * The column's mode (read via `extractDrizzleMode`) selects the conversion:
 * - `json`: strings are kept, everything else is `JSON.stringify`-ed.
 * - `boolean`: stored as 1 / 0; the strings "false", "0" and "" count as false.
 * - `timestamp` / `timestamp_ms`: date strings and `Date` objects become epoch
 *   seconds / milliseconds. Strings that `Date.parse` cannot read are returned
 *   unchanged, other values (e.g. numbers) are passed through.
 * - no mode: the value is passed through, except for types the driver cannot
 *   bind (booleans, Dates, plain objects, arrays), which are normalised.
 *
 * @param {unknown} value - Value taken from the extracted data or a column default.
 * @param {object} column - Parsed column description.
 * @returns {unknown} The value to bind; `null` for `null` / `undefined` input.
 */
function convertValue(value, column) {
  if (value === null || value === void 0) return null;
  const mode = extractDrizzleMode(column);
  if (mode === "json") return typeof value === "string" ? value : JSON.stringify(value);
  if (mode === "boolean") {
    if (typeof value === "string") {
      // Textual booleans: a bare truthiness test would turn "false" into 1.
      const normalized = value.trim().toLowerCase();
      return normalized === "" || normalized === "false" || normalized === "0" ? 0 : 1;
    }
    return value ? 1 : 0;
  }
  if (mode === "timestamp" || mode === "timestamp_ms") {
    let ms;
    if (value instanceof Date) ms = value.getTime();
    else if (typeof value === "string") ms = Date.parse(value);
    else return toSqliteBindable(value);
    if (Number.isNaN(ms)) {
      // Unparseable strings are stored verbatim; an invalid Date has nothing to store.
      return value instanceof Date ? null : value;
    }
    return mode === "timestamp_ms" ? ms : Math.floor(ms / 1e3);
  }
  return toSqliteBindable(value);
}
|
|
9199
|
+
/**
 * Builds a parameterised INSERT statement for one row of `table`.
 *
 * Auto-increment columns are never written. A column receives the value found
 * under its name in `data`; when `data` has no value for it, the column's
 * `defaultValue` is used if one is declared, otherwise the column is left out
 * of the statement. Every value goes through `convertValue`.
 *
 * Table and column names are emitted as double-quoted identifiers so names
 * that collide with SQL keywords (`order`, `group`, ...) still form a valid
 * statement. When no column ends up populated, `INSERT ... DEFAULT VALUES` is
 * produced because an empty column list is not valid SQL.
 *
 * @param {{ name: string, columns: Array<object> }} table - Parsed table description.
 * @param {Record<string, unknown>} data - Row values keyed by column name.
 * @returns {{ sql: string, values: unknown[] }} Statement text and its bound parameters, in column order.
 */
function buildInsertSql(table, data) {
  const quoteIdentifier = (identifier) => `"${String(identifier).replace(/"/g, '""')}"`;
  // Defaults look JSON-encoded (the original parsed them unconditionally);
  // fall back to the raw text when one is not valid JSON instead of throwing.
  const decodeDefault = (raw) => {
    if (typeof raw !== "string") return raw;
    try {
      return JSON.parse(raw);
    } catch {
      return raw;
    }
  };

  const columns = [];
  const values = [];
  for (const col of table.columns) {
    if (col.isAutoIncrement) continue;
    const value = data[col.name];
    if (value !== void 0) {
      columns.push(col.name);
      values.push(convertValue(value, col));
      continue;
    }
    if (col.defaultValue !== void 0) {
      columns.push(col.name);
      values.push(convertValue(decodeDefault(col.defaultValue), col));
    }
  }

  const target = quoteIdentifier(table.name);
  if (columns.length === 0) {
    return {
      sql: `INSERT INTO ${target} DEFAULT VALUES`,
      values
    };
  }
  const columnList = columns.map(quoteIdentifier).join(", ");
  const placeholders = columns.map(() => "?").join(", ");
  return {
    sql: `INSERT INTO ${target} (${columnList}) VALUES (${placeholders})`,
    values
  };
}
|
|
9221
|
+
/**
 * Inserts a single row into `table` and reports the id SQLite assigned to it.
 *
 * When both `parentRowId` and `foreignKeyColumn` are supplied, the row is
 * linked to its parent by writing `parentRowId` into that column. The caller's
 * `data` object is never modified.
 *
 * @param {object} options
 * @param {object} options.db - Database handle exposing `prepare(sql).run(...values)`.
 * @param {object} options.table - Parsed table description passed to `buildInsertSql`.
 * @param {Record<string, unknown>} options.data - Row values keyed by column name.
 * @param {number} [options.parentRowId] - Row id of the parent row, if any.
 * @param {string} [options.foreignKeyColumn] - Column that stores the parent row id.
 * @returns {number} `lastInsertRowid` of the executed statement, as a number.
 */
function insertTableRow({ db, table, data, parentRowId, foreignKeyColumn }) {
  const linksToParent = parentRowId !== void 0 && Boolean(foreignKeyColumn);
  const row = linksToParent ? { ...data, [foreignKeyColumn]: parentRowId } : { ...data };
  const statement = buildInsertSql(table, row);
  const outcome = db.prepare(statement.sql).run(...statement.values);
  return Number(outcome.lastInsertRowid);
}
|
|
9228
|
+
/**
 * Projects extracted data onto the columns of one table.
 *
 * Each schema property becomes a snake_case column key (via `toSnakeCase`) when
 * `data` owns a value for it. Properties flagged as nested (`nested.enabled`,
 * or arrays whose `items.nested.enabled` is set) are skipped, as are properties
 * that map to an auto-increment column. When the schema enables
 * `table.timestamps`, `created_at` / `updated_at` are filled with the current
 * time in epoch seconds unless the data already provided them.
 *
 * `data` that is not an object (null, a primitive) and a schema without
 * `properties` contribute no fields instead of throwing.
 *
 * @param {unknown} data - Extracted object for this table.
 * @param {object} [schema] - JSON Schema node describing the object.
 * @param {{ columns?: Array<object> }} [table] - Parsed table description.
 * @returns {Record<string, unknown>} Row values keyed by column name.
 */
function parseDataByColumns(data, schema, table) {
  const result = {};
  const source = data !== null && typeof data === "object" ? data : {};
  const properties = schema?.properties;
  if (properties !== null && typeof properties === "object") {
    // Collected once so each property is a set lookup rather than a column scan.
    const autoIncrementColumns = new Set(
      (table?.columns ?? []).filter((c) => c.isAutoIncrement).map((c) => c.name)
    );
    for (const [propName, prop] of Object.entries(properties)) {
      if (prop?.nested?.enabled) continue;
      if (prop?.type === "array" && prop.items?.nested?.enabled) continue;
      const colName = toSnakeCase(propName);
      if (autoIncrementColumns.has(colName)) continue;
      // Own properties only: `in` would also match inherited names such as `toString`.
      if (Object.prototype.hasOwnProperty.call(source, propName)) result[colName] = source[propName];
    }
  }
  if (schema?.table?.timestamps) {
    // One clock read so both columns always carry the same value.
    const nowSeconds = Math.floor(Date.now() / 1e3);
    if (!("created_at" in result)) result.created_at = nowSeconds;
    if (!("updated_at" in result)) result.updated_at = nowSeconds;
  }
  return result;
}
|
|
9246
|
+
/**
 * Writes one extracted document into the database inside a single transaction.
 *
 * The first table produced by `parseJsonSchema(schema)` receives the main row.
 * For every reverse relation that has a matching forward relation, a matching
 * property present in `data`, and a known target table, the nested value is
 * inserted into the related table with the main row id as foreign key:
 * one row for `has-one`, one row per array item for `has-many`.
 * Null / undefined nested values and array items are skipped.
 *
 * Errors never escape: they are reported through the returned object. Because
 * a failed transaction is rolled back, a failure always reports an empty
 * `tablesInserted` list.
 *
 * @param {object} db - Database handle exposing `transaction(fn)` (plus whatever `insertTableRow` needs).
 * @param {object} schema - JSON Schema definition of the extracted document.
 * @param {Record<string, unknown>} data - Extracted document.
 * @returns {{ success: boolean, tablesInserted: Array<{ table: string, rowId: number }>, error?: string }}
 *   One `tablesInserted` entry per inserted row.
 */
function insertExtractedData(db, schema, data) {
  try {
    const parseResult = parseJsonSchema(schema);
    const mainTable = parseResult.tables[0];
    if (!mainTable) throw new Error("Schema did not produce any table to insert into");
    const properties = schema?.properties ?? {};
    const relations = parseResult.relations ?? [];
    const reverseRelations = parseResult.reverseRelations ?? [];

    const insertAll = db.transaction(() => {
      // Collected inside the transaction so a rollback cannot leak phantom rows to the caller.
      const inserted = [];
      const mainRowId = insertTableRow({
        db,
        table: mainTable,
        data: parseDataByColumns(data, schema, mainTable)
      });
      inserted.push({
        table: mainTable.name,
        rowId: mainRowId
      });

      for (const revRel of reverseRelations) {
        // The forward relation carries the foreign-key column of the child table.
        const rel = relations.find((r) => r.fromTable === revRel.toTable && r.toTable === revRel.fromTable);
        if (!rel) continue;
        const propName = Object.keys(properties).find(
          (key) => toSnakeCase(key) === revRel.name && Object.prototype.hasOwnProperty.call(data, key)
        );
        if (propName === void 0) continue;
        const nestedValue = data[propName];
        if (nestedValue === null || nestedValue === void 0) continue;
        const nestedTable = parseResult.tables.find((t) => t.name === revRel.toTable);
        if (!nestedTable) continue;

        if (revRel.type === "has-one") {
          const rowId = insertTableRow({
            db,
            table: nestedTable,
            data: parseDataByColumns(nestedValue, properties[propName], nestedTable),
            parentRowId: mainRowId,
            foreignKeyColumn: rel.fromColumn
          });
          inserted.push({
            table: revRel.toTable,
            rowId
          });
        } else if (revRel.type === "has-many") {
          if (!Array.isArray(nestedValue)) {
            throw new TypeError(`Expected an array for "${propName}" but received ${typeof nestedValue}`);
          }
          const itemSchema = properties[propName].items;
          for (const item of nestedValue) {
            // Mirror the has-one branch: an absent item is skipped, not fatal.
            if (item === null || item === void 0) continue;
            const rowId = insertTableRow({
              db,
              table: nestedTable,
              data: parseDataByColumns(item, itemSchema, nestedTable),
              parentRowId: mainRowId,
              foreignKeyColumn: rel.fromColumn
            });
            inserted.push({
              table: revRel.toTable,
              rowId
            });
          }
        }
      }
      return inserted;
    });

    return {
      success: true,
      tablesInserted: insertAll()
    };
  } catch (e) {
    return {
      success: false,
      // Nothing survives the rollback, so nothing is reported as inserted.
      tablesInserted: [],
      error: e instanceof Error ? e.message : String(e)
    };
  }
}
|
|
9314
|
+
|
|
9177
9315
|
//#endregion
|
|
9178
9316
|
//#region src/core/ai-extraction/snapshot.ts
|
|
9179
9317
|
async function savePromptSnapshot(schema, aiexDir) {
|
|
@@ -9197,6 +9335,25 @@ const IMAGE_EXTENSIONS = new Set([
|
|
|
9197
9335
|
"bmp",
|
|
9198
9336
|
"svg"
|
|
9199
9337
|
]);
|
|
9338
|
+
/**
 * Checks that the SQLite database exists and already contains every table the
 * schema maps to, so an insert can be attempted safely.
 *
 * The database is opened read-only and must already exist: this check never
 * creates or modifies the file.
 *
 * @param {string} dbPath - Path of the SQLite database file.
 * @param {object} schema - JSON Schema definition handed to `parseJsonSchema`.
 * @returns {Promise<string | null>} A user-facing error message, or `null`
 *   when the database is ready. Never rejects for database problems; those are
 *   returned as the message.
 */
async function ensureDatabaseReady(dbPath, schema) {
  try {
    await fs.access(dbPath);
  } catch {
    // Report the path that was actually checked, relative to the working
    // directory when it lives beneath it (keeps the familiar short form).
    const relativePath = path.relative(process.cwd(), dbPath);
    const shownPath = relativePath && !relativePath.startsWith("..") ? relativePath : dbPath;
    return `Database not found at ${pc.cyan(shownPath)}. Run ${pc.cyan("aiex schema")} first to create the database.`;
  }
  try {
    const { tables } = parseJsonSchema(schema);
    const db = new Database(dbPath, {
      readonly: true,
      fileMustExist: true
    });
    try {
      // Prepared once and reused for every table.
      const findTable = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name=?`);
      for (const table of tables) {
        if (!findTable.get(table.name)) {
          return `Table "${table.name}" not found in database. Run ${pc.cyan("aiex schema")} first to create tables.`;
        }
      }
    } finally {
      db.close();
    }
  } catch (e) {
    return `Cannot verify database: ${e instanceof Error ? e.message : String(e)}`;
  }
  return null;
}
|
|
9200
9357
|
const extractCommand = defineCommand({
|
|
9201
9358
|
meta: {
|
|
9202
9359
|
name: "extract",
|
|
@@ -9218,6 +9375,12 @@ const extractCommand = defineCommand({
|
|
|
9218
9375
|
type: "string",
|
|
9219
9376
|
alias: "f",
|
|
9220
9377
|
description: "File path (text/image/PDF) to extract from"
|
|
9378
|
+
},
|
|
9379
|
+
db: {
|
|
9380
|
+
type: "boolean",
|
|
9381
|
+
alias: "d",
|
|
9382
|
+
description: "Insert extracted data into SQLite database",
|
|
9383
|
+
default: false
|
|
9221
9384
|
}
|
|
9222
9385
|
},
|
|
9223
9386
|
async run({ args }) {
|
|
@@ -9302,6 +9465,37 @@ const extractCommand = defineCommand({
|
|
|
9302
9465
|
s.stop("Extraction complete");
|
|
9303
9466
|
if (result.outputPath) consola.success(`Result saved: ${pc.cyan(result.outputPath)}`);
|
|
9304
9467
|
if (result.tokensUsed) consola.info(pc.gray(`Token usage: prompt=${result.tokensUsed.prompt}, completion=${result.tokensUsed.completion}, total=${result.tokensUsed.total}`));
|
|
9468
|
+
if (args.db && result.data) {
|
|
9469
|
+
const s2 = spinner();
|
|
9470
|
+
s2.start("Inserting into database...");
|
|
9471
|
+
const dbError = await ensureDatabaseReady(config.databasePath, schema);
|
|
9472
|
+
if (dbError) {
|
|
9473
|
+
s2.stop("Database not ready");
|
|
9474
|
+
consola.error(dbError);
|
|
9475
|
+
outro("Failed!");
|
|
9476
|
+
return;
|
|
9477
|
+
}
|
|
9478
|
+
try {
|
|
9479
|
+
const db = new Database(config.databasePath);
|
|
9480
|
+
try {
|
|
9481
|
+
const insertResult = insertExtractedData(db, schema, result.data);
|
|
9482
|
+
if (insertResult.success) s2.stop(`Inserted into ${insertResult.tablesInserted.length} table(s)`);
|
|
9483
|
+
else {
|
|
9484
|
+
s2.stop("Database insert failed");
|
|
9485
|
+
consola.error(insertResult.error || "Unknown error");
|
|
9486
|
+
outro("Failed!");
|
|
9487
|
+
return;
|
|
9488
|
+
}
|
|
9489
|
+
} finally {
|
|
9490
|
+
db.close();
|
|
9491
|
+
}
|
|
9492
|
+
} catch (e) {
|
|
9493
|
+
s2.stop("Database insert failed");
|
|
9494
|
+
consola.error(e instanceof Error ? e.message : String(e));
|
|
9495
|
+
outro("Failed!");
|
|
9496
|
+
return;
|
|
9497
|
+
}
|
|
9498
|
+
}
|
|
9305
9499
|
outro("Done!");
|
|
9306
9500
|
}
|
|
9307
9501
|
});
|
|
package/dist/{doctor-Dk4QYsbc.mjs → doctor-BhNd6jt6.mjs}
CHANGED
|
@@ -411,7 +411,7 @@ function generateDrizzleConfig() {
|
|
|
411
411
|
//#endregion
|
|
412
412
|
//#region package.json
|
|
413
413
|
var name = "aiex-cli";
|
|
414
|
-
var version = "0.0.1-beta.1";
|
|
414
|
+
var version = "0.0.1-beta.3";
|
|
415
415
|
var description = "JSON Schema → SQLite with AI-powered data extraction";
|
|
416
416
|
var package_default = {
|
|
417
417
|
name,
|
|
@@ -708,4 +708,4 @@ async function collectDoctorDiagnostics(options = {}) {
|
|
|
708
708
|
}
|
|
709
709
|
|
|
710
710
|
//#endregion
|
|
711
|
-
export {
|
|
711
|
+
export { doctorDiagnosticsTableRows as C, buildDoctorDiagnostics as S, generateDrizzleConfig as _, writeAIConfig as a, toSnakeCase as b, PLACEHOLDER_TEXT as c, seedConfig as d, description as f, createMigrationConfig as g, version as h, readAIConfig as i, AIConfigSchema as l, package_default as m, getDefaultAIConfig as n, DEFAULT_PROMPT_CONFIG as o, name as p, maskApiKey as r, PLACEHOLDER_SCHEMA as s, collectDoctorDiagnostics as t, createConfig as u, JsonSchemaDefinitionSchema as v, formatDoctorDiagnosticsJson as w, generateDrizzleSchema as x, parseJsonSchema as y };
|
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { C as
|
|
1
|
+
import { C as doctorDiagnosticsTableRows, S as buildDoctorDiagnostics, _ as generateDrizzleConfig, g as createMigrationConfig, t as collectDoctorDiagnostics, v as JsonSchemaDefinitionSchema, w as formatDoctorDiagnosticsJson, x as generateDrizzleSchema, y as parseJsonSchema } from "./doctor-BhNd6jt6.mjs";
|
|
2
2
|
|
|
3
3
|
export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
|
package/dist/web/index.html
CHANGED
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
<head>
|
|
4
4
|
<meta charset="UTF-8" />
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
6
|
-
<title>
|
|
6
|
+
<title>AIEX Schema Editor</title>
|
|
7
|
+
<link rel="icon" href="/logo.png" type="image/png" />
|
|
7
8
|
<script>
|
|
8
9
|
;(function () {
|
|
9
10
|
var key = "jscb-color-scheme"
|
|
Binary file
|