aiex-cli 0.0.2-beta.9 → 0.0.3-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-Cb9X2mxU.mjs";
1
+ import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-CjFTz8p4.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
@@ -13388,6 +13388,8 @@ async function savePromptSnapshot(schema, aiexDir) {
13388
13388
  //#region src/core/extraction-audit.ts
13389
13389
  const AUDIT_ID_RE = /^[\w.-]+$/;
13390
13390
  const STALE_AFTER_MS = 1800 * 1e3;
13391
+ const CACHE_TTL_MS = 5e3;
13392
+ const recordCache = /* @__PURE__ */ new Map();
13391
13393
  function auditDir(aiexDir) {
13392
13394
  return path.join(aiexDir, "extracted", "_audit");
13393
13395
  }
@@ -13414,6 +13416,7 @@ async function createExtractionAuditRecord(aiexDir, input) {
13414
13416
  spaces: 2,
13415
13417
  EOL: "\n"
13416
13418
  });
13419
+ clearRecordCache(aiexDir);
13417
13420
  return record;
13418
13421
  }
13419
13422
  async function updateExtractionAuditRecord(aiexDir, id, patch) {
@@ -13430,6 +13433,7 @@ async function updateExtractionAuditRecord(aiexDir, id, patch) {
13430
13433
  spaces: 2,
13431
13434
  EOL: "\n"
13432
13435
  });
13436
+ clearRecordCache(aiexDir);
13433
13437
  return record;
13434
13438
  }
13435
13439
  async function readExtractionAuditRecord(aiexDir, id) {
@@ -13457,19 +13461,38 @@ async function markStaleIfNeeded(aiexDir, record) {
13457
13461
  spaces: 2,
13458
13462
  EOL: "\n"
13459
13463
  });
13464
+ clearRecordCache(aiexDir);
13460
13465
  return staleRecord;
13461
13466
  }
13467
+ function getCachedRecords(aiexDir) {
13468
+ const entry = recordCache.get(aiexDir);
13469
+ if (entry && Date.now() - entry.timestamp < CACHE_TTL_MS) return entry.records;
13470
+ return null;
13471
+ }
13472
+ function setCachedRecords(aiexDir, records) {
13473
+ recordCache.set(aiexDir, {
13474
+ records,
13475
+ timestamp: Date.now()
13476
+ });
13477
+ }
13478
+ function clearRecordCache(aiexDir) {
13479
+ recordCache.delete(aiexDir);
13480
+ }
13462
13481
  async function listExtractionAuditRecords(aiexDir) {
13482
+ const cached = getCachedRecords(aiexDir);
13483
+ if (cached) return cached;
13463
13484
  try {
13464
13485
  const dir = auditDir(aiexDir);
13465
13486
  const files = await fs.readdir(dir);
13466
- return (await Promise.all(files.filter((file) => file.endsWith(".json")).map(async (file) => {
13487
+ const result = (await Promise.all(files.filter((file) => file.endsWith(".json")).map(async (file) => {
13467
13488
  try {
13468
13489
  return await markStaleIfNeeded(aiexDir, await readFile(path.join(dir, file)));
13469
13490
  } catch {
13470
13491
  return null;
13471
13492
  }
13472
13493
  }))).filter((record) => !!record).sort((a, b) => b.createdAt.localeCompare(a.createdAt));
13494
+ setCachedRecords(aiexDir, result);
13495
+ return result;
13473
13496
  } catch {
13474
13497
  return [];
13475
13498
  }
@@ -13486,6 +13509,7 @@ async function deleteExtractionAuditRecord(aiexDir, id) {
13486
13509
  const uploadFiles = await fs.readdir(uploadsDir).catch(() => []);
13487
13510
  await Promise.all(uploadFiles.filter((file) => file.startsWith(`${id}-`)).map((file) => fs.unlink(path.join(uploadsDir, file)).catch(() => {})));
13488
13511
  await fs.unlink(auditPath(aiexDir, id)).catch(() => {});
13512
+ clearRecordCache(aiexDir);
13489
13513
  return true;
13490
13514
  }
13491
13515
 
@@ -13632,34 +13656,34 @@ function suggestFieldMap(schemaFields, databaseProperties) {
13632
13656
  }
13633
13657
  return fieldMap;
13634
13658
  }
13635
- function hasProperties(value) {
13636
- return !!value && typeof value === "object" && !!value.properties && typeof value.properties === "object";
13659
+ function isDataSourceResponse(value) {
13660
+ return !!value && typeof value === "object" && typeof value.properties === "object" && !Array.isArray(value);
13637
13661
  }
13638
13662
  function firstDataSourceId(database) {
13639
- return (Array.isArray(database?.data_sources) ? database.data_sources : []).find((source) => typeof source?.id === "string" && source.id.trim())?.id;
13663
+ return database.data_sources?.find((source) => typeof source.id === "string" && source.id.trim())?.id;
13640
13664
  }
13641
13665
  async function resolveNotionDataSource(notion, inputId) {
13642
13666
  const id = parseNotionDatabaseId(inputId);
13643
13667
  if (!id) throw new Error("Notion database or data source URL/ID is required.");
13644
13668
  try {
13645
13669
  const dataSource$1 = await notion.dataSources.retrieve({ data_source_id: id });
13646
- if (hasProperties(dataSource$1)) return {
13670
+ if (isDataSourceResponse(dataSource$1)) return {
13647
13671
  databaseId: typeof dataSource$1.parent?.database_id === "string" ? dataSource$1.parent.database_id : id,
13648
- dataSourceId: dataSource$1.id ?? id,
13672
+ dataSourceId: dataSource$1.id,
13649
13673
  properties: dataSource$1.properties,
13650
- parent: { data_source_id: dataSource$1.id ?? id }
13674
+ parent: { data_source_id: dataSource$1.id }
13651
13675
  };
13652
13676
  } catch {}
13653
13677
  const database = await notion.databases.retrieve({ database_id: id });
13654
13678
  const dataSourceId = firstDataSourceId(database);
13655
13679
  if (!dataSourceId) throw new Error("No data source found for this Notion database. Copy the data source link from Notion, or share the source database with the integration.");
13656
13680
  const dataSource = await notion.dataSources.retrieve({ data_source_id: dataSourceId });
13657
- if (!hasProperties(dataSource)) throw new Error("Notion data source did not return properties. Make sure it is shared with the integration and is not a linked data source.");
13681
+ if (!isDataSourceResponse(dataSource)) throw new Error("Notion data source did not return properties. Make sure it is shared with the integration and is not a linked data source.");
13658
13682
  return {
13659
- databaseId: database.id ?? id,
13660
- dataSourceId: dataSource.id ?? dataSourceId,
13683
+ databaseId: database.id,
13684
+ dataSourceId: dataSource.id,
13661
13685
  properties: dataSource.properties,
13662
- parent: { data_source_id: dataSource.id ?? dataSourceId }
13686
+ parent: { data_source_id: dataSource.id }
13663
13687
  };
13664
13688
  }
13665
13689
  async function inspectNotionDatabase(input) {
@@ -13987,7 +14011,12 @@ async function listSchemas(aiexDir) {
13987
14011
  return [];
13988
14012
  }
13989
14013
  }
14014
+ function getFileSizeMB(filePath) {
14015
+ return fs$1.statSync(filePath).size / (1024 * 1024);
14016
+ }
13990
14017
  async function readExtractFileInput(filePath, aiConfig) {
14018
+ const sizeMB = getFileSizeMB(filePath);
14019
+ if (sizeMB > 150) throw new Error(`File size (${sizeMB.toFixed(1)}MB) exceeds 150MB limit: ${filePath}`);
13991
14020
  const ext = path.extname(filePath).toLowerCase().replace(".", "");
13992
14021
  if (FILE_PART_EXTENSIONS.has(ext)) return {
13993
14022
  text: "",
@@ -14964,6 +14993,7 @@ function aiRoutes(config) {
14964
14993
  //#region src/server/routes/data.ts
14965
14994
  const FILE_REGEX = /\.json$/;
14966
14995
  const EXTRACTION_TIMESTAMP_RE = /-\d{4}-\d{2}-\d{2}T/;
14996
+ const INTERNAL_ROWID_COLUMN = "__aiex_rowid";
14967
14997
  const TIMESTAMP_CLEANUP = /(\d{2})-(\d{2})-(\d{2})/;
14968
14998
  const TIMESTAMP_TZ = /(\d{3})Z/;
14969
14999
  const tableParamSchema = z.object({ name: z.string().regex(/^[a-z][a-z0-9_]*$/) });
@@ -14980,6 +15010,31 @@ function invalidParamResponse$1(message) {
14980
15010
  if (!result.success) return c.json({ error: message }, 400);
14981
15011
  };
14982
15012
  }
15013
+ function getAuditNotionStatus(record) {
15014
+ if (record.notionPages?.length) return "synced";
15015
+ if (record.status === "failed") return "failed";
15016
+ return "not_synced";
15017
+ }
15018
+ async function getRowExtractionActions(aiexDir, tableName) {
15019
+ const actions = /* @__PURE__ */ new Map();
15020
+ const auditRecords = await listExtractionAuditRecords(aiexDir);
15021
+ for (const record of auditRecords) {
15022
+ if (!record.outputName) continue;
15023
+ for (const inserted of record.tablesInserted ?? []) {
15024
+ if (inserted.table !== tableName) continue;
15025
+ const key = String(inserted.rowId);
15026
+ if (actions.has(key)) continue;
15027
+ const notionPages = record.notionPages?.length ? record.notionPages : void 0;
15028
+ actions.set(key, {
15029
+ extractionName: record.outputName,
15030
+ notionStatus: getAuditNotionStatus(record),
15031
+ notionPages,
15032
+ notionError: !notionPages && record.status === "failed" ? record.error : void 0
15033
+ });
15034
+ }
15035
+ }
15036
+ return actions;
15037
+ }
14983
15038
  function schemaNameFromExtractionFile(name$1) {
14984
15039
  const stem = name$1.replace(FILE_REGEX, "");
14985
15040
  const match = stem.match(EXTRACTION_TIMESTAMP_RE);
@@ -14997,22 +15052,27 @@ function dataRoutes(config) {
14997
15052
  try {
14998
15053
  await fs.mkdir(extractedDir, { recursive: true });
14999
15054
  const jsonFiles = (await fs.readdir(extractedDir)).filter((f) => f.endsWith(".json") && !f.endsWith(".prompt.md"));
15055
+ const auditRecords = await listExtractionAuditRecords(aiexDir);
15056
+ const auditByOutputName = new Map(auditRecords.map((record) => [record.outputName, record]));
15000
15057
  const records = [];
15001
15058
  for (const file of jsonFiles) {
15002
- const name$1 = file.replace(FILE_REGEX, "");
15003
- const idx = name$1.lastIndexOf("-");
15004
- if (idx === -1) continue;
15005
- const schemaName = name$1.slice(0, idx);
15006
- const timestamp = name$1.slice(idx + 1).replace(/-/g, (d, i) => i === 4 || i === 7 ? "-" : d).replace(TIMESTAMP_CLEANUP, (_, h, m, s) => `${h}:${m}:${s}`).replace(TIMESTAMP_TZ, ".$1Z");
15059
+ const schemaName = schemaNameFromExtractionFile(file);
15060
+ if (!schemaName) continue;
15061
+ const timestamp = file.replace(FILE_REGEX, "").slice(schemaName.length + 1).replace(/-/g, (d, i) => i === 4 || i === 7 ? "-" : d).replace(TIMESTAMP_CLEANUP, (_, h, m, s) => `${h}:${m}:${s}`).replace(TIMESTAMP_TZ, ".$1Z");
15007
15062
  const filePath = path.join(extractedDir, file);
15008
15063
  try {
15009
15064
  const stat = await fs.stat(filePath);
15065
+ const audit = auditByOutputName.get(file);
15066
+ const notionPages = audit?.notionPages?.length ? audit.notionPages : void 0;
15010
15067
  records.push({
15011
15068
  name: file,
15012
15069
  schemaName,
15013
15070
  timestamp,
15014
15071
  fileSize: stat.size,
15015
- modifiedAt: stat.mtime.toISOString()
15072
+ modifiedAt: stat.mtime.toISOString(),
15073
+ notionStatus: notionPages ? "synced" : audit?.status === "failed" ? "failed" : "not_synced",
15074
+ notionPages,
15075
+ notionError: !notionPages && audit?.status === "failed" ? audit.error : void 0
15016
15076
  });
15017
15077
  } catch {
15018
15078
  continue;
@@ -15099,16 +15159,24 @@ function dataRoutes(config) {
15099
15159
  const offset = (page - 1) * pageSize;
15100
15160
  const totalPages = Math.max(1, Math.ceil(total / pageSize));
15101
15161
  const result = await sql`
15102
- select *
15162
+ select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
15103
15163
  from ${sql.table(tableName)}
15104
15164
  ${searchCondition}
15105
15165
  ${orderBy}
15106
15166
  limit ${pageSize}
15107
15167
  offset ${offset}
15108
15168
  `.execute(db);
15169
+ const actionsByRowId = await getRowExtractionActions(aiexDir, tableName);
15170
+ const rowActions = Object.fromEntries(result.rows.map((row, index) => {
15171
+ const rowId = row[INTERNAL_ROWID_COLUMN];
15172
+ const action = rowId === null || rowId === void 0 ? void 0 : actionsByRowId.get(String(rowId));
15173
+ return action ? [String(index), action] : null;
15174
+ }).filter((entry) => !!entry));
15175
+ const rows = result.rows.map(({ [INTERNAL_ROWID_COLUMN]: _rowid, ...row }) => row);
15109
15176
  return c.json({
15110
15177
  columns,
15111
- rows: result.rows,
15178
+ rows,
15179
+ rowActions,
15112
15180
  total,
15113
15181
  page,
15114
15182
  pageSize,
@@ -15162,9 +15230,19 @@ function dataRoutes(config) {
15162
15230
  databaseId: page.databaseId,
15163
15231
  pageId: page.pageId
15164
15232
  }];
15165
- const record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
15233
+ let record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
15234
+ if (!record) record = await createExtractionAuditRecord(aiexDir, {
15235
+ schemaName,
15236
+ source: {
15237
+ type: "file",
15238
+ filePath,
15239
+ fileName: name$1
15240
+ }
15241
+ });
15166
15242
  if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
15167
15243
  status: "succeeded",
15244
+ outputPath: filePath,
15245
+ outputName: name$1,
15168
15246
  notionPages,
15169
15247
  error: void 0
15170
15248
  });
@@ -15173,15 +15251,79 @@ function dataRoutes(config) {
15173
15251
  notionPages
15174
15252
  });
15175
15253
  } catch (error) {
15254
+ const message = error instanceof Error ? error.message : String(error);
15255
+ const record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
15256
+ if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
15257
+ status: "failed",
15258
+ outputPath: filePath,
15259
+ outputName: name$1,
15260
+ error: message
15261
+ });
15176
15262
  return c.json({
15177
15263
  success: false,
15178
- error: error instanceof Error ? error.message : String(error)
15264
+ error: message
15179
15265
  }, 500);
15180
15266
  }
15181
15267
  });
15182
15268
  return app;
15183
15269
  }
15184
15270
 
15271
+ //#endregion
15272
+ //#region src/core/file-constants.ts
15273
+ const MAX_UPLOAD_SIZE = 150 * 1024 * 1024;
15274
+ const MAX_UPLOAD_SIZE_TEXT = "150MB";
15275
+ const SUPPORTED_MIME_TYPES = new Set([
15276
+ "image/png",
15277
+ "image/jpeg",
15278
+ "image/gif",
15279
+ "image/webp",
15280
+ "image/bmp",
15281
+ "image/svg+xml",
15282
+ "application/pdf",
15283
+ "text/plain",
15284
+ "text/markdown",
15285
+ "text/csv",
15286
+ "application/json",
15287
+ "text/html",
15288
+ "text/xml",
15289
+ "application/x-yaml",
15290
+ "text/yaml"
15291
+ ]);
15292
+ const MIME_TO_EXT = {
15293
+ "image/png": "png",
15294
+ "image/jpeg": "jpg",
15295
+ "image/gif": "gif",
15296
+ "image/webp": "webp",
15297
+ "image/bmp": "bmp",
15298
+ "image/svg+xml": "svg",
15299
+ "application/pdf": "pdf",
15300
+ "text/plain": "txt",
15301
+ "text/markdown": "md",
15302
+ "text/csv": "csv",
15303
+ "application/json": "json",
15304
+ "text/html": "html",
15305
+ "text/xml": "xml",
15306
+ "application/x-yaml": "yaml",
15307
+ "text/yaml": "yaml"
15308
+ };
15309
+ function getExtensionFromMime(mimeType) {
15310
+ return MIME_TO_EXT[mimeType];
15311
+ }
15312
+ function isAllowedMimeType(mimeType) {
15313
+ return SUPPORTED_MIME_TYPES.has(mimeType);
15314
+ }
15315
+ var FileValidationError = class extends Error {
15316
+ constructor(message) {
15317
+ super(message);
15318
+ this.name = "FileValidationError";
15319
+ }
15320
+ };
15321
+ function validateFileUpload(file) {
15322
+ if (file.size === 0) throw new FileValidationError("Uploaded file is empty");
15323
+ if (file.size > MAX_UPLOAD_SIZE) throw new FileValidationError(`File size (${(file.size / 1024 / 1024).toFixed(1)}MB) exceeds ${MAX_UPLOAD_SIZE_TEXT} limit`);
15324
+ if (!isAllowedMimeType(file.type)) throw new FileValidationError(`Unsupported file type "${file.type}". Supported types: ${[...SUPPORTED_MIME_TYPES].join(", ")}`);
15325
+ }
15326
+
15185
15327
  //#endregion
15186
15328
  //#region src/server/routes/extract.ts
15187
15329
  function getFormString(value) {
@@ -15195,9 +15337,16 @@ function getFormFile(value) {
15195
15337
  function safeUploadName(name$1) {
15196
15338
  return path.basename(name$1).replace(/[^\w.-]/g, "_") || "upload.txt";
15197
15339
  }
15340
+ function safeUploadNameForMime(file) {
15341
+ const safeName = safeUploadName(file.name);
15342
+ const ext = getExtensionFromMime(file.type);
15343
+ if (!ext) throw new FileValidationError(`Unsupported file type "${file.type}"`);
15344
+ return `${path.parse(safeName).name || "upload"}.${ext}`;
15345
+ }
15198
15346
  async function saveUploadToFile(file, uploadsDir, id) {
15347
+ validateFileUpload(file);
15199
15348
  await fs.mkdir(uploadsDir, { recursive: true });
15200
- const filePath = path.join(uploadsDir, `${id}-${safeUploadName(file.name)}`);
15349
+ const filePath = path.join(uploadsDir, `${id}-${safeUploadNameForMime(file)}`);
15201
15350
  const buffer = Buffer.from(await file.arrayBuffer());
15202
15351
  await fs.writeFile(filePath, buffer);
15203
15352
  return filePath;
@@ -15365,11 +15514,25 @@ function extractRoutes(config) {
15365
15514
  });
15366
15515
  let filePath;
15367
15516
  if (file) {
15368
- filePath = await saveUploadToFile(file, uploadsDir, audit.id);
15517
+ try {
15518
+ filePath = await saveUploadToFile(file, uploadsDir, audit.id);
15519
+ } catch (e) {
15520
+ if (e instanceof FileValidationError) {
15521
+ await updateExtractionAuditRecord(aiexDir, audit.id, {
15522
+ status: "failed",
15523
+ error: e.message
15524
+ });
15525
+ return c.json({
15526
+ success: false,
15527
+ error: e.message
15528
+ }, 400);
15529
+ }
15530
+ throw e;
15531
+ }
15369
15532
  await updateExtractionAuditRecord(aiexDir, audit.id, { source: {
15370
15533
  type: "file",
15371
15534
  filePath,
15372
- fileName: safeUploadName(file.name)
15535
+ fileName: path.basename(filePath)
15373
15536
  } });
15374
15537
  }
15375
15538
  return executeAuditedExtraction({
@@ -65,7 +65,7 @@ function doctorDiagnosticsTableRows(d) {
65
65
  //#endregion
66
66
  //#region package.json
67
67
  var name = "aiex-cli";
68
- var version = "0.0.2-beta.9";
68
+ var version = "0.0.3-beta.1";
69
69
  var description = "JSON Schema → SQLite with AI-powered data extraction";
70
70
  var package_default = {
71
71
  name,
package/dist/index.mjs CHANGED
@@ -1,3 +1,3 @@
1
- import { C as buildDoctorDiagnostics, T as formatDoctorDiagnosticsJson, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, w as doctorDiagnosticsTableRows } from "./doctor-collector-Cb9X2mxU.mjs";
1
+ import { C as buildDoctorDiagnostics, T as formatDoctorDiagnosticsJson, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, w as doctorDiagnosticsTableRows } from "./doctor-collector-CjFTz8p4.mjs";
2
2
 
3
3
  export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };