aiex-cli 0.0.2 → 0.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-DR7s0UUh.mjs";
1
+ import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-Bnkbl48V.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
@@ -13388,6 +13388,8 @@ async function savePromptSnapshot(schema, aiexDir) {
13388
13388
  //#region src/core/extraction-audit.ts
13389
13389
  const AUDIT_ID_RE = /^[\w.-]+$/;
13390
13390
  const STALE_AFTER_MS = 1800 * 1e3;
13391
+ const CACHE_TTL_MS = 5e3;
13392
+ const recordCache = /* @__PURE__ */ new Map();
13391
13393
  function auditDir(aiexDir) {
13392
13394
  return path.join(aiexDir, "extracted", "_audit");
13393
13395
  }
@@ -13414,6 +13416,7 @@ async function createExtractionAuditRecord(aiexDir, input) {
13414
13416
  spaces: 2,
13415
13417
  EOL: "\n"
13416
13418
  });
13419
+ clearRecordCache(aiexDir);
13417
13420
  return record;
13418
13421
  }
13419
13422
  async function updateExtractionAuditRecord(aiexDir, id, patch) {
@@ -13430,6 +13433,7 @@ async function updateExtractionAuditRecord(aiexDir, id, patch) {
13430
13433
  spaces: 2,
13431
13434
  EOL: "\n"
13432
13435
  });
13436
+ clearRecordCache(aiexDir);
13433
13437
  return record;
13434
13438
  }
13435
13439
  async function readExtractionAuditRecord(aiexDir, id) {
@@ -13457,19 +13461,38 @@ async function markStaleIfNeeded(aiexDir, record) {
13457
13461
  spaces: 2,
13458
13462
  EOL: "\n"
13459
13463
  });
13464
+ clearRecordCache(aiexDir);
13460
13465
  return staleRecord;
13461
13466
  }
13467
+ function getCachedRecords(aiexDir) {
13468
+ const entry = recordCache.get(aiexDir);
13469
+ if (entry && Date.now() - entry.timestamp < CACHE_TTL_MS) return entry.records;
13470
+ return null;
13471
+ }
13472
+ function setCachedRecords(aiexDir, records) {
13473
+ recordCache.set(aiexDir, {
13474
+ records,
13475
+ timestamp: Date.now()
13476
+ });
13477
+ }
13478
+ function clearRecordCache(aiexDir) {
13479
+ recordCache.delete(aiexDir);
13480
+ }
13462
13481
  async function listExtractionAuditRecords(aiexDir) {
13482
+ const cached = getCachedRecords(aiexDir);
13483
+ if (cached) return cached;
13463
13484
  try {
13464
13485
  const dir = auditDir(aiexDir);
13465
13486
  const files = await fs.readdir(dir);
13466
- return (await Promise.all(files.filter((file) => file.endsWith(".json")).map(async (file) => {
13487
+ const result = (await Promise.all(files.filter((file) => file.endsWith(".json")).map(async (file) => {
13467
13488
  try {
13468
13489
  return await markStaleIfNeeded(aiexDir, await readFile(path.join(dir, file)));
13469
13490
  } catch {
13470
13491
  return null;
13471
13492
  }
13472
13493
  }))).filter((record) => !!record).sort((a, b) => b.createdAt.localeCompare(a.createdAt));
13494
+ setCachedRecords(aiexDir, result);
13495
+ return result;
13473
13496
  } catch {
13474
13497
  return [];
13475
13498
  }
@@ -13486,9 +13509,77 @@ async function deleteExtractionAuditRecord(aiexDir, id) {
13486
13509
  const uploadFiles = await fs.readdir(uploadsDir).catch(() => []);
13487
13510
  await Promise.all(uploadFiles.filter((file) => file.startsWith(`${id}-`)).map((file) => fs.unlink(path.join(uploadsDir, file)).catch(() => {})));
13488
13511
  await fs.unlink(auditPath(aiexDir, id)).catch(() => {});
13512
+ clearRecordCache(aiexDir);
13489
13513
  return true;
13490
13514
  }
13491
13515
 
13516
+ //#endregion
13517
+ //#region src/core/file-constants.ts
13518
+ const MAX_UPLOAD_SIZE = 150 * 1024 * 1024;
13519
+ const MAX_UPLOAD_SIZE_TEXT = "150MB";
13520
+ const SUPPORTED_FILE_TYPES_TEXT = "images, PDF, text, markdown, CSV, JSON, HTML, XML, YAML";
13521
+ const MISSING_UPLOAD_FILE_TEXT = "Uploaded file is no longer available. Re-run extraction with the original file.";
13522
+ const SUPPORTED_MIME_TYPES = new Set([
13523
+ "image/png",
13524
+ "image/jpeg",
13525
+ "image/gif",
13526
+ "image/webp",
13527
+ "image/bmp",
13528
+ "image/svg+xml",
13529
+ "application/pdf",
13530
+ "text/plain",
13531
+ "text/markdown",
13532
+ "text/csv",
13533
+ "application/json",
13534
+ "text/html",
13535
+ "text/xml",
13536
+ "application/x-yaml",
13537
+ "text/yaml"
13538
+ ]);
13539
+ const MIME_TO_EXT = {
13540
+ "image/png": "png",
13541
+ "image/jpeg": "jpg",
13542
+ "image/gif": "gif",
13543
+ "image/webp": "webp",
13544
+ "image/bmp": "bmp",
13545
+ "image/svg+xml": "svg",
13546
+ "application/pdf": "pdf",
13547
+ "text/plain": "txt",
13548
+ "text/markdown": "md",
13549
+ "text/csv": "csv",
13550
+ "application/json": "json",
13551
+ "text/html": "html",
13552
+ "text/xml": "xml",
13553
+ "application/x-yaml": "yaml",
13554
+ "text/yaml": "yaml"
13555
+ };
13556
+ function bytesToMB(bytes) {
13557
+ return bytes / (1024 * 1024);
13558
+ }
13559
+ function getExtensionFromMime(mimeType) {
13560
+ return MIME_TO_EXT[mimeType];
13561
+ }
13562
+ function isAllowedMimeType(mimeType) {
13563
+ return SUPPORTED_MIME_TYPES.has(mimeType);
13564
+ }
13565
+ function unsupportedFileTypeMessage(mimeType) {
13566
+ return `Unsupported file type "${mimeType}". Supported: ${SUPPORTED_FILE_TYPES_TEXT}.`;
13567
+ }
13568
+ function isMissingUploadFileError(error) {
13569
+ return !!error && typeof error === "object" && error.code === "ENOENT";
13570
+ }
13571
+ var FileValidationError = class extends Error {
13572
+ constructor(message) {
13573
+ super(message);
13574
+ this.name = "FileValidationError";
13575
+ }
13576
+ };
13577
+ function validateFileUpload(file) {
13578
+ if (file.size === 0) throw new FileValidationError("Uploaded file is empty");
13579
+ if (file.size > MAX_UPLOAD_SIZE) throw new FileValidationError(`File size (${bytesToMB(file.size).toFixed(1)}MB) exceeds ${MAX_UPLOAD_SIZE_TEXT} limit`);
13580
+ if (!isAllowedMimeType(file.type)) throw new FileValidationError(unsupportedFileTypeMessage(file.type));
13581
+ }
13582
+
13492
13583
  //#endregion
13493
13584
  //#region src/core/notion-sink.ts
13494
13585
  const RICH_TEXT_LIMIT = 2e3;
@@ -13632,34 +13723,34 @@ function suggestFieldMap(schemaFields, databaseProperties) {
13632
13723
  }
13633
13724
  return fieldMap;
13634
13725
  }
13635
- function hasProperties(value) {
13636
- return !!value && typeof value === "object" && !!value.properties && typeof value.properties === "object";
13726
+ function isDataSourceResponse(value) {
13727
+ return !!value && typeof value === "object" && typeof value.properties === "object" && !Array.isArray(value);
13637
13728
  }
13638
13729
  function firstDataSourceId(database) {
13639
- return (Array.isArray(database?.data_sources) ? database.data_sources : []).find((source) => typeof source?.id === "string" && source.id.trim())?.id;
13730
+ return database.data_sources?.find((source) => typeof source.id === "string" && source.id.trim())?.id;
13640
13731
  }
13641
13732
  async function resolveNotionDataSource(notion, inputId) {
13642
13733
  const id = parseNotionDatabaseId(inputId);
13643
13734
  if (!id) throw new Error("Notion database or data source URL/ID is required.");
13644
13735
  try {
13645
13736
  const dataSource$1 = await notion.dataSources.retrieve({ data_source_id: id });
13646
- if (hasProperties(dataSource$1)) return {
13737
+ if (isDataSourceResponse(dataSource$1)) return {
13647
13738
  databaseId: typeof dataSource$1.parent?.database_id === "string" ? dataSource$1.parent.database_id : id,
13648
- dataSourceId: dataSource$1.id ?? id,
13739
+ dataSourceId: dataSource$1.id,
13649
13740
  properties: dataSource$1.properties,
13650
- parent: { data_source_id: dataSource$1.id ?? id }
13741
+ parent: { data_source_id: dataSource$1.id }
13651
13742
  };
13652
13743
  } catch {}
13653
13744
  const database = await notion.databases.retrieve({ database_id: id });
13654
13745
  const dataSourceId = firstDataSourceId(database);
13655
13746
  if (!dataSourceId) throw new Error("No data source found for this Notion database. Copy the data source link from Notion, or share the source database with the integration.");
13656
13747
  const dataSource = await notion.dataSources.retrieve({ data_source_id: dataSourceId });
13657
- if (!hasProperties(dataSource)) throw new Error("Notion data source did not return properties. Make sure it is shared with the integration and is not a linked data source.");
13748
+ if (!isDataSourceResponse(dataSource)) throw new Error("Notion data source did not return properties. Make sure it is shared with the integration and is not a linked data source.");
13658
13749
  return {
13659
- databaseId: database.id ?? id,
13660
- dataSourceId: dataSource.id ?? dataSourceId,
13750
+ databaseId: database.id,
13751
+ dataSourceId: dataSource.id,
13661
13752
  properties: dataSource.properties,
13662
- parent: { data_source_id: dataSource.id ?? dataSourceId }
13753
+ parent: { data_source_id: dataSource.id }
13663
13754
  };
13664
13755
  }
13665
13756
  async function inspectNotionDatabase(input) {
@@ -13988,6 +14079,8 @@ async function listSchemas(aiexDir) {
13988
14079
  }
13989
14080
  }
13990
14081
  async function readExtractFileInput(filePath, aiConfig) {
14082
+ const stat = fs$1.statSync(filePath);
14083
+ if (stat.size > MAX_UPLOAD_SIZE) throw new Error(`File size (${bytesToMB(stat.size).toFixed(1)}MB) exceeds ${MAX_UPLOAD_SIZE_TEXT} limit: ${filePath}`);
13991
14084
  const ext = path.extname(filePath).toLowerCase().replace(".", "");
13992
14085
  if (FILE_PART_EXTENSIONS.has(ext)) return {
13993
14086
  text: "",
@@ -14383,6 +14476,10 @@ const retryCommand = defineCommand({
14383
14476
  }
14384
14477
  outro("Done!");
14385
14478
  } catch (error) {
14479
+ if (isMissingUploadFileError(error)) {
14480
+ failCommand(MISSING_UPLOAD_FILE_TEXT);
14481
+ return;
14482
+ }
14386
14483
  failCommand(error instanceof Error ? error.message : String(error));
14387
14484
  }
14388
14485
  }
@@ -14435,7 +14532,7 @@ const extractCommand = defineCommand({
14435
14532
  file: {
14436
14533
  type: "string",
14437
14534
  alias: "f",
14438
- description: "File path (image/PDF) to extract from"
14535
+ description: `File path to extract from. Supported: ${SUPPORTED_FILE_TYPES_TEXT}.`
14439
14536
  },
14440
14537
  model: {
14441
14538
  type: "string",
@@ -15252,94 +15349,56 @@ function getFormFile(value) {
15252
15349
  function safeUploadName(name$1) {
15253
15350
  return path.basename(name$1).replace(/[^\w.-]/g, "_") || "upload.txt";
15254
15351
  }
15352
+ function safeUploadNameForMime(file) {
15353
+ const safeName = safeUploadName(file.name);
15354
+ const ext = getExtensionFromMime(file.type);
15355
+ if (!ext) throw new FileValidationError(unsupportedFileTypeMessage(file.type));
15356
+ return `${path.parse(safeName).name || "upload"}.${ext}`;
15357
+ }
15358
+ function jsonResponse(body, status) {
15359
+ return new Response(JSON.stringify(body), {
15360
+ status,
15361
+ headers: { "content-type": "application/json" }
15362
+ });
15363
+ }
15364
+ async function auditFailureResponse(aiexDir, auditId, error, status) {
15365
+ const record = await updateExtractionAuditRecord(aiexDir, auditId, {
15366
+ status: "failed",
15367
+ error
15368
+ });
15369
+ return jsonResponse({
15370
+ success: false,
15371
+ error: record.error,
15372
+ auditId: record.id
15373
+ }, status);
15374
+ }
15255
15375
  async function saveUploadToFile(file, uploadsDir, id) {
15376
+ validateFileUpload(file);
15256
15377
  await fs.mkdir(uploadsDir, { recursive: true });
15257
- const filePath = path.join(uploadsDir, `${id}-${safeUploadName(file.name)}`);
15378
+ const filePath = path.join(uploadsDir, `${id}-${safeUploadNameForMime(file)}`);
15258
15379
  const buffer = Buffer.from(await file.arrayBuffer());
15259
15380
  await fs.writeFile(filePath, buffer);
15260
15381
  return filePath;
15261
15382
  }
15262
15383
  async function executeAuditedExtraction(input) {
15263
15384
  const aiConfig = await readAIConfig(input.aiexDir);
15264
- if (!aiConfig) {
15265
- const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15266
- status: "failed",
15267
- error: "AI configuration not found. Configure AI settings first."
15268
- });
15269
- return new Response(JSON.stringify({
15270
- success: false,
15271
- error: record$1.error,
15272
- auditId: record$1.id
15273
- }), {
15274
- status: 400,
15275
- headers: { "content-type": "application/json" }
15276
- });
15277
- }
15278
- if (!aiConfig.provider.apiKey) {
15279
- const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15280
- status: "failed",
15281
- error: "API Key not configured. Configure AI settings first."
15282
- });
15283
- return new Response(JSON.stringify({
15284
- success: false,
15285
- error: record$1.error,
15286
- auditId: record$1.id
15287
- }), {
15288
- status: 400,
15289
- headers: { "content-type": "application/json" }
15290
- });
15291
- }
15292
- if (!aiConfig.provider.models?.length) {
15293
- const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15294
- status: "failed",
15295
- error: "No models configured. Add at least one model in AI Settings."
15296
- });
15297
- return new Response(JSON.stringify({
15298
- success: false,
15299
- error: record$1.error,
15300
- auditId: record$1.id
15301
- }), {
15302
- status: 400,
15303
- headers: { "content-type": "application/json" }
15304
- });
15305
- }
15385
+ if (!aiConfig) return auditFailureResponse(input.aiexDir, input.auditId, "AI configuration not found. Configure AI settings first.", 400);
15386
+ if (!aiConfig.provider.apiKey) return auditFailureResponse(input.aiexDir, input.auditId, "API Key not configured. Configure AI settings first.", 400);
15387
+ if (!aiConfig.provider.models?.length) return auditFailureResponse(input.aiexDir, input.auditId, "No models configured. Add at least one model in AI Settings.", 400);
15306
15388
  const modelOverride = input.modelName ? aiConfig.provider.models.find((model) => model.name === input.modelName) : void 0;
15307
- if (input.modelName && !modelOverride) {
15308
- const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15309
- status: "failed",
15310
- error: `Model "${input.modelName}" not found in AI settings`
15311
- });
15312
- return new Response(JSON.stringify({
15313
- success: false,
15314
- error: record$1.error,
15315
- auditId: record$1.id
15316
- }), {
15317
- status: 400,
15318
- headers: { "content-type": "application/json" }
15319
- });
15320
- }
15389
+ if (input.modelName && !modelOverride) return auditFailureResponse(input.aiexDir, input.auditId, `Model "${input.modelName}" not found in AI settings`, 400);
15321
15390
  let inputText = input.text;
15322
15391
  let inputFilePath = input.filePath;
15323
- if (input.filePath) {
15392
+ if (input.filePath) try {
15324
15393
  const source = await readExtractFileInput(input.filePath, aiConfig);
15325
15394
  inputText = source.text;
15326
15395
  inputFilePath = source.filePath;
15396
+ } catch (error) {
15397
+ if (isMissingUploadFileError(error)) return auditFailureResponse(input.aiexDir, input.auditId, MISSING_UPLOAD_FILE_TEXT, 400);
15398
+ throw error;
15327
15399
  }
15328
15400
  const result = await extractSingle(input.aiexDir, input.config, aiConfig, input.schemaName, inputText, inputFilePath, modelOverride, { quiet: true });
15329
- if (!result.success) {
15330
- const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15331
- status: "failed",
15332
- error: result.error || "Extraction failed"
15333
- });
15334
- return new Response(JSON.stringify({
15335
- success: false,
15336
- error: record$1.error,
15337
- auditId: record$1.id
15338
- }), {
15339
- status: 500,
15340
- headers: { "content-type": "application/json" }
15341
- });
15342
- }
15401
+ if (!result.success) return auditFailureResponse(input.aiexDir, input.auditId, result.error || "Extraction failed", 500);
15343
15402
  const notionPages = [];
15344
15403
  if (aiConfig.notion?.enabled && aiConfig.notion.schemas?.[input.schemaName]?.databaseId?.trim()) try {
15345
15404
  if (!result.data || typeof result.data !== "object" || Array.isArray(result.data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
@@ -15353,14 +15412,11 @@ async function executeAuditedExtraction(input) {
15353
15412
  tokensUsed: result.tokensUsed,
15354
15413
  error: error instanceof Error ? error.message : String(error)
15355
15414
  });
15356
- return new Response(JSON.stringify({
15415
+ return jsonResponse({
15357
15416
  success: false,
15358
15417
  error: record$1.error,
15359
15418
  auditId: record$1.id
15360
- }), {
15361
- status: 500,
15362
- headers: { "content-type": "application/json" }
15363
- });
15419
+ }, 500);
15364
15420
  }
15365
15421
  const record = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15366
15422
  status: "succeeded",
@@ -15370,7 +15426,7 @@ async function executeAuditedExtraction(input) {
15370
15426
  notionPages: notionPages.length > 0 ? notionPages : void 0,
15371
15427
  tokensUsed: result.tokensUsed
15372
15428
  });
15373
- return new Response(JSON.stringify({
15429
+ return jsonResponse({
15374
15430
  success: true,
15375
15431
  outputPath: record.outputPath,
15376
15432
  outputName: record.outputName,
@@ -15378,10 +15434,7 @@ async function executeAuditedExtraction(input) {
15378
15434
  notionPages: record.notionPages,
15379
15435
  tokensUsed: record.tokensUsed,
15380
15436
  auditId: record.id
15381
- }), {
15382
- status: 200,
15383
- headers: { "content-type": "application/json" }
15384
- });
15437
+ }, 200);
15385
15438
  }
15386
15439
  function extractRoutes(config) {
15387
15440
  const app = new Hono();
@@ -15422,11 +15475,26 @@ function extractRoutes(config) {
15422
15475
  });
15423
15476
  let filePath;
15424
15477
  if (file) {
15425
- filePath = await saveUploadToFile(file, uploadsDir, audit.id);
15478
+ try {
15479
+ filePath = await saveUploadToFile(file, uploadsDir, audit.id);
15480
+ } catch (e) {
15481
+ if (e instanceof FileValidationError) {
15482
+ await updateExtractionAuditRecord(aiexDir, audit.id, {
15483
+ status: "failed",
15484
+ error: e.message
15485
+ });
15486
+ return c.json({
15487
+ success: false,
15488
+ error: e.message,
15489
+ auditId: audit.id
15490
+ }, 400);
15491
+ }
15492
+ throw e;
15493
+ }
15426
15494
  await updateExtractionAuditRecord(aiexDir, audit.id, { source: {
15427
15495
  type: "file",
15428
15496
  filePath,
15429
- fileName: safeUploadName(file.name)
15497
+ fileName: path.basename(filePath)
15430
15498
  } });
15431
15499
  }
15432
15500
  return executeAuditedExtraction({
@@ -65,7 +65,7 @@ function doctorDiagnosticsTableRows(d) {
65
65
  //#endregion
66
66
  //#region package.json
67
67
  var name = "aiex-cli";
68
- var version = "0.0.2";
68
+ var version = "0.0.3-beta.2";
69
69
  var description = "JSON Schema → SQLite with AI-powered data extraction";
70
70
  var package_default = {
71
71
  name,
package/dist/index.mjs CHANGED
@@ -1,3 +1,3 @@
1
- import { C as buildDoctorDiagnostics, T as formatDoctorDiagnosticsJson, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, w as doctorDiagnosticsTableRows } from "./doctor-collector-DR7s0UUh.mjs";
1
+ import { C as buildDoctorDiagnostics, T as formatDoctorDiagnosticsJson, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, w as doctorDiagnosticsTableRows } from "./doctor-collector-Bnkbl48V.mjs";
2
2
 
3
3
  export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "aiex-cli",
3
3
  "type": "module",
4
- "version": "0.0.2",
4
+ "version": "0.0.3-beta.2",
5
5
  "description": "JSON Schema → SQLite with AI-powered data extraction",
6
6
  "author": "OSpoon <zxin088@gmail.com>",
7
7
  "license": "MIT",