aiex-cli 0.0.2 → 0.0.3-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-DR7s0UUh.mjs";
1
+ import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-CjFTz8p4.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
@@ -13388,6 +13388,8 @@ async function savePromptSnapshot(schema, aiexDir) {
13388
13388
  //#region src/core/extraction-audit.ts
13389
13389
  const AUDIT_ID_RE = /^[\w.-]+$/;
13390
13390
  const STALE_AFTER_MS = 1800 * 1e3;
13391
+ const CACHE_TTL_MS = 5e3;
13392
+ const recordCache = /* @__PURE__ */ new Map();
13391
13393
  function auditDir(aiexDir) {
13392
13394
  return path.join(aiexDir, "extracted", "_audit");
13393
13395
  }
@@ -13414,6 +13416,7 @@ async function createExtractionAuditRecord(aiexDir, input) {
13414
13416
  spaces: 2,
13415
13417
  EOL: "\n"
13416
13418
  });
13419
+ clearRecordCache(aiexDir);
13417
13420
  return record;
13418
13421
  }
13419
13422
  async function updateExtractionAuditRecord(aiexDir, id, patch) {
@@ -13430,6 +13433,7 @@ async function updateExtractionAuditRecord(aiexDir, id, patch) {
13430
13433
  spaces: 2,
13431
13434
  EOL: "\n"
13432
13435
  });
13436
+ clearRecordCache(aiexDir);
13433
13437
  return record;
13434
13438
  }
13435
13439
  async function readExtractionAuditRecord(aiexDir, id) {
@@ -13457,19 +13461,38 @@ async function markStaleIfNeeded(aiexDir, record) {
13457
13461
  spaces: 2,
13458
13462
  EOL: "\n"
13459
13463
  });
13464
+ clearRecordCache(aiexDir);
13460
13465
  return staleRecord;
13461
13466
  }
13467
+ function getCachedRecords(aiexDir) {
13468
+ const entry = recordCache.get(aiexDir);
13469
+ if (entry && Date.now() - entry.timestamp < CACHE_TTL_MS) return entry.records;
13470
+ return null;
13471
+ }
13472
+ function setCachedRecords(aiexDir, records) {
13473
+ recordCache.set(aiexDir, {
13474
+ records,
13475
+ timestamp: Date.now()
13476
+ });
13477
+ }
13478
+ function clearRecordCache(aiexDir) {
13479
+ recordCache.delete(aiexDir);
13480
+ }
13462
13481
  async function listExtractionAuditRecords(aiexDir) {
13482
+ const cached = getCachedRecords(aiexDir);
13483
+ if (cached) return cached;
13463
13484
  try {
13464
13485
  const dir = auditDir(aiexDir);
13465
13486
  const files = await fs.readdir(dir);
13466
- return (await Promise.all(files.filter((file) => file.endsWith(".json")).map(async (file) => {
13487
+ const result = (await Promise.all(files.filter((file) => file.endsWith(".json")).map(async (file) => {
13467
13488
  try {
13468
13489
  return await markStaleIfNeeded(aiexDir, await readFile(path.join(dir, file)));
13469
13490
  } catch {
13470
13491
  return null;
13471
13492
  }
13472
13493
  }))).filter((record) => !!record).sort((a, b) => b.createdAt.localeCompare(a.createdAt));
13494
+ setCachedRecords(aiexDir, result);
13495
+ return result;
13473
13496
  } catch {
13474
13497
  return [];
13475
13498
  }
@@ -13486,6 +13509,7 @@ async function deleteExtractionAuditRecord(aiexDir, id) {
13486
13509
  const uploadFiles = await fs.readdir(uploadsDir).catch(() => []);
13487
13510
  await Promise.all(uploadFiles.filter((file) => file.startsWith(`${id}-`)).map((file) => fs.unlink(path.join(uploadsDir, file)).catch(() => {})));
13488
13511
  await fs.unlink(auditPath(aiexDir, id)).catch(() => {});
13512
+ clearRecordCache(aiexDir);
13489
13513
  return true;
13490
13514
  }
13491
13515
 
@@ -13632,34 +13656,34 @@ function suggestFieldMap(schemaFields, databaseProperties) {
13632
13656
  }
13633
13657
  return fieldMap;
13634
13658
  }
13635
- function hasProperties(value) {
13636
- return !!value && typeof value === "object" && !!value.properties && typeof value.properties === "object";
13659
+ function isDataSourceResponse(value) {
13660
+ return !!value && typeof value === "object" && typeof value.properties === "object" && !Array.isArray(value);
13637
13661
  }
13638
13662
  function firstDataSourceId(database) {
13639
- return (Array.isArray(database?.data_sources) ? database.data_sources : []).find((source) => typeof source?.id === "string" && source.id.trim())?.id;
13663
+ return database.data_sources?.find((source) => typeof source.id === "string" && source.id.trim())?.id;
13640
13664
  }
13641
13665
  async function resolveNotionDataSource(notion, inputId) {
13642
13666
  const id = parseNotionDatabaseId(inputId);
13643
13667
  if (!id) throw new Error("Notion database or data source URL/ID is required.");
13644
13668
  try {
13645
13669
  const dataSource$1 = await notion.dataSources.retrieve({ data_source_id: id });
13646
- if (hasProperties(dataSource$1)) return {
13670
+ if (isDataSourceResponse(dataSource$1)) return {
13647
13671
  databaseId: typeof dataSource$1.parent?.database_id === "string" ? dataSource$1.parent.database_id : id,
13648
- dataSourceId: dataSource$1.id ?? id,
13672
+ dataSourceId: dataSource$1.id,
13649
13673
  properties: dataSource$1.properties,
13650
- parent: { data_source_id: dataSource$1.id ?? id }
13674
+ parent: { data_source_id: dataSource$1.id }
13651
13675
  };
13652
13676
  } catch {}
13653
13677
  const database = await notion.databases.retrieve({ database_id: id });
13654
13678
  const dataSourceId = firstDataSourceId(database);
13655
13679
  if (!dataSourceId) throw new Error("No data source found for this Notion database. Copy the data source link from Notion, or share the source database with the integration.");
13656
13680
  const dataSource = await notion.dataSources.retrieve({ data_source_id: dataSourceId });
13657
- if (!hasProperties(dataSource)) throw new Error("Notion data source did not return properties. Make sure it is shared with the integration and is not a linked data source.");
13681
+ if (!isDataSourceResponse(dataSource)) throw new Error("Notion data source did not return properties. Make sure it is shared with the integration and is not a linked data source.");
13658
13682
  return {
13659
- databaseId: database.id ?? id,
13660
- dataSourceId: dataSource.id ?? dataSourceId,
13683
+ databaseId: database.id,
13684
+ dataSourceId: dataSource.id,
13661
13685
  properties: dataSource.properties,
13662
- parent: { data_source_id: dataSource.id ?? dataSourceId }
13686
+ parent: { data_source_id: dataSource.id }
13663
13687
  };
13664
13688
  }
13665
13689
  async function inspectNotionDatabase(input) {
@@ -13987,7 +14011,12 @@ async function listSchemas(aiexDir) {
13987
14011
  return [];
13988
14012
  }
13989
14013
  }
14014
+ function getFileSizeMB(filePath) {
14015
+ return fs$1.statSync(filePath).size / (1024 * 1024);
14016
+ }
13990
14017
  async function readExtractFileInput(filePath, aiConfig) {
14018
+ const sizeMB = getFileSizeMB(filePath);
14019
+ if (sizeMB > 150) throw new Error(`File size (${sizeMB.toFixed(1)}MB) exceeds 150MB limit: ${filePath}`);
13991
14020
  const ext = path.extname(filePath).toLowerCase().replace(".", "");
13992
14021
  if (FILE_PART_EXTENSIONS.has(ext)) return {
13993
14022
  text: "",
@@ -15239,6 +15268,62 @@ function dataRoutes(config) {
15239
15268
  return app;
15240
15269
  }
15241
15270
 
15271
+ //#endregion
15272
+ //#region src/core/file-constants.ts
15273
+ const MAX_UPLOAD_SIZE = 150 * 1024 * 1024;
15274
+ const MAX_UPLOAD_SIZE_TEXT = "150MB";
15275
+ const SUPPORTED_MIME_TYPES = new Set([
15276
+ "image/png",
15277
+ "image/jpeg",
15278
+ "image/gif",
15279
+ "image/webp",
15280
+ "image/bmp",
15281
+ "image/svg+xml",
15282
+ "application/pdf",
15283
+ "text/plain",
15284
+ "text/markdown",
15285
+ "text/csv",
15286
+ "application/json",
15287
+ "text/html",
15288
+ "text/xml",
15289
+ "application/x-yaml",
15290
+ "text/yaml"
15291
+ ]);
15292
+ const MIME_TO_EXT = {
15293
+ "image/png": "png",
15294
+ "image/jpeg": "jpg",
15295
+ "image/gif": "gif",
15296
+ "image/webp": "webp",
15297
+ "image/bmp": "bmp",
15298
+ "image/svg+xml": "svg",
15299
+ "application/pdf": "pdf",
15300
+ "text/plain": "txt",
15301
+ "text/markdown": "md",
15302
+ "text/csv": "csv",
15303
+ "application/json": "json",
15304
+ "text/html": "html",
15305
+ "text/xml": "xml",
15306
+ "application/x-yaml": "yaml",
15307
+ "text/yaml": "yaml"
15308
+ };
15309
+ function getExtensionFromMime(mimeType) {
15310
+ return MIME_TO_EXT[mimeType];
15311
+ }
15312
+ function isAllowedMimeType(mimeType) {
15313
+ return SUPPORTED_MIME_TYPES.has(mimeType);
15314
+ }
15315
+ var FileValidationError = class extends Error {
15316
+ constructor(message) {
15317
+ super(message);
15318
+ this.name = "FileValidationError";
15319
+ }
15320
+ };
15321
+ function validateFileUpload(file) {
15322
+ if (file.size === 0) throw new FileValidationError("Uploaded file is empty");
15323
+ if (file.size > MAX_UPLOAD_SIZE) throw new FileValidationError(`File size (${(file.size / 1024 / 1024).toFixed(1)}MB) exceeds ${MAX_UPLOAD_SIZE_TEXT} limit`);
15324
+ if (!isAllowedMimeType(file.type)) throw new FileValidationError(`Unsupported file type "${file.type}". Supported types: ${[...SUPPORTED_MIME_TYPES].join(", ")}`);
15325
+ }
15326
+
15242
15327
  //#endregion
15243
15328
  //#region src/server/routes/extract.ts
15244
15329
  function getFormString(value) {
@@ -15252,9 +15337,16 @@ function getFormFile(value) {
15252
15337
  function safeUploadName(name$1) {
15253
15338
  return path.basename(name$1).replace(/[^\w.-]/g, "_") || "upload.txt";
15254
15339
  }
15340
+ function safeUploadNameForMime(file) {
15341
+ const safeName = safeUploadName(file.name);
15342
+ const ext = getExtensionFromMime(file.type);
15343
+ if (!ext) throw new FileValidationError(`Unsupported file type "${file.type}"`);
15344
+ return `${path.parse(safeName).name || "upload"}.${ext}`;
15345
+ }
15255
15346
  async function saveUploadToFile(file, uploadsDir, id) {
15347
+ validateFileUpload(file);
15256
15348
  await fs.mkdir(uploadsDir, { recursive: true });
15257
- const filePath = path.join(uploadsDir, `${id}-${safeUploadName(file.name)}`);
15349
+ const filePath = path.join(uploadsDir, `${id}-${safeUploadNameForMime(file)}`);
15258
15350
  const buffer = Buffer.from(await file.arrayBuffer());
15259
15351
  await fs.writeFile(filePath, buffer);
15260
15352
  return filePath;
@@ -15422,11 +15514,25 @@ function extractRoutes(config) {
15422
15514
  });
15423
15515
  let filePath;
15424
15516
  if (file) {
15425
- filePath = await saveUploadToFile(file, uploadsDir, audit.id);
15517
+ try {
15518
+ filePath = await saveUploadToFile(file, uploadsDir, audit.id);
15519
+ } catch (e) {
15520
+ if (e instanceof FileValidationError) {
15521
+ await updateExtractionAuditRecord(aiexDir, audit.id, {
15522
+ status: "failed",
15523
+ error: e.message
15524
+ });
15525
+ return c.json({
15526
+ success: false,
15527
+ error: e.message
15528
+ }, 400);
15529
+ }
15530
+ throw e;
15531
+ }
15426
15532
  await updateExtractionAuditRecord(aiexDir, audit.id, { source: {
15427
15533
  type: "file",
15428
15534
  filePath,
15429
- fileName: safeUploadName(file.name)
15535
+ fileName: path.basename(filePath)
15430
15536
  } });
15431
15537
  }
15432
15538
  return executeAuditedExtraction({
@@ -65,7 +65,7 @@ function doctorDiagnosticsTableRows(d) {
65
65
  //#endregion
66
66
  //#region package.json
67
67
  var name = "aiex-cli";
68
- var version = "0.0.2";
68
+ var version = "0.0.3-beta.1";
69
69
  var description = "JSON Schema → SQLite with AI-powered data extraction";
70
70
  var package_default = {
71
71
  name,
package/dist/index.mjs CHANGED
@@ -1,3 +1,3 @@
1
- import { C as buildDoctorDiagnostics, T as formatDoctorDiagnosticsJson, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, w as doctorDiagnosticsTableRows } from "./doctor-collector-DR7s0UUh.mjs";
1
+ import { C as buildDoctorDiagnostics, T as formatDoctorDiagnosticsJson, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, w as doctorDiagnosticsTableRows } from "./doctor-collector-CjFTz8p4.mjs";
2
2
 
3
3
  export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "aiex-cli",
3
3
  "type": "module",
4
- "version": "0.0.2",
4
+ "version": "0.0.3-beta.1",
5
5
  "description": "JSON Schema → SQLite with AI-powered data extraction",
6
6
  "author": "OSpoon <zxin088@gmail.com>",
7
7
  "license": "MIT",