aiex-cli 0.1.1-beta.8 → 0.1.1-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { A as seedConfig, C as CORRECTION_SYSTEM_PROMPT, D as PLACEHOLDER_TEXT, E as PLACEHOLDER_SCHEMA, M as name, N as package_default, O as buildCorrectionUserPrompt, P as version, S as DEFAULT_MINERU_CONFIG, T as EVIDENCE_INSTRUCTIONS, _ as doctorDiagnosticsSeverityRows, a as recognizeImageText, b as DEFAULT_LITEPARSE_CONFIG, c as t, d as readAIConfig, f as writeAIConfig, h as parseJsonSchema, j as description, k as createConfig, l as createProjectDatabase, m as JsonSchemaDefinitionSchema, n as collectDoctorDiagnostics, o as shouldUseImageOcrFallback, p as AIConfigSchema, r as createMigrationConfig, s as initI18n, t as generateDrizzleSchema, u as getDefaultAIConfig, v as doctorDiagnosticsTableRows, w as DEFAULT_PROMPT_CONFIG, x as DEFAULT_MINERU_API_CONFIG, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-BcStmKUc.mjs";
1
+ import { A as seedConfig, C as CORRECTION_SYSTEM_PROMPT, D as PLACEHOLDER_TEXT, E as PLACEHOLDER_SCHEMA, M as name, N as package_default, O as buildCorrectionUserPrompt, P as version, S as DEFAULT_MINERU_CONFIG, T as EVIDENCE_INSTRUCTIONS, _ as doctorDiagnosticsSeverityRows, a as recognizeImageText, b as DEFAULT_LITEPARSE_CONFIG, c as t, d as readAIConfig, f as writeAIConfig, h as parseJsonSchema, j as description, k as createConfig, l as createProjectDatabase, m as JsonSchemaDefinitionSchema, n as collectDoctorDiagnostics, o as shouldUseImageOcrFallback, p as AIConfigSchema, r as createMigrationConfig, s as initI18n, t as generateDrizzleSchema, u as getDefaultAIConfig, v as doctorDiagnosticsTableRows, w as DEFAULT_PROMPT_CONFIG, x as DEFAULT_MINERU_API_CONFIG, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-BdkbSP5F.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
@@ -1256,6 +1256,19 @@ function verifyFieldEvidence(input) {
1256
1256
  }
1257
1257
  return Object.keys(verified).length > 0 ? verified : void 0;
1258
1258
  }
1259
+ function findInvalidFieldEvidence(input) {
1260
+ if (!input.text || !isRecord$3(input.data) || !input.rawEvidence) return [];
1261
+ const invalid = [];
1262
+ for (const [field, raw] of Object.entries(input.rawEvidence)) {
1263
+ const property = input.schema.properties[field];
1264
+ if (!property) continue;
1265
+ if (property.type !== "string" && property.type !== "number" && property.type !== "integer") continue;
1266
+ if (typeof raw.quote !== "string" || raw.quote.trim().length === 0) continue;
1267
+ if (!input.text.includes(raw.quote)) continue;
1268
+ if (!quoteContainsValue(raw.quote, input.data[field], property)) invalid.push(field);
1269
+ }
1270
+ return invalid;
1271
+ }
1259
1272
  function isEvidenceEligibleProperty(property) {
1260
1273
  return property.type === "string" || property.type === "number" || property.type === "integer";
1261
1274
  }
@@ -1266,6 +1279,7 @@ function buildFieldEvidenceQuality(input) {
1266
1279
  const unsupportedFields = [];
1267
1280
  const missingFields = [];
1268
1281
  const invalidFields = [];
1282
+ const invalidEvidenceFields = new Set(input.invalidEvidenceFields ?? []);
1269
1283
  for (const [field, property] of Object.entries(input.schema.properties)) {
1270
1284
  if (!isEvidenceEligibleProperty(property)) continue;
1271
1285
  const value = input.data[field];
@@ -1273,12 +1287,15 @@ function buildFieldEvidenceQuality(input) {
1273
1287
  if (value === null || value === void 0) {
1274
1288
  status = "missing";
1275
1289
  missingFields.push(field);
1290
+ } else if (invalidEvidenceFields.has(field)) {
1291
+ status = "invalid";
1292
+ invalidFields.push(field);
1276
1293
  } else if (input.verifiedEvidence?.[field]) {
1277
1294
  status = "supported";
1278
1295
  supportedFields.push(field);
1279
1296
  } else if (input.rawEvidence?.[field]) {
1280
- status = "invalid";
1281
- invalidFields.push(field);
1297
+ status = "unsupported";
1298
+ unsupportedFields.push(field);
1282
1299
  } else {
1283
1300
  status = "unsupported";
1284
1301
  unsupportedFields.push(field);
@@ -1900,11 +1917,18 @@ async function extractStructuredData(input) {
1900
1917
  data: businessData,
1901
1918
  rawEvidence: stripped.rawEvidence
1902
1919
  }) : void 0;
1920
+ const invalidEvidenceFields = canLocateEvidence ? findInvalidFieldEvidence({
1921
+ schema,
1922
+ text: text$1,
1923
+ data: businessData,
1924
+ rawEvidence: stripped.rawEvidence
1925
+ }) : [];
1903
1926
  const evidenceQuality = canLocateEvidence ? buildFieldEvidenceQuality({
1904
1927
  schema,
1905
1928
  data: businessData,
1906
1929
  rawEvidence: stripped.rawEvidence,
1907
- verifiedEvidence: evidence
1930
+ verifiedEvidence: evidence,
1931
+ invalidEvidenceFields
1908
1932
  }) : void 0;
1909
1933
  return {
1910
1934
  success: true,
@@ -12,7 +12,7 @@ import { Kysely, SqliteDialect, sql } from "kysely";
12
12
 
13
13
  //#region package.json
14
14
  var name = "aiex-cli";
15
- var version = "0.1.1-beta.8";
15
+ var version = "0.1.1-beta.9";
16
16
  var description = "JSON Schema → SQLite with AI-powered data extraction";
17
17
  var package_default = {
18
18
  name,
@@ -167,6 +167,9 @@ const EVIDENCE_INSTRUCTIONS = `Evidence requirements:
167
167
  - Also return a top-level "_evidence" object.
168
168
  - For each top-level scalar field you extracted from the text, include "_evidence.<field>.quote".
169
169
  - The quote must be an exact contiguous substring copied from the input text.
170
+ - Prefer the shortest quote that still uniquely identifies the field in the document.
171
+ - Include the field label and nearby context when a value is repeated, for example "考试年份:2017年" instead of "2017", or "语文 106 150 71%" instead of "150".
172
+ - Do not use a quote that supports a different field with the same repeated value.
170
173
  - Do not invent offsets. Only provide quotes.
171
174
  - If no exact quote supports a field, omit that field from "_evidence".`;
172
175
  const CORRECTION_SYSTEM_PROMPT = `You are a precise data correction assistant. Your task is to correct validation errors in a previously generated JSON object to make it comply with the provided JSON Schema.
package/dist/index.mjs CHANGED
@@ -1,3 +1,3 @@
1
- import { _ as doctorDiagnosticsSeverityRows, g as buildDoctorDiagnostics, h as parseJsonSchema, i as generateDrizzleConfig, m as JsonSchemaDefinitionSchema, n as collectDoctorDiagnostics, r as createMigrationConfig, t as generateDrizzleSchema, v as doctorDiagnosticsTableRows, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-BcStmKUc.mjs";
1
+ import { _ as doctorDiagnosticsSeverityRows, g as buildDoctorDiagnostics, h as parseJsonSchema, i as generateDrizzleConfig, m as JsonSchemaDefinitionSchema, n as collectDoctorDiagnostics, r as createMigrationConfig, t as generateDrizzleSchema, v as doctorDiagnosticsTableRows, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-BdkbSP5F.mjs";
2
2
 
3
3
  export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsSeverityRows, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "aiex-cli",
3
3
  "type": "module",
4
- "version": "0.1.1-beta.8",
4
+ "version": "0.1.1-beta.9",
5
5
  "description": "JSON Schema → SQLite with AI-powered data extraction",
6
6
  "author": "OSpoon <zxin088@gmail.com>",
7
7
  "license": "MIT",