aiex-cli 0.1.1-beta.8 → 0.1.1-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as seedConfig, C as CORRECTION_SYSTEM_PROMPT, D as PLACEHOLDER_TEXT, E as PLACEHOLDER_SCHEMA, M as name, N as package_default, O as buildCorrectionUserPrompt, P as version, S as DEFAULT_MINERU_CONFIG, T as EVIDENCE_INSTRUCTIONS, _ as doctorDiagnosticsSeverityRows, a as recognizeImageText, b as DEFAULT_LITEPARSE_CONFIG, c as t, d as readAIConfig, f as writeAIConfig, h as parseJsonSchema, j as description, k as createConfig, l as createProjectDatabase, m as JsonSchemaDefinitionSchema, n as collectDoctorDiagnostics, o as shouldUseImageOcrFallback, p as AIConfigSchema, r as createMigrationConfig, s as initI18n, t as generateDrizzleSchema, u as getDefaultAIConfig, v as doctorDiagnosticsTableRows, w as DEFAULT_PROMPT_CONFIG, x as DEFAULT_MINERU_API_CONFIG, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-
|
|
1
|
+
import { A as seedConfig, C as CORRECTION_SYSTEM_PROMPT, D as PLACEHOLDER_TEXT, E as PLACEHOLDER_SCHEMA, M as name, N as package_default, O as buildCorrectionUserPrompt, P as version, S as DEFAULT_MINERU_CONFIG, T as EVIDENCE_INSTRUCTIONS, _ as doctorDiagnosticsSeverityRows, a as recognizeImageText, b as DEFAULT_LITEPARSE_CONFIG, c as t, d as readAIConfig, f as writeAIConfig, h as parseJsonSchema, j as description, k as createConfig, l as createProjectDatabase, m as JsonSchemaDefinitionSchema, n as collectDoctorDiagnostics, o as shouldUseImageOcrFallback, p as AIConfigSchema, r as createMigrationConfig, s as initI18n, t as generateDrizzleSchema, u as getDefaultAIConfig, v as doctorDiagnosticsTableRows, w as DEFAULT_PROMPT_CONFIG, x as DEFAULT_MINERU_API_CONFIG, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-BdkbSP5F.mjs";
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
4
|
import os from "node:os";
|
|
@@ -1256,6 +1256,19 @@ function verifyFieldEvidence(input) {
|
|
|
1256
1256
|
}
|
|
1257
1257
|
return Object.keys(verified).length > 0 ? verified : void 0;
|
|
1258
1258
|
}
|
|
1259
|
+
function findInvalidFieldEvidence(input) {
|
|
1260
|
+
if (!input.text || !isRecord$3(input.data) || !input.rawEvidence) return [];
|
|
1261
|
+
const invalid = [];
|
|
1262
|
+
for (const [field, raw] of Object.entries(input.rawEvidence)) {
|
|
1263
|
+
const property = input.schema.properties[field];
|
|
1264
|
+
if (!property) continue;
|
|
1265
|
+
if (property.type !== "string" && property.type !== "number" && property.type !== "integer") continue;
|
|
1266
|
+
if (typeof raw.quote !== "string" || raw.quote.trim().length === 0) continue;
|
|
1267
|
+
if (!input.text.includes(raw.quote)) continue;
|
|
1268
|
+
if (!quoteContainsValue(raw.quote, input.data[field], property)) invalid.push(field);
|
|
1269
|
+
}
|
|
1270
|
+
return invalid;
|
|
1271
|
+
}
|
|
1259
1272
|
function isEvidenceEligibleProperty(property) {
|
|
1260
1273
|
return property.type === "string" || property.type === "number" || property.type === "integer";
|
|
1261
1274
|
}
|
|
@@ -1266,6 +1279,7 @@ function buildFieldEvidenceQuality(input) {
|
|
|
1266
1279
|
const unsupportedFields = [];
|
|
1267
1280
|
const missingFields = [];
|
|
1268
1281
|
const invalidFields = [];
|
|
1282
|
+
const invalidEvidenceFields = new Set(input.invalidEvidenceFields ?? []);
|
|
1269
1283
|
for (const [field, property] of Object.entries(input.schema.properties)) {
|
|
1270
1284
|
if (!isEvidenceEligibleProperty(property)) continue;
|
|
1271
1285
|
const value = input.data[field];
|
|
@@ -1273,12 +1287,15 @@ function buildFieldEvidenceQuality(input) {
|
|
|
1273
1287
|
if (value === null || value === void 0) {
|
|
1274
1288
|
status = "missing";
|
|
1275
1289
|
missingFields.push(field);
|
|
1290
|
+
} else if (invalidEvidenceFields.has(field)) {
|
|
1291
|
+
status = "invalid";
|
|
1292
|
+
invalidFields.push(field);
|
|
1276
1293
|
} else if (input.verifiedEvidence?.[field]) {
|
|
1277
1294
|
status = "supported";
|
|
1278
1295
|
supportedFields.push(field);
|
|
1279
1296
|
} else if (input.rawEvidence?.[field]) {
|
|
1280
|
-
status = "
|
|
1281
|
-
|
|
1297
|
+
status = "unsupported";
|
|
1298
|
+
unsupportedFields.push(field);
|
|
1282
1299
|
} else {
|
|
1283
1300
|
status = "unsupported";
|
|
1284
1301
|
unsupportedFields.push(field);
|
|
@@ -1900,11 +1917,18 @@ async function extractStructuredData(input) {
|
|
|
1900
1917
|
data: businessData,
|
|
1901
1918
|
rawEvidence: stripped.rawEvidence
|
|
1902
1919
|
}) : void 0;
|
|
1920
|
+
const invalidEvidenceFields = canLocateEvidence ? findInvalidFieldEvidence({
|
|
1921
|
+
schema,
|
|
1922
|
+
text: text$1,
|
|
1923
|
+
data: businessData,
|
|
1924
|
+
rawEvidence: stripped.rawEvidence
|
|
1925
|
+
}) : [];
|
|
1903
1926
|
const evidenceQuality = canLocateEvidence ? buildFieldEvidenceQuality({
|
|
1904
1927
|
schema,
|
|
1905
1928
|
data: businessData,
|
|
1906
1929
|
rawEvidence: stripped.rawEvidence,
|
|
1907
|
-
verifiedEvidence: evidence
|
|
1930
|
+
verifiedEvidence: evidence,
|
|
1931
|
+
invalidEvidenceFields
|
|
1908
1932
|
}) : void 0;
|
|
1909
1933
|
return {
|
|
1910
1934
|
success: true,
|
|
@@ -12,7 +12,7 @@ import { Kysely, SqliteDialect, sql } from "kysely";
|
|
|
12
12
|
|
|
13
13
|
//#region package.json
|
|
14
14
|
var name = "aiex-cli";
|
|
15
|
-
var version = "0.1.1-beta.8";
|
|
15
|
+
var version = "0.1.1-beta.9";
|
|
16
16
|
var description = "JSON Schema → SQLite with AI-powered data extraction";
|
|
17
17
|
var package_default = {
|
|
18
18
|
name,
|
|
@@ -167,6 +167,9 @@ const EVIDENCE_INSTRUCTIONS = `Evidence requirements:
|
|
|
167
167
|
- Also return a top-level "_evidence" object.
|
|
168
168
|
- For each top-level scalar field you extracted from the text, include "_evidence.<field>.quote".
|
|
169
169
|
- The quote must be an exact contiguous substring copied from the input text.
|
|
170
|
+
- Prefer the shortest quote that still uniquely identifies the field in the document.
|
|
171
|
+
- Include the field label and nearby context when a value is repeated, for example "考试年份:2017年" instead of "2017", or "语文 106 150 71%" instead of "150".
|
|
172
|
+
- Do not use a quote that supports a different field with the same repeated value.
|
|
170
173
|
- Do not invent offsets. Only provide quotes.
|
|
171
174
|
- If no exact quote supports a field, omit that field from "_evidence".`;
|
|
172
175
|
const CORRECTION_SYSTEM_PROMPT = `You are a precise data correction assistant. Your task is to correct validation errors in a previously generated JSON object to make it comply with the provided JSON Schema.
|
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { _ as doctorDiagnosticsSeverityRows, g as buildDoctorDiagnostics, h as parseJsonSchema, i as generateDrizzleConfig, m as JsonSchemaDefinitionSchema, n as collectDoctorDiagnostics, r as createMigrationConfig, t as generateDrizzleSchema, v as doctorDiagnosticsTableRows, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-
|
|
1
|
+
import { _ as doctorDiagnosticsSeverityRows, g as buildDoctorDiagnostics, h as parseJsonSchema, i as generateDrizzleConfig, m as JsonSchemaDefinitionSchema, n as collectDoctorDiagnostics, r as createMigrationConfig, t as generateDrizzleSchema, v as doctorDiagnosticsTableRows, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-BdkbSP5F.mjs";
|
|
2
2
|
|
|
3
3
|
export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsSeverityRows, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
|