unrag 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +982 -59
- package/package.json +1 -1
- package/registry/core/ingest.ts +35 -8
- package/registry/core/types.ts +8 -1
- package/registry/docs/unrag.md +3 -1
- package/registry/eval/dataset.ts +224 -0
- package/registry/eval/index.ts +39 -0
- package/registry/eval/metrics.ts +85 -0
- package/registry/eval/report.ts +342 -0
- package/registry/eval/runner.ts +450 -0
- package/registry/manifest.json +9 -0
- package/registry/store/drizzle-postgres-pgvector/schema.ts +1 -1
- package/registry/store/drizzle-postgres-pgvector/store.ts +40 -44
- package/registry/store/prisma-postgres-pgvector/store.ts +25 -20
- package/registry/store/raw-sql-postgres-pgvector/store.ts +24 -22
package/dist/cli/index.js
CHANGED
|
@@ -396,6 +396,10 @@ async function copyRegistryFiles(selection) {
|
|
|
396
396
|
src: path2.join(selection.registryRoot, "core/retrieve.ts"),
|
|
397
397
|
dest: path2.join(installBaseAbs, "core/retrieve.ts")
|
|
398
398
|
},
|
|
399
|
+
{
|
|
400
|
+
src: path2.join(selection.registryRoot, "core/rerank.ts"),
|
|
401
|
+
dest: path2.join(installBaseAbs, "core/rerank.ts")
|
|
402
|
+
},
|
|
399
403
|
{
|
|
400
404
|
src: path2.join(selection.registryRoot, "embedding/_shared.ts"),
|
|
401
405
|
dest: path2.join(installBaseAbs, "embedding/_shared.ts")
|
|
@@ -707,6 +711,9 @@ function isPresetPayloadV1(x) {
|
|
|
707
711
|
return false;
|
|
708
712
|
if (!Array.isArray(o.modules.extractors) || !Array.isArray(o.modules.connectors))
|
|
709
713
|
return false;
|
|
714
|
+
if ("batteries" in o.modules && o.modules.batteries != null && !Array.isArray(o.modules.batteries)) {
|
|
715
|
+
return false;
|
|
716
|
+
}
|
|
710
717
|
return true;
|
|
711
718
|
}
|
|
712
719
|
function toPresetUrl(input) {
|
|
@@ -879,6 +886,7 @@ function depsForBattery(battery) {
|
|
|
879
886
|
deps["ai"] = "^6.0.3";
|
|
880
887
|
deps["@ai-sdk/cohere"] = "^3.0.1";
|
|
881
888
|
}
|
|
889
|
+
if (battery === "eval") {}
|
|
882
890
|
return { deps, devDeps };
|
|
883
891
|
}
|
|
884
892
|
function installCmd(pm) {
|
|
@@ -983,6 +991,314 @@ async function patchTsconfigPaths(params) {
|
|
|
983
991
|
return { changed: true, file: configFile };
|
|
984
992
|
}
|
|
985
993
|
|
|
994
|
+
// cli/commands/init.ts
|
|
995
|
+
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
996
|
+
|
|
997
|
+
// cli/lib/evalBatteryScaffold.ts
|
|
998
|
+
/**
 * Sample eval dataset (dataset format version "1").
 *
 * `init` serializes this object to `.unrag/eval/datasets/sample.json` when the
 * "eval" battery is part of the preset and the file does not exist yet.
 * Every document id lives under `defaults.scopePrefix`, so the ids are derived
 * from the prefix instead of being spelled out twice.
 */
var EVAL_SAMPLE_DATASET_V1 = (() => {
  const scopePrefix = "eval:sample:";
  const refundDocId = `${scopePrefix}doc:refund-policy`;
  const supportDocId = `${scopePrefix}doc:contact-support`;

  const documents = [
    {
      sourceId: refundDocId,
      content: "Refunds are available within 30 days of purchase, provided you have a receipt."
    },
    {
      sourceId: supportDocId,
      content: "Contact support by emailing support@example.com. Response times are typically under 24 hours."
    }
  ];

  // Each query lists the sourceIds that count as relevant hits.
  const queries = [
    {
      id: "q_refund_window",
      query: "How long do I have to request a refund?",
      relevant: { sourceIds: [refundDocId] }
    },
    {
      id: "q_contact_support",
      query: "How do I contact support?",
      relevant: { sourceIds: [supportDocId] }
    }
  ];

  // Key order is preserved because the object is written out with JSON.stringify.
  return {
    version: "1",
    id: "sample",
    description: "Tiny dataset to validate retrieval changes.",
    defaults: {
      topK: 10,
      scopePrefix,
      mode: "retrieve",
      thresholds: { min: { recallAtK: 0.75 } }
    },
    documents,
    queries
  };
})();
|
|
1031
|
+
/**
 * Default contents of `.unrag/eval/config.json`, written by `init` when the
 * "eval" battery is selected and no config file exists yet.
 */
var EVAL_CONFIG_DEFAULT = (() => {
  // Minimum metric values a run must reach.
  const minimums = { recallAtK: 0.75 };
  return {
    thresholds: { min: minimums },
    cleanup: "none",
    ingest: true
  };
})();
|
|
1036
|
+
/**
 * package.json scripts offered alongside the eval battery.
 * Both entries run the generated `scripts/unrag-eval.ts` against the sample
 * dataset; the `:ci` variant only appends `--ci`.
 *
 * NOTE(review): the commands hard-code `bun run` — confirm this is intended
 * for projects that use a different package manager.
 */
var EVAL_PACKAGE_JSON_SCRIPTS = (() => {
  const baseCommand = "bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json";
  return {
    "unrag:eval": baseCommand,
    "unrag:eval:ci": baseCommand + " --ci"
  };
})();
|
|
1040
|
+
function renderEvalRunnerScript(opts) {
|
|
1041
|
+
const installImportBase = `../${opts.installDir.replace(/\\/g, "/")}`;
|
|
1042
|
+
return `/**
|
|
1043
|
+
* Unrag eval runner entrypoint (generated).
|
|
1044
|
+
*
|
|
1045
|
+
* You own this file — customize it freely.
|
|
1046
|
+
*/
|
|
1047
|
+
|
|
1048
|
+
import path from "node:path";
|
|
1049
|
+
import { access, readFile } from "node:fs/promises";
|
|
1050
|
+
|
|
1051
|
+
import { createUnragEngine } from "../unrag.config";
|
|
1052
|
+
import {
|
|
1053
|
+
runEval,
|
|
1054
|
+
readEvalReportFromFile,
|
|
1055
|
+
writeEvalReport,
|
|
1056
|
+
writeEvalSummaryMd,
|
|
1057
|
+
diffEvalReports,
|
|
1058
|
+
writeEvalDiffJson,
|
|
1059
|
+
writeEvalDiffMd,
|
|
1060
|
+
type EvalMode,
|
|
1061
|
+
type EvalThresholds,
|
|
1062
|
+
type EvalCleanupPolicy,
|
|
1063
|
+
} from "${installImportBase}/eval";
|
|
1064
|
+
|
|
1065
|
+
type CliArgs = {
|
|
1066
|
+
dataset?: string;
|
|
1067
|
+
baseline?: string;
|
|
1068
|
+
outputDir?: string;
|
|
1069
|
+
mode?: EvalMode;
|
|
1070
|
+
topK?: number;
|
|
1071
|
+
rerankTopK?: number;
|
|
1072
|
+
scopePrefix?: string;
|
|
1073
|
+
ingest?: boolean;
|
|
1074
|
+
cleanup?: EvalCleanupPolicy;
|
|
1075
|
+
thresholds?: Partial<EvalThresholds>;
|
|
1076
|
+
ci?: boolean;
|
|
1077
|
+
allowAssets?: boolean;
|
|
1078
|
+
allowNonEvalPrefix?: boolean;
|
|
1079
|
+
yes?: boolean;
|
|
1080
|
+
includeNdcg?: boolean;
|
|
1081
|
+
};
|
|
1082
|
+
|
|
1083
|
+
async function fileExists(p: string): Promise<boolean> {
|
|
1084
|
+
try {
|
|
1085
|
+
await access(p);
|
|
1086
|
+
return true;
|
|
1087
|
+
} catch {
|
|
1088
|
+
return false;
|
|
1089
|
+
}
|
|
1090
|
+
}
|
|
1091
|
+
|
|
1092
|
+
async function loadEnvFilesBestEffort(projectRoot: string) {
|
|
1093
|
+
const nodeEnv = process.env.NODE_ENV ?? "development";
|
|
1094
|
+
const candidates = [
|
|
1095
|
+
".env",
|
|
1096
|
+
".env.local",
|
|
1097
|
+
\`.env.\${nodeEnv}\`,
|
|
1098
|
+
\`.env.\${nodeEnv}.local\`,
|
|
1099
|
+
];
|
|
1100
|
+
for (const rel of candidates) {
|
|
1101
|
+
const abs = path.join(projectRoot, rel);
|
|
1102
|
+
if (!(await fileExists(abs))) continue;
|
|
1103
|
+
try {
|
|
1104
|
+
const raw = await readFile(abs, "utf8");
|
|
1105
|
+
for (const line of raw.split(/\\r?\\n/)) {
|
|
1106
|
+
const trimmed = line.trim();
|
|
1107
|
+
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
1108
|
+
const eq = trimmed.indexOf("=");
|
|
1109
|
+
if (eq < 0) continue;
|
|
1110
|
+
const key = trimmed.slice(0, eq).trim();
|
|
1111
|
+
const value = trimmed.slice(eq + 1).trim().replace(/^"|"$/g, "");
|
|
1112
|
+
if (!key) continue;
|
|
1113
|
+
if (process.env[key] == null) process.env[key] = value;
|
|
1114
|
+
}
|
|
1115
|
+
} catch {
|
|
1116
|
+
// ignore
|
|
1117
|
+
}
|
|
1118
|
+
}
|
|
1119
|
+
}
|
|
1120
|
+
|
|
1121
|
+
function parseThresholdExpr(expr: string): Partial<EvalThresholds> {
|
|
1122
|
+
// Accept both:
|
|
1123
|
+
// - "min.recallAtK=0.75"
|
|
1124
|
+
// - "recallAtK=0.75" (shorthand for min)
|
|
1125
|
+
const [lhsRaw, rhsRaw] = String(expr ?? "").split("=");
|
|
1126
|
+
const lhs = (lhsRaw ?? "").trim();
|
|
1127
|
+
const rhs = Number(String(rhsRaw ?? "").trim());
|
|
1128
|
+
if (!lhs || Number.isNaN(rhs)) return {};
|
|
1129
|
+
|
|
1130
|
+
const parts = lhs.split(".").map((p) => p.trim()).filter(Boolean);
|
|
1131
|
+
const level = parts.length === 2 ? parts[0] : "min";
|
|
1132
|
+
const metric = parts.length === 2 ? parts[1] : parts[0];
|
|
1133
|
+
if (level !== "min") return {};
|
|
1134
|
+
|
|
1135
|
+
const allowed = new Set(["hitAtK", "precisionAtK", "recallAtK", "mrrAtK", "ndcgAtK"]);
|
|
1136
|
+
if (!allowed.has(metric)) return {};
|
|
1137
|
+
return { min: { [metric]: rhs } } as any;
|
|
1138
|
+
}
|
|
1139
|
+
|
|
1140
|
+
function mergeThresholds(
|
|
1141
|
+
a: Partial<EvalThresholds> | undefined,
|
|
1142
|
+
b: Partial<EvalThresholds> | undefined
|
|
1143
|
+
): Partial<EvalThresholds> | undefined {
|
|
1144
|
+
if (!a && !b) return undefined;
|
|
1145
|
+
const out: any = { ...(a ?? {}) };
|
|
1146
|
+
if (b?.min) out.min = { ...(out.min ?? {}), ...(b.min as any) };
|
|
1147
|
+
return out;
|
|
1148
|
+
}
|
|
1149
|
+
|
|
1150
|
+
function parseArgs(argv: string[]): CliArgs {
|
|
1151
|
+
const out: CliArgs = {};
|
|
1152
|
+
const thresholds: Partial<EvalThresholds>[] = [];
|
|
1153
|
+
|
|
1154
|
+
for (let i = 0; i < argv.length; i++) {
|
|
1155
|
+
const a = argv[i];
|
|
1156
|
+
if (a === "--dataset") out.dataset = argv[++i];
|
|
1157
|
+
else if (a === "--baseline") out.baseline = argv[++i];
|
|
1158
|
+
else if (a === "--outputDir" || a === "--output-dir") out.outputDir = argv[++i];
|
|
1159
|
+
else if (a === "--mode") out.mode = argv[++i] as EvalMode;
|
|
1160
|
+
else if (a === "--topK" || a === "--top-k") out.topK = Number(argv[++i]);
|
|
1161
|
+
else if (a === "--rerankTopK" || a === "--rerank-top-k") out.rerankTopK = Number(argv[++i]);
|
|
1162
|
+
else if (a === "--scopePrefix" || a === "--scope-prefix") out.scopePrefix = argv[++i];
|
|
1163
|
+
else if (a === "--no-ingest") out.ingest = false;
|
|
1164
|
+
else if (a === "--cleanup") out.cleanup = argv[++i] as EvalCleanupPolicy;
|
|
1165
|
+
else if (a === "--threshold") thresholds.push(parseThresholdExpr(argv[++i] ?? ""));
|
|
1166
|
+
else if (a === "--ci") out.ci = true;
|
|
1167
|
+
else if (a === "--allow-assets") out.allowAssets = true;
|
|
1168
|
+
else if (a === "--allow-non-eval-prefix" || a === "--allow-custom-prefix") out.allowNonEvalPrefix = true;
|
|
1169
|
+
else if (a === "--yes" || a === "-y") out.yes = true;
|
|
1170
|
+
else if (a === "--include-ndcg") out.includeNdcg = true;
|
|
1171
|
+
else if (a === "--help" || a === "-h") {
|
|
1172
|
+
printHelp();
|
|
1173
|
+
process.exit(0);
|
|
1174
|
+
}
|
|
1175
|
+
}
|
|
1176
|
+
|
|
1177
|
+
for (const t of thresholds) out.thresholds = mergeThresholds(out.thresholds ?? {}, t);
|
|
1178
|
+
return out;
|
|
1179
|
+
}
|
|
1180
|
+
|
|
1181
|
+
function printHelp() {
|
|
1182
|
+
console.log(
|
|
1183
|
+
[
|
|
1184
|
+
"unrag-eval — retrieval eval harness",
|
|
1185
|
+
"",
|
|
1186
|
+
"Usage:",
|
|
1187
|
+
" bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json",
|
|
1188
|
+
"",
|
|
1189
|
+
"Options:",
|
|
1190
|
+
" --dataset <path> Dataset JSON path (required)",
|
|
1191
|
+
" --baseline <report.json> Baseline report for diffing",
|
|
1192
|
+
" --output-dir <dir> Output dir (default: .unrag/eval/runs/<ts>-<datasetId>)",
|
|
1193
|
+
" --mode retrieve|retrieve+rerank Override mode",
|
|
1194
|
+
" --top-k <n> Override topK",
|
|
1195
|
+
" --rerank-top-k <n> In rerank mode, retrieve N candidates before reranking (default: topK*3)",
|
|
1196
|
+
" --scope-prefix <prefix> Override scopePrefix",
|
|
1197
|
+
" --no-ingest Skip dataset document ingest",
|
|
1198
|
+
" --cleanup none|on-success|always Cleanup policy when ingesting",
|
|
1199
|
+
" --threshold <k=v> Repeatable thresholds (e.g. min.recallAtK=0.75)",
|
|
1200
|
+
" --ci CI mode (non-interactive)",
|
|
1201
|
+
" --yes, -y Allow dangerous operations when explicitly enabled",
|
|
1202
|
+
" --allow-assets Allow documents[].assets ingestion (advanced)",
|
|
1203
|
+
" --allow-custom-prefix Allow scopePrefix outside eval:* (dangerous)",
|
|
1204
|
+
" --include-ndcg Compute nDCG@k (optional)",
|
|
1205
|
+
].join("\\n")
|
|
1206
|
+
);
|
|
1207
|
+
}
|
|
1208
|
+
|
|
1209
|
+
async function readConfigFile(projectRoot: string): Promise<any | null> {
|
|
1210
|
+
const abs = path.join(projectRoot, ".unrag/eval/config.json");
|
|
1211
|
+
if (!(await fileExists(abs))) return null;
|
|
1212
|
+
const raw = await readFile(abs, "utf8");
|
|
1213
|
+
try {
|
|
1214
|
+
return JSON.parse(raw);
|
|
1215
|
+
} catch (e) {
|
|
1216
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
1217
|
+
throw new Error(\`Failed to parse .unrag/eval/config.json: \${msg}\`);
|
|
1218
|
+
}
|
|
1219
|
+
}
|
|
1220
|
+
|
|
1221
|
+
function sanitizeMode(v: any): EvalMode | undefined {
|
|
1222
|
+
if (v === "retrieve" || v === "retrieve+rerank") return v;
|
|
1223
|
+
return undefined;
|
|
1224
|
+
}
|
|
1225
|
+
|
|
1226
|
+
function sanitizeCleanup(v: any): EvalCleanupPolicy | undefined {
|
|
1227
|
+
if (v === "none" || v === "on-success" || v === "always") return v;
|
|
1228
|
+
return undefined;
|
|
1229
|
+
}
|
|
1230
|
+
|
|
1231
|
+
async function main() {
|
|
1232
|
+
const projectRoot = path.join(process.cwd());
|
|
1233
|
+
await loadEnvFilesBestEffort(projectRoot);
|
|
1234
|
+
|
|
1235
|
+
const cli = parseArgs(process.argv.slice(2));
|
|
1236
|
+
const cfg = await readConfigFile(projectRoot);
|
|
1237
|
+
|
|
1238
|
+
const datasetPath = cli.dataset ?? cfg?.dataset ?? ".unrag/eval/datasets/sample.json";
|
|
1239
|
+
if (!datasetPath) throw new Error("--dataset is required");
|
|
1240
|
+
|
|
1241
|
+
const engine = createUnragEngine();
|
|
1242
|
+
|
|
1243
|
+
const mode = sanitizeMode(cli.mode ?? cfg?.mode) ?? undefined;
|
|
1244
|
+
const cleanup = sanitizeCleanup(cli.cleanup ?? cfg?.cleanup) ?? undefined;
|
|
1245
|
+
|
|
1246
|
+
const result = await runEval({
|
|
1247
|
+
engine,
|
|
1248
|
+
datasetPath,
|
|
1249
|
+
mode,
|
|
1250
|
+
topK: typeof cli.topK === "number" ? cli.topK : undefined,
|
|
1251
|
+
rerankTopK: typeof cli.rerankTopK === "number" ? cli.rerankTopK : undefined,
|
|
1252
|
+
scopePrefix: typeof cli.scopePrefix === "string" ? cli.scopePrefix : undefined,
|
|
1253
|
+
ingest: typeof cli.ingest === "boolean" ? cli.ingest : (typeof cfg?.ingest === "boolean" ? cfg.ingest : undefined),
|
|
1254
|
+
cleanup,
|
|
1255
|
+
thresholds: mergeThresholds(cfg?.thresholds, cli.thresholds),
|
|
1256
|
+
ci: Boolean(cli.ci),
|
|
1257
|
+
allowAssets: Boolean(cli.allowAssets),
|
|
1258
|
+
allowNonEvalPrefix: Boolean(cli.allowNonEvalPrefix),
|
|
1259
|
+
yes: Boolean(cli.yes),
|
|
1260
|
+
includeNdcg: Boolean(cli.includeNdcg),
|
|
1261
|
+
});
|
|
1262
|
+
|
|
1263
|
+
const outputDir = cli.outputDir ?? cfg?.outputDir ?? result.outputDir;
|
|
1264
|
+
|
|
1265
|
+
const reportPath = await writeEvalReport(outputDir, result.report);
|
|
1266
|
+
const summaryPath = await writeEvalSummaryMd(outputDir, result.report);
|
|
1267
|
+
|
|
1268
|
+
let diffPaths: { json: string; md: string } | null = null;
|
|
1269
|
+
const baselinePath = cli.baseline ?? cfg?.baseline;
|
|
1270
|
+
if (baselinePath) {
|
|
1271
|
+
const baseline = await readEvalReportFromFile(baselinePath);
|
|
1272
|
+
const diff = diffEvalReports({ baseline, candidate: result.report, baselinePath, candidatePath: reportPath });
|
|
1273
|
+
const diffJson = await writeEvalDiffJson(outputDir, diff);
|
|
1274
|
+
const diffMd = await writeEvalDiffMd(outputDir, diff);
|
|
1275
|
+
diffPaths = { json: diffJson, md: diffMd };
|
|
1276
|
+
}
|
|
1277
|
+
|
|
1278
|
+
console.log(
|
|
1279
|
+
[
|
|
1280
|
+
\`[unrag:eval] Wrote report: \${reportPath}\`,
|
|
1281
|
+
\`[unrag:eval] Wrote summary: \${summaryPath}\`,
|
|
1282
|
+
diffPaths ? \`[unrag:eval] Wrote diff: \${diffPaths.json} (+ \${diffPaths.md})\` : "",
|
|
1283
|
+
result.thresholdFailures.length > 0
|
|
1284
|
+
? \`[unrag:eval] Threshold failures:\\n- \${result.thresholdFailures.join("\\n- ")}\`
|
|
1285
|
+
: \`[unrag:eval] Thresholds: pass\`,
|
|
1286
|
+
]
|
|
1287
|
+
.filter(Boolean)
|
|
1288
|
+
.join("\\n")
|
|
1289
|
+
);
|
|
1290
|
+
|
|
1291
|
+
process.exitCode = result.exitCode;
|
|
1292
|
+
}
|
|
1293
|
+
|
|
1294
|
+
main().catch((err) => {
|
|
1295
|
+
const msg = err instanceof Error ? err.stack ?? err.message : String(err);
|
|
1296
|
+
console.error(\`[unrag:eval] Error: \${msg}\`);
|
|
1297
|
+
process.exitCode = 2;
|
|
1298
|
+
});
|
|
1299
|
+
`;
|
|
1300
|
+
}
|
|
1301
|
+
|
|
986
1302
|
// cli/commands/init.ts
|
|
987
1303
|
var CONFIG_FILE = "unrag.json";
|
|
988
1304
|
var CONFIG_VERSION = 1;
|
|
@@ -1069,6 +1385,7 @@ var parseInitArgs = (args) => {
|
|
|
1069
1385
|
};
|
|
1070
1386
|
/**
 * Coerces a preset module list into trimmed, non-empty strings.
 * Anything that is not an array yields an empty list.
 */
var normalizeModuleIdList = (xs) => {
  if (!Array.isArray(xs)) {
    return [];
  }
  return xs.map((entry) => String(entry).trim()).filter((id) => id.length > 0);
};
/** Normalizes the extractor ids of a preset. */
var toExtractors = (xs) => normalizeModuleIdList(xs);
/** Normalizes the connector ids of a preset. */
var toConnectors = (xs) => normalizeModuleIdList(xs);
/** Normalizes the battery ids of a preset. */
var toBatteries = (xs) => normalizeModuleIdList(xs);
|
|
1072
1389
|
async function initCommand(args) {
|
|
1073
1390
|
const root = await tryFindProjectRoot(process.cwd());
|
|
1074
1391
|
if (!root) {
|
|
@@ -1291,7 +1608,34 @@ async function initCommand(args) {
|
|
|
1291
1608
|
Object.assign(connectorDeps, r.deps);
|
|
1292
1609
|
Object.assign(connectorDevDeps, r.devDeps);
|
|
1293
1610
|
}
|
|
1294
|
-
const
|
|
1611
|
+
const batteriesFromPreset = preset ? Array.from(new Set(toBatteries(preset.modules?.batteries))).sort() : [];
|
|
1612
|
+
const availableBatteryIds = new Set((manifest.batteries ?? []).filter((b) => b.status === "available").map((b) => String(b.id)));
|
|
1613
|
+
if (preset) {
|
|
1614
|
+
const unknown = batteriesFromPreset.filter((b) => !availableBatteryIds.has(b));
|
|
1615
|
+
if (unknown.length > 0) {
|
|
1616
|
+
throw new Error(`Preset contains unknown/unavailable batteries: ${unknown.join(", ")}`);
|
|
1617
|
+
}
|
|
1618
|
+
}
|
|
1619
|
+
if (batteriesFromPreset.length > 0) {
|
|
1620
|
+
for (const battery of batteriesFromPreset) {
|
|
1621
|
+
await copyBatteryFiles({
|
|
1622
|
+
projectRoot: root,
|
|
1623
|
+
registryRoot,
|
|
1624
|
+
installDir,
|
|
1625
|
+
battery,
|
|
1626
|
+
yes: nonInteractive,
|
|
1627
|
+
overwrite: overwritePolicy
|
|
1628
|
+
});
|
|
1629
|
+
}
|
|
1630
|
+
}
|
|
1631
|
+
const batteryDeps = {};
|
|
1632
|
+
const batteryDevDeps = {};
|
|
1633
|
+
for (const b of batteriesFromPreset) {
|
|
1634
|
+
const r = depsForBattery(b);
|
|
1635
|
+
Object.assign(batteryDeps, r.deps);
|
|
1636
|
+
Object.assign(batteryDevDeps, r.devDeps);
|
|
1637
|
+
}
|
|
1638
|
+
const merged = mergeDeps(pkg, { ...deps, ...embeddingDeps.deps, ...extractorDeps, ...connectorDeps, ...batteryDeps }, { ...devDeps, ...embeddingDeps.devDeps, ...extractorDevDeps, ...connectorDevDeps, ...batteryDevDeps });
|
|
1295
1639
|
if (merged.changes.length > 0) {
|
|
1296
1640
|
await writePackageJson(root, merged.pkg);
|
|
1297
1641
|
if (!noInstall) {
|
|
@@ -1308,9 +1652,42 @@ async function initCommand(args) {
|
|
|
1308
1652
|
extractors: Array.from(new Set([
|
|
1309
1653
|
...existing?.extractors ?? [],
|
|
1310
1654
|
...richMediaEnabled ? selectedExtractors : []
|
|
1311
|
-
])).sort()
|
|
1655
|
+
])).sort(),
|
|
1656
|
+
batteries: Array.from(new Set([...existing?.batteries ?? [], ...batteriesFromPreset])).sort()
|
|
1312
1657
|
};
|
|
1313
1658
|
await writeJsonFile(path6.join(root, CONFIG_FILE), config);
|
|
1659
|
+
const writeTextFile = async (absPath, content) => {
|
|
1660
|
+
await ensureDir(path6.dirname(absPath));
|
|
1661
|
+
await writeFile5(absPath, content, "utf8");
|
|
1662
|
+
};
|
|
1663
|
+
const writeIfMissing = async (absPath, content) => {
|
|
1664
|
+
if (await exists(absPath))
|
|
1665
|
+
return false;
|
|
1666
|
+
await writeTextFile(absPath, content);
|
|
1667
|
+
return true;
|
|
1668
|
+
};
|
|
1669
|
+
if (batteriesFromPreset.includes("eval")) {
|
|
1670
|
+
const datasetAbs = path6.join(root, ".unrag/eval/datasets/sample.json");
|
|
1671
|
+
const evalConfigAbs = path6.join(root, ".unrag/eval/config.json");
|
|
1672
|
+
const scriptAbs = path6.join(root, "scripts/unrag-eval.ts");
|
|
1673
|
+
await writeIfMissing(datasetAbs, JSON.stringify(EVAL_SAMPLE_DATASET_V1, null, 2) + `
|
|
1674
|
+
`);
|
|
1675
|
+
await writeIfMissing(evalConfigAbs, JSON.stringify(EVAL_CONFIG_DEFAULT, null, 2) + `
|
|
1676
|
+
`);
|
|
1677
|
+
await writeIfMissing(scriptAbs, renderEvalRunnerScript({ installDir }));
|
|
1678
|
+
const pkg2 = await readPackageJson(root);
|
|
1679
|
+
const existingScripts = pkg2.scripts ?? {};
|
|
1680
|
+
const toAdd = {};
|
|
1681
|
+
for (const [name, cmd] of Object.entries(EVAL_PACKAGE_JSON_SCRIPTS)) {
|
|
1682
|
+
if (!(name in existingScripts)) {
|
|
1683
|
+
toAdd[name] = cmd;
|
|
1684
|
+
}
|
|
1685
|
+
}
|
|
1686
|
+
if (Object.keys(toAdd).length > 0) {
|
|
1687
|
+
pkg2.scripts = { ...existingScripts, ...toAdd };
|
|
1688
|
+
await writePackageJson(root, pkg2);
|
|
1689
|
+
}
|
|
1690
|
+
}
|
|
1314
1691
|
const pm = await detectPackageManager(root);
|
|
1315
1692
|
const installLine = merged.changes.length === 0 ? "Dependencies already satisfied." : noInstall ? `Next: run \`${installCmd(pm)}\`` : "Dependencies installed.";
|
|
1316
1693
|
const isNext = Boolean((merged.pkg.dependencies ?? {})["next"]) || Boolean((merged.pkg.devDependencies ?? {})["next"]);
|
|
@@ -1437,12 +1814,92 @@ async function initCommand(args) {
|
|
|
1437
1814
|
}
|
|
1438
1815
|
|
|
1439
1816
|
// cli/commands/add.ts
|
|
1440
|
-
import { outro as outro2 } from "@clack/prompts";
|
|
1817
|
+
import { cancel as cancel3, confirm as confirm3, isCancel as isCancel3, outro as outro2, select as select2, text as text2 } from "@clack/prompts";
|
|
1818
|
+
import { writeFile as writeFile6 } from "node:fs/promises";
|
|
1441
1819
|
import path7 from "node:path";
|
|
1442
1820
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
1443
1821
|
var CONFIG_FILE2 = "unrag.json";
|
|
1444
1822
|
var __filename3 = fileURLToPath2(import.meta.url);
|
|
1445
1823
|
var __dirname3 = path7.dirname(__filename3);
|
|
1824
|
+
/**
 * Writes `content` to `absPath` as UTF-8, making sure the parent directory
 * has been passed through `ensureDir` first.
 */
var writeTextFile = async (absPath, content) => {
  const parentDir = path7.dirname(absPath);
  await ensureDir(parentDir);
  await writeFile6(absPath, content, "utf8");
};
|
|
1828
|
+
/**
 * Decides whether a generated file may be written to `absPath`.
 *
 * - Path does not exist: returns true.
 * - Path exists and `nonInteractive` is set: returns false (never overwrite).
 * - Otherwise asks for confirmation (default "no"); a cancelled prompt prints
 *   "Cancelled." and counts as false.
 *
 * @param absPath Absolute path of the target file.
 * @param projectRoot Root used to show a relative path in the prompt.
 * @param nonInteractive When truthy, no prompt is shown.
 * @returns Promise resolving to whether the file should be written.
 */
var shouldWriteFile = async (absPath, projectRoot, nonInteractive) => {
  const alreadyPresent = await exists(absPath);
  if (!alreadyPresent) {
    return true;
  }
  if (nonInteractive) {
    return false;
  }
  const shownPath = path7.relative(projectRoot, absPath);
  const reply = await confirm3({
    message: `Overwrite ${shownPath}?`,
    initialValue: false
  });
  if (!isCancel3(reply)) {
    return Boolean(reply);
  }
  cancel3("Cancelled.");
  return false;
};
|
|
1843
|
+
var addPackageJsonScripts = async (args) => {
|
|
1844
|
+
const existing = args.pkg.scripts ?? {};
|
|
1845
|
+
const desired = args.scripts;
|
|
1846
|
+
const conflicting = Object.keys(desired).filter((k) => (k in existing));
|
|
1847
|
+
const toAdd = { ...desired };
|
|
1848
|
+
if (conflicting.length > 0 && args.nonInteractive) {
|
|
1849
|
+
for (const k of conflicting)
|
|
1850
|
+
delete toAdd[k];
|
|
1851
|
+
}
|
|
1852
|
+
if (conflicting.length > 0 && !args.nonInteractive) {
|
|
1853
|
+
for (const scriptName of conflicting) {
|
|
1854
|
+
const action = await select2({
|
|
1855
|
+
message: `Script "${scriptName}" already exists. What would you like to do?`,
|
|
1856
|
+
options: [
|
|
1857
|
+
{ value: "keep", label: "Keep existing", hint: existing[scriptName] },
|
|
1858
|
+
{ value: "overwrite", label: "Overwrite", hint: desired[scriptName] },
|
|
1859
|
+
{ value: "rename", label: "Add with different name", hint: `${scriptName}:new` }
|
|
1860
|
+
],
|
|
1861
|
+
initialValue: "keep"
|
|
1862
|
+
});
|
|
1863
|
+
if (isCancel3(action)) {
|
|
1864
|
+
cancel3("Cancelled.");
|
|
1865
|
+
return { added: [], kept: Object.keys(desired) };
|
|
1866
|
+
}
|
|
1867
|
+
if (action === "keep") {
|
|
1868
|
+
delete toAdd[scriptName];
|
|
1869
|
+
continue;
|
|
1870
|
+
}
|
|
1871
|
+
if (action === "rename") {
|
|
1872
|
+
const newName = await text2({
|
|
1873
|
+
message: `New script name for ${scriptName}`,
|
|
1874
|
+
initialValue: `${scriptName}:new`,
|
|
1875
|
+
validate: (v) => {
|
|
1876
|
+
const s = String(v).trim();
|
|
1877
|
+
if (!s)
|
|
1878
|
+
return "Script name is required";
|
|
1879
|
+
if (s in existing || s in toAdd)
|
|
1880
|
+
return "Script name already exists";
|
|
1881
|
+
return;
|
|
1882
|
+
}
|
|
1883
|
+
});
|
|
1884
|
+
if (isCancel3(newName)) {
|
|
1885
|
+
cancel3("Cancelled.");
|
|
1886
|
+
return { added: [], kept: Object.keys(desired) };
|
|
1887
|
+
}
|
|
1888
|
+
const nextName = String(newName).trim();
|
|
1889
|
+
const value = toAdd[scriptName];
|
|
1890
|
+
delete toAdd[scriptName];
|
|
1891
|
+
toAdd[nextName] = value;
|
|
1892
|
+
}
|
|
1893
|
+
}
|
|
1894
|
+
}
|
|
1895
|
+
const added = Object.keys(toAdd);
|
|
1896
|
+
if (added.length > 0) {
|
|
1897
|
+
args.pkg.scripts = { ...existing, ...toAdd };
|
|
1898
|
+
await writePackageJson(args.projectRoot, args.pkg);
|
|
1899
|
+
}
|
|
1900
|
+
const kept = conflicting.filter((k) => !(k in toAdd));
|
|
1901
|
+
return { added, kept };
|
|
1902
|
+
};
|
|
1446
1903
|
var parseAddArgs = (args) => {
|
|
1447
1904
|
const out = {};
|
|
1448
1905
|
for (let i = 0;i < args.length; i++) {
|
|
@@ -1539,6 +1996,339 @@ Available batteries: ${Array.from(availableBatteries).join(", ")}`);
|
|
|
1539
1996
|
}
|
|
1540
1997
|
const batteries = Array.from(new Set([...config.batteries ?? [], battery])).sort();
|
|
1541
1998
|
await writeJsonFile(configPath, { ...config, batteries });
|
|
1999
|
+
if (battery === "eval") {
|
|
2000
|
+
const datasetAbs = path7.join(root, ".unrag/eval/datasets/sample.json");
|
|
2001
|
+
const configAbs = path7.join(root, ".unrag/eval/config.json");
|
|
2002
|
+
const scriptAbs = path7.join(root, "scripts/unrag-eval.ts");
|
|
2003
|
+
const sampleDataset = {
|
|
2004
|
+
version: "1",
|
|
2005
|
+
id: "sample",
|
|
2006
|
+
description: "Tiny dataset to validate retrieval changes.",
|
|
2007
|
+
defaults: {
|
|
2008
|
+
topK: 10,
|
|
2009
|
+
scopePrefix: "eval:sample:",
|
|
2010
|
+
mode: "retrieve",
|
|
2011
|
+
thresholds: { min: { recallAtK: 0.75 } }
|
|
2012
|
+
},
|
|
2013
|
+
documents: [
|
|
2014
|
+
{
|
|
2015
|
+
sourceId: "eval:sample:doc:refund-policy",
|
|
2016
|
+
content: "Refunds are available within 30 days of purchase, provided you have a receipt."
|
|
2017
|
+
},
|
|
2018
|
+
{
|
|
2019
|
+
sourceId: "eval:sample:doc:contact-support",
|
|
2020
|
+
content: "Contact support by emailing support@example.com. Response times are typically under 24 hours."
|
|
2021
|
+
}
|
|
2022
|
+
],
|
|
2023
|
+
queries: [
|
|
2024
|
+
{
|
|
2025
|
+
id: "q_refund_window",
|
|
2026
|
+
query: "How long do I have to request a refund?",
|
|
2027
|
+
relevant: { sourceIds: ["eval:sample:doc:refund-policy"] }
|
|
2028
|
+
},
|
|
2029
|
+
{
|
|
2030
|
+
id: "q_contact_support",
|
|
2031
|
+
query: "How do I contact support?",
|
|
2032
|
+
relevant: { sourceIds: ["eval:sample:doc:contact-support"] }
|
|
2033
|
+
}
|
|
2034
|
+
]
|
|
2035
|
+
};
|
|
2036
|
+
const evalConfig = {
|
|
2037
|
+
thresholds: { min: { recallAtK: 0.75 } },
|
|
2038
|
+
cleanup: "none",
|
|
2039
|
+
ingest: true
|
|
2040
|
+
};
|
|
2041
|
+
const installImportBase = `../${config.installDir.replace(/\\/g, "/")}`;
|
|
2042
|
+
const script = `/**
|
|
2043
|
+
* Unrag eval runner entrypoint (generated).
|
|
2044
|
+
*
|
|
2045
|
+
* You own this file — customize it freely.
|
|
2046
|
+
*/
|
|
2047
|
+
|
|
2048
|
+
import path from "node:path";
|
|
2049
|
+
import { access, readFile } from "node:fs/promises";
|
|
2050
|
+
|
|
2051
|
+
import { createUnragEngine } from "../unrag.config";
|
|
2052
|
+
import {
|
|
2053
|
+
runEval,
|
|
2054
|
+
readEvalReportFromFile,
|
|
2055
|
+
writeEvalReport,
|
|
2056
|
+
writeEvalSummaryMd,
|
|
2057
|
+
diffEvalReports,
|
|
2058
|
+
writeEvalDiffJson,
|
|
2059
|
+
writeEvalDiffMd,
|
|
2060
|
+
type EvalMode,
|
|
2061
|
+
type EvalThresholds,
|
|
2062
|
+
type EvalCleanupPolicy,
|
|
2063
|
+
} from "${installImportBase}/eval";
|
|
2064
|
+
|
|
2065
|
+
type CliArgs = {
|
|
2066
|
+
dataset?: string;
|
|
2067
|
+
baseline?: string;
|
|
2068
|
+
outputDir?: string;
|
|
2069
|
+
mode?: EvalMode;
|
|
2070
|
+
topK?: number;
|
|
2071
|
+
rerankTopK?: number;
|
|
2072
|
+
scopePrefix?: string;
|
|
2073
|
+
ingest?: boolean;
|
|
2074
|
+
cleanup?: EvalCleanupPolicy;
|
|
2075
|
+
thresholds?: Partial<EvalThresholds>;
|
|
2076
|
+
ci?: boolean;
|
|
2077
|
+
allowAssets?: boolean;
|
|
2078
|
+
allowNonEvalPrefix?: boolean;
|
|
2079
|
+
yes?: boolean;
|
|
2080
|
+
includeNdcg?: boolean;
|
|
2081
|
+
};
|
|
2082
|
+
|
|
2083
|
+
async function fileExists(p: string): Promise<boolean> {
|
|
2084
|
+
try {
|
|
2085
|
+
await access(p);
|
|
2086
|
+
return true;
|
|
2087
|
+
} catch {
|
|
2088
|
+
return false;
|
|
2089
|
+
}
|
|
2090
|
+
}
|
|
2091
|
+
|
|
2092
|
+
async function loadEnvFilesBestEffort(projectRoot: string) {
|
|
2093
|
+
const nodeEnv = process.env.NODE_ENV ?? "development";
|
|
2094
|
+
const candidates = [
|
|
2095
|
+
".env",
|
|
2096
|
+
".env.local",
|
|
2097
|
+
\`.env.\${nodeEnv}\`,
|
|
2098
|
+
\`.env.\${nodeEnv}.local\`,
|
|
2099
|
+
];
|
|
2100
|
+
for (const rel of candidates) {
|
|
2101
|
+
const abs = path.join(projectRoot, rel);
|
|
2102
|
+
if (!(await fileExists(abs))) continue;
|
|
2103
|
+
const raw = await readFile(abs, "utf8").catch(() => "");
|
|
2104
|
+
for (const line of raw.split(/\\r?\\n/)) {
|
|
2105
|
+
const s = line.trim();
|
|
2106
|
+
if (!s || s.startsWith("#")) continue;
|
|
2107
|
+
const eq = s.indexOf("=");
|
|
2108
|
+
if (eq < 0) continue;
|
|
2109
|
+
const key = s.slice(0, eq).trim();
|
|
2110
|
+
const value = s.slice(eq + 1).trim().replace(/^"|"$/g, "");
|
|
2111
|
+
if (!key) continue;
|
|
2112
|
+
if (process.env[key] === undefined) process.env[key] = value;
|
|
2113
|
+
}
|
|
2114
|
+
}
|
|
2115
|
+
}
|
|
2116
|
+
|
|
2117
|
+
function parseThresholdExpr(expr: string): Partial<EvalThresholds> {
|
|
2118
|
+
const s = String(expr ?? "").trim();
|
|
2119
|
+
const eq = s.indexOf("=");
|
|
2120
|
+
if (eq < 0) throw new Error(\`Invalid --threshold: "\${s}" (expected key=value)\`);
|
|
2121
|
+
const key = s.slice(0, eq).trim();
|
|
2122
|
+
const value = Number(s.slice(eq + 1).trim());
|
|
2123
|
+
if (!Number.isFinite(value)) throw new Error(\`Invalid --threshold value: "\${s}"\`);
|
|
2124
|
+
|
|
2125
|
+
const out: Partial<EvalThresholds> = {};
|
|
2126
|
+
if (key === "min.hitAtK") out.min = { hitAtK: value };
|
|
2127
|
+
else if (key === "min.recallAtK") out.min = { recallAtK: value };
|
|
2128
|
+
else if (key === "min.mrrAtK") out.min = { mrrAtK: value };
|
|
2129
|
+
else if (key === "max.p95TotalMs") out.max = { p95TotalMs: value };
|
|
2130
|
+
else throw new Error(\`Unknown threshold key: "\${key}"\`);
|
|
2131
|
+
return out;
|
|
2132
|
+
}
|
|
2133
|
+
|
|
2134
|
+
function mergeThresholds(a: Partial<EvalThresholds>, b: Partial<EvalThresholds>): Partial<EvalThresholds> {
|
|
2135
|
+
return {
|
|
2136
|
+
min: { ...(a.min ?? {}), ...(b.min ?? {}) },
|
|
2137
|
+
max: { ...(a.max ?? {}), ...(b.max ?? {}) },
|
|
2138
|
+
};
|
|
2139
|
+
}
|
|
2140
|
+
|
|
2141
|
+
/**
 * Parse the eval script's command-line arguments into a CliArgs object.
 *
 * Flags that take a value fail fast when the value is missing or invalid,
 * instead of storing undefined / NaN (NaN is not nullish, so it would defeat
 * the "cli value ?? config value" fallback used by the caller).
 * Unrecognized arguments are ignored on purpose: the documented usage passes
 * a bare "--" separator before the flags.
 *
 * @param argv Arguments after the script name.
 * @returns Only the options that were present on the command line.
 * @throws Error for a missing value, a non positive-integer top-k value, an
 *         unknown mode or cleanup policy, or an invalid threshold expression.
 */
function parseArgs(argv: string[]): CliArgs {
  const out: CliArgs = {};
  const thresholds: Partial<EvalThresholds>[] = [];

  // Value that must follow a flag.
  const requireValue = (flag: string, value: string | undefined): string => {
    if (value === undefined) throw new Error("Missing value for " + flag);
    return value;
  };

  // Value that must be a positive integer (result counts).
  const requirePositiveInt = (flag: string, value: string | undefined): number => {
    const raw = requireValue(flag, value).trim();
    const n = raw === "" ? Number.NaN : Number(raw);
    if (!Number.isInteger(n) || n <= 0) {
      throw new Error("Invalid value for " + flag + ': "' + raw + '" (expected a positive integer)');
    }
    return n;
  };

  // Value that must be one of a fixed set of choices.
  const requireOneOf = (flag: string, value: string | undefined, allowed: readonly string[]): string => {
    const raw = requireValue(flag, value);
    if (!allowed.includes(raw)) {
      throw new Error("Invalid value for " + flag + ': "' + raw + '" (expected ' + allowed.join("|") + ")");
    }
    return raw;
  };

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === "--dataset") out.dataset = requireValue(a, argv[++i]);
    else if (a === "--baseline") out.baseline = requireValue(a, argv[++i]);
    else if (a === "--outputDir" || a === "--output-dir") out.outputDir = requireValue(a, argv[++i]);
    else if (a === "--mode") out.mode = requireOneOf(a, argv[++i], ["retrieve", "retrieve+rerank"]) as EvalMode;
    else if (a === "--topK" || a === "--top-k") out.topK = requirePositiveInt(a, argv[++i]);
    else if (a === "--rerankTopK" || a === "--rerank-top-k") out.rerankTopK = requirePositiveInt(a, argv[++i]);
    else if (a === "--scopePrefix" || a === "--scope-prefix") out.scopePrefix = requireValue(a, argv[++i]);
    else if (a === "--no-ingest") out.ingest = false;
    else if (a === "--cleanup") {
      out.cleanup = requireOneOf(a, argv[++i], ["none", "on-success", "always"]) as EvalCleanupPolicy;
    } else if (a === "--threshold") thresholds.push(parseThresholdExpr(requireValue(a, argv[++i])));
    else if (a === "--ci") out.ci = true;
    else if (a === "--allow-assets") out.allowAssets = true;
    else if (a === "--allow-non-eval-prefix" || a === "--allow-custom-prefix") out.allowNonEvalPrefix = true;
    else if (a === "--yes" || a === "-y") out.yes = true;
    else if (a === "--include-ndcg") out.includeNdcg = true;
    else if (a === "--help" || a === "-h") {
      printHelp();
      process.exit(0);
    }
  }

  // Later --threshold flags override earlier ones for the same key.
  for (const t of thresholds) out.thresholds = mergeThresholds(out.thresholds ?? {}, t);
  return out;
}
|
|
2171
|
+
|
|
2172
|
+
function printHelp() {
|
|
2173
|
+
console.log(
|
|
2174
|
+
[
|
|
2175
|
+
"unrag-eval — retrieval eval harness",
|
|
2176
|
+
"",
|
|
2177
|
+
"Usage:",
|
|
2178
|
+
" bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json",
|
|
2179
|
+
"",
|
|
2180
|
+
"Options:",
|
|
2181
|
+
" --dataset <path> Dataset JSON path (required)",
|
|
2182
|
+
" --baseline <report.json> Baseline report for diffing",
|
|
2183
|
+
" --output-dir <dir> Output dir (default: .unrag/eval/runs/<ts>-<datasetId>)",
|
|
2184
|
+
" --mode retrieve|retrieve+rerank Override mode",
|
|
2185
|
+
" --top-k <n> Override topK",
|
|
2186
|
+
" --rerank-top-k <n> In rerank mode, retrieve N candidates before reranking (default: topK*3)",
|
|
2187
|
+
" --scope-prefix <prefix> Override scopePrefix",
|
|
2188
|
+
" --no-ingest Skip dataset document ingest",
|
|
2189
|
+
" --cleanup none|on-success|always Cleanup policy when ingesting",
|
|
2190
|
+
" --threshold <k=v> Repeatable thresholds (e.g. min.recallAtK=0.75)",
|
|
2191
|
+
" --ci CI mode (non-interactive)",
|
|
2192
|
+
" --yes, -y Allow dangerous operations when explicitly enabled",
|
|
2193
|
+
" --allow-assets Allow documents[].assets ingestion (advanced)",
|
|
2194
|
+
" --allow-custom-prefix Allow scopePrefix outside eval:* (dangerous)",
|
|
2195
|
+
" --include-ndcg Compute nDCG@k (optional)",
|
|
2196
|
+
].join("\\n")
|
|
2197
|
+
);
|
|
2198
|
+
}
|
|
2199
|
+
|
|
2200
|
+
async function readConfigFile(projectRoot: string): Promise<any | null> {
|
|
2201
|
+
const abs = path.join(projectRoot, ".unrag/eval/config.json");
|
|
2202
|
+
if (!(await fileExists(abs))) return null;
|
|
2203
|
+
const raw = await readFile(abs, "utf8");
|
|
2204
|
+
try {
|
|
2205
|
+
return JSON.parse(raw);
|
|
2206
|
+
} catch (e) {
|
|
2207
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
2208
|
+
throw new Error(\`Failed to parse .unrag/eval/config.json: \${msg}\`);
|
|
2209
|
+
}
|
|
2210
|
+
}
|
|
2211
|
+
|
|
2212
|
+
function sanitizeMode(v: any): EvalMode | undefined {
|
|
2213
|
+
if (v === "retrieve" || v === "retrieve+rerank") return v;
|
|
2214
|
+
return undefined;
|
|
2215
|
+
}
|
|
2216
|
+
|
|
2217
|
+
function sanitizeCleanup(v: any): EvalCleanupPolicy | undefined {
|
|
2218
|
+
if (v === "none" || v === "on-success" || v === "always") return v;
|
|
2219
|
+
return undefined;
|
|
2220
|
+
}
|
|
2221
|
+
|
|
2222
|
+
async function main() {
|
|
2223
|
+
const projectRoot = path.join(process.cwd());
|
|
2224
|
+
await loadEnvFilesBestEffort(projectRoot);
|
|
2225
|
+
|
|
2226
|
+
const cli = parseArgs(process.argv.slice(2));
|
|
2227
|
+
const cfg = await readConfigFile(projectRoot);
|
|
2228
|
+
|
|
2229
|
+
const datasetPath = cli.dataset ?? cfg?.dataset ?? ".unrag/eval/datasets/sample.json";
|
|
2230
|
+
if (!datasetPath) throw new Error("--dataset is required");
|
|
2231
|
+
|
|
2232
|
+
const engine = createUnragEngine();
|
|
2233
|
+
|
|
2234
|
+
const thresholds: Partial<EvalThresholds> = mergeThresholds(cfg?.thresholds ?? {}, cli.thresholds ?? {});
|
|
2235
|
+
|
|
2236
|
+
const result = await runEval({
|
|
2237
|
+
engine,
|
|
2238
|
+
datasetPath,
|
|
2239
|
+
mode: cli.mode ?? sanitizeMode(cfg?.mode),
|
|
2240
|
+
topK: cli.topK ?? (typeof cfg?.topK === "number" ? cfg.topK : undefined),
|
|
2241
|
+
rerankTopK: cli.rerankTopK ?? (typeof cfg?.rerankTopK === "number" ? cfg.rerankTopK : undefined),
|
|
2242
|
+
scopePrefix: cli.scopePrefix ?? (typeof cfg?.scopePrefix === "string" ? cfg.scopePrefix : undefined),
|
|
2243
|
+
ingest: cli.ingest ?? (typeof cfg?.ingest === "boolean" ? cfg.ingest : undefined),
|
|
2244
|
+
cleanup: cli.cleanup ?? sanitizeCleanup(cfg?.cleanup) ?? "none",
|
|
2245
|
+
includeNdcg: cli.includeNdcg ?? Boolean(cfg?.includeNdcg),
|
|
2246
|
+
allowAssets: cli.allowAssets ?? Boolean(cfg?.allowAssets),
|
|
2247
|
+
allowNonEvalPrefix: cli.allowNonEvalPrefix ?? Boolean(cfg?.allowNonEvalPrefix),
|
|
2248
|
+
confirmedDangerousDelete: Boolean(cli.yes),
|
|
2249
|
+
thresholds,
|
|
2250
|
+
});
|
|
2251
|
+
|
|
2252
|
+
const ts = new Date().toISOString().replace(/[:.]/g, "-");
|
|
2253
|
+
const outputDir =
|
|
2254
|
+
cli.outputDir ??
|
|
2255
|
+
cfg?.outputDir ??
|
|
2256
|
+
path.join(".unrag/eval/runs", \`\${ts}-\${result.report.dataset.id}\`);
|
|
2257
|
+
|
|
2258
|
+
const reportPath = await writeEvalReport(outputDir, result.report);
|
|
2259
|
+
const summaryPath = await writeEvalSummaryMd(outputDir, result.report);
|
|
2260
|
+
|
|
2261
|
+
let diffPaths: { json: string; md: string } | null = null;
|
|
2262
|
+
const baselinePath = cli.baseline ?? cfg?.baseline;
|
|
2263
|
+
if (baselinePath) {
|
|
2264
|
+
const baseline = await readEvalReportFromFile(baselinePath);
|
|
2265
|
+
const diff = diffEvalReports({ baseline, candidate: result.report, baselinePath, candidatePath: reportPath });
|
|
2266
|
+
const diffJson = await writeEvalDiffJson(outputDir, diff);
|
|
2267
|
+
const diffMd = await writeEvalDiffMd(outputDir, diff);
|
|
2268
|
+
diffPaths = { json: diffJson, md: diffMd };
|
|
2269
|
+
}
|
|
2270
|
+
|
|
2271
|
+
console.log(
|
|
2272
|
+
[
|
|
2273
|
+
\`[unrag:eval] Wrote report: \${reportPath}\`,
|
|
2274
|
+
\`[unrag:eval] Wrote summary: \${summaryPath}\`,
|
|
2275
|
+
diffPaths ? \`[unrag:eval] Wrote diff: \${diffPaths.json} (+ \${diffPaths.md})\` : "",
|
|
2276
|
+
result.thresholdFailures.length > 0
|
|
2277
|
+
? \`[unrag:eval] Threshold failures:\\n- \${result.thresholdFailures.join("\\n- ")}\`
|
|
2278
|
+
: \`[unrag:eval] Thresholds: pass\`,
|
|
2279
|
+
]
|
|
2280
|
+
.filter(Boolean)
|
|
2281
|
+
.join("\\n")
|
|
2282
|
+
);
|
|
2283
|
+
|
|
2284
|
+
process.exitCode = result.exitCode;
|
|
2285
|
+
}
|
|
2286
|
+
|
|
2287
|
+
main().catch((err) => {
|
|
2288
|
+
const msg = err instanceof Error ? err.stack ?? err.message : String(err);
|
|
2289
|
+
console.error(\`[unrag:eval] Error: \${msg}\`);
|
|
2290
|
+
process.exitCode = 2;
|
|
2291
|
+
});
|
|
2292
|
+
`;
|
|
2293
|
+
if (await shouldWriteFile(datasetAbs, root, nonInteractive)) {
|
|
2294
|
+
await writeTextFile(datasetAbs, JSON.stringify(sampleDataset, null, 2) + `
|
|
2295
|
+
`);
|
|
2296
|
+
}
|
|
2297
|
+
if (await shouldWriteFile(configAbs, root, nonInteractive)) {
|
|
2298
|
+
await writeTextFile(configAbs, JSON.stringify(evalConfig, null, 2) + `
|
|
2299
|
+
`);
|
|
2300
|
+
}
|
|
2301
|
+
if (await shouldWriteFile(scriptAbs, root, nonInteractive)) {
|
|
2302
|
+
await writeTextFile(scriptAbs, script);
|
|
2303
|
+
}
|
|
2304
|
+
const scriptsToAdd = {
|
|
2305
|
+
"unrag:eval": `bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json`,
|
|
2306
|
+
"unrag:eval:ci": `bun run scripts/unrag-eval.ts -- --dataset .unrag/eval/datasets/sample.json --ci`
|
|
2307
|
+
};
|
|
2308
|
+
const scriptsResult = await addPackageJsonScripts({
|
|
2309
|
+
projectRoot: root,
|
|
2310
|
+
pkg,
|
|
2311
|
+
scripts: scriptsToAdd,
|
|
2312
|
+
nonInteractive
|
|
2313
|
+
});
|
|
2314
|
+
outro2([
|
|
2315
|
+
`Installed battery: ${battery}.`,
|
|
2316
|
+
"",
|
|
2317
|
+
`- Code: ${path7.join(config.installDir, "eval")}`,
|
|
2318
|
+
"",
|
|
2319
|
+
`- Dataset: ${path7.relative(root, datasetAbs)}`,
|
|
2320
|
+
`- Script: ${path7.relative(root, scriptAbs)}`,
|
|
2321
|
+
"",
|
|
2322
|
+
scriptsResult.added.length > 0 ? `Added scripts: ${scriptsResult.added.join(", ")}` : "Added scripts: none",
|
|
2323
|
+
scriptsResult.kept.length > 0 ? `Kept existing scripts: ${scriptsResult.kept.join(", ")}` : "",
|
|
2324
|
+
"",
|
|
2325
|
+
"Next:",
|
|
2326
|
+
" bun run unrag:eval",
|
|
2327
|
+
" bun run unrag:eval:ci"
|
|
2328
|
+
].filter(Boolean).join(`
|
|
2329
|
+
`));
|
|
2330
|
+
return;
|
|
2331
|
+
}
|
|
1542
2332
|
const wiringSnippet = battery === "reranker" ? [
|
|
1543
2333
|
"",
|
|
1544
2334
|
"Next steps:",
|
|
@@ -2776,14 +3566,15 @@ async function runDbChecks(state, options) {
|
|
|
2776
3566
|
summary: `Using ${dbUrlResult.source}`,
|
|
2777
3567
|
details: [redactConnectionString(dbUrlResult.url)]
|
|
2778
3568
|
});
|
|
2779
|
-
let
|
|
3569
|
+
let end;
|
|
2780
3570
|
try {
|
|
2781
3571
|
const pg = await import("pg");
|
|
2782
3572
|
const Pool = pg.default?.Pool ?? pg.Pool;
|
|
2783
3573
|
const pool = new Pool({ connectionString: dbUrlResult.url });
|
|
2784
|
-
|
|
3574
|
+
end = () => pool.end();
|
|
3575
|
+
const client = {
|
|
2785
3576
|
query: (sql, params) => pool.query(sql, params),
|
|
2786
|
-
end
|
|
3577
|
+
end
|
|
2787
3578
|
};
|
|
2788
3579
|
const connectivityResult = await checkConnectivity(client);
|
|
2789
3580
|
results.push(connectivityResult);
|
|
@@ -2796,6 +3587,10 @@ async function runDbChecks(state, options) {
|
|
|
2796
3587
|
const tableNames = await inferTableNames(installDirFull ?? "", state.storeAdapter);
|
|
2797
3588
|
const schemaResults = await checkSchema(client, options.schema, tableNames);
|
|
2798
3589
|
results.push(...schemaResults);
|
|
3590
|
+
const uniquenessResult = await checkSourceIdUniqueness(client, options.schema, tableNames);
|
|
3591
|
+
results.push(uniquenessResult);
|
|
3592
|
+
const duplicatesResult = await checkDuplicateSourceIds(client, options.schema, tableNames);
|
|
3593
|
+
results.push(duplicatesResult);
|
|
2799
3594
|
const indexResults = await checkIndexes(client, options.schema, tableNames);
|
|
2800
3595
|
results.push(...indexResults);
|
|
2801
3596
|
const dimensionResults = await checkDimensionConsistency(client, options.schema, tableNames, options.scope);
|
|
@@ -2814,9 +3609,8 @@ async function runDbChecks(state, options) {
|
|
|
2814
3609
|
]
|
|
2815
3610
|
});
|
|
2816
3611
|
} finally {
|
|
2817
|
-
if (
|
|
2818
|
-
await
|
|
2819
|
-
}
|
|
3612
|
+
if (end)
|
|
3613
|
+
await end().catch(() => {});
|
|
2820
3614
|
}
|
|
2821
3615
|
return results;
|
|
2822
3616
|
}
|
|
@@ -3083,6 +3877,135 @@ async function checkForeignKeys(client, schema, tableNames) {
|
|
|
3083
3877
|
};
|
|
3084
3878
|
}
|
|
3085
3879
|
}
|
|
3880
|
+
async function checkSourceIdUniqueness(client, schema, tableNames) {
|
|
3881
|
+
try {
|
|
3882
|
+
const uniqueConstraintResult = await client.query(`SELECT con.conname as constraint_name
|
|
3883
|
+
FROM pg_constraint con
|
|
3884
|
+
JOIN pg_class t ON t.oid = con.conrelid
|
|
3885
|
+
JOIN pg_namespace n ON n.oid = t.relnamespace
|
|
3886
|
+
WHERE n.nspname = $1
|
|
3887
|
+
AND t.relname = $2
|
|
3888
|
+
AND con.contype = 'u'
|
|
3889
|
+
AND array_length(con.conkey, 1) = 1
|
|
3890
|
+
AND (
|
|
3891
|
+
SELECT a.attname
|
|
3892
|
+
FROM pg_attribute a
|
|
3893
|
+
WHERE a.attrelid = t.oid AND a.attnum = con.conkey[1]
|
|
3894
|
+
) = 'source_id'`, [schema, tableNames.documents]);
|
|
3895
|
+
if (uniqueConstraintResult.rows.length > 0) {
|
|
3896
|
+
return {
|
|
3897
|
+
id: "db-sourceid-unique",
|
|
3898
|
+
title: "documents.source_id uniqueness",
|
|
3899
|
+
status: "pass",
|
|
3900
|
+
summary: "UNIQUE constraint exists on documents.source_id.",
|
|
3901
|
+
details: [`Constraint: ${uniqueConstraintResult.rows[0].constraint_name}`]
|
|
3902
|
+
};
|
|
3903
|
+
}
|
|
3904
|
+
const uniqueIndexResult = await client.query(`SELECT i.relname as indexname, pg_get_indexdef(i.oid) as indexdef
|
|
3905
|
+
FROM pg_index ix
|
|
3906
|
+
JOIN pg_class t ON t.oid = ix.indrelid
|
|
3907
|
+
JOIN pg_class i ON i.oid = ix.indexrelid
|
|
3908
|
+
JOIN pg_namespace n ON n.oid = t.relnamespace
|
|
3909
|
+
WHERE n.nspname = $1
|
|
3910
|
+
AND t.relname = $2
|
|
3911
|
+
AND ix.indisunique = true
|
|
3912
|
+
AND ix.indexprs IS NULL
|
|
3913
|
+
AND ix.indpred IS NULL
|
|
3914
|
+
-- Ensure key columns are exactly (source_id). This also allows INCLUDE columns.
|
|
3915
|
+
AND pg_get_indexdef(i.oid) ~* '\\\\(\\\\s*"?source_id"?\\\\s*\\\\)'`, [schema, tableNames.documents]);
|
|
3916
|
+
if (uniqueIndexResult.rows.length > 0) {
|
|
3917
|
+
return {
|
|
3918
|
+
id: "db-sourceid-unique",
|
|
3919
|
+
title: "documents.source_id uniqueness",
|
|
3920
|
+
status: "pass",
|
|
3921
|
+
summary: "UNIQUE index exists on documents.source_id.",
|
|
3922
|
+
details: [`Index: ${uniqueIndexResult.rows[0].indexname}`]
|
|
3923
|
+
};
|
|
3924
|
+
}
|
|
3925
|
+
return {
|
|
3926
|
+
id: "db-sourceid-unique",
|
|
3927
|
+
title: "documents.source_id uniqueness",
|
|
3928
|
+
status: "fail",
|
|
3929
|
+
summary: "Missing UNIQUE constraint on documents.source_id.",
|
|
3930
|
+
details: [
|
|
3931
|
+
"Unrag requires a unique constraint on documents.source_id for idempotent ingestion.",
|
|
3932
|
+
"Without this constraint, concurrent ingests for the same sourceId may create duplicates."
|
|
3933
|
+
],
|
|
3934
|
+
fixHints: [
|
|
3935
|
+
`ALTER TABLE ${schema}.${tableNames.documents} ADD CONSTRAINT ${tableNames.documents}_source_id_key UNIQUE (source_id);`,
|
|
3936
|
+
"-- Or create a unique index:",
|
|
3937
|
+
`CREATE UNIQUE INDEX ${tableNames.documents}_source_id_unique_idx ON ${schema}.${tableNames.documents}(source_id);`
|
|
3938
|
+
],
|
|
3939
|
+
docsLink: docsUrl("/docs/getting-started/database#schema-requirements")
|
|
3940
|
+
};
|
|
3941
|
+
} catch (err) {
|
|
3942
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
3943
|
+
return {
|
|
3944
|
+
id: "db-sourceid-unique",
|
|
3945
|
+
title: "documents.source_id uniqueness",
|
|
3946
|
+
status: "fail",
|
|
3947
|
+
summary: `Could not check uniqueness constraint: ${message}`
|
|
3948
|
+
};
|
|
3949
|
+
}
|
|
3950
|
+
}
|
|
3951
|
+
async function checkDuplicateSourceIds(client, schema, tableNames) {
|
|
3952
|
+
try {
|
|
3953
|
+
const countResult = await client.query(`SELECT COUNT(*) as duplicate_count
|
|
3954
|
+
FROM (
|
|
3955
|
+
SELECT source_id
|
|
3956
|
+
FROM ${schema}.${tableNames.documents}
|
|
3957
|
+
GROUP BY source_id
|
|
3958
|
+
HAVING COUNT(*) > 1
|
|
3959
|
+
) duplicates`);
|
|
3960
|
+
const duplicateCount = parseInt(countResult.rows[0]?.duplicate_count ?? "0", 10);
|
|
3961
|
+
if (duplicateCount === 0) {
|
|
3962
|
+
return {
|
|
3963
|
+
id: "db-sourceid-duplicates",
|
|
3964
|
+
title: "documents.source_id duplicates",
|
|
3965
|
+
status: "pass",
|
|
3966
|
+
summary: "No duplicate source_id values found."
|
|
3967
|
+
};
|
|
3968
|
+
}
|
|
3969
|
+
const sampleResult = await client.query(`SELECT source_id, COUNT(*) as count
|
|
3970
|
+
FROM ${schema}.${tableNames.documents}
|
|
3971
|
+
GROUP BY source_id
|
|
3972
|
+
HAVING COUNT(*) > 1
|
|
3973
|
+
ORDER BY COUNT(*) DESC
|
|
3974
|
+
LIMIT 5`);
|
|
3975
|
+
const samples = sampleResult.rows.map((r) => `"${r.source_id}" (${r.count} copies)`);
|
|
3976
|
+
return {
|
|
3977
|
+
id: "db-sourceid-duplicates",
|
|
3978
|
+
title: "documents.source_id duplicates",
|
|
3979
|
+
status: "fail",
|
|
3980
|
+
summary: `Found ${duplicateCount} source_id value(s) with duplicates.`,
|
|
3981
|
+
details: [
|
|
3982
|
+
"Duplicate source_id values must be resolved before adding a unique constraint.",
|
|
3983
|
+
"",
|
|
3984
|
+
"Sample duplicates:",
|
|
3985
|
+
...samples,
|
|
3986
|
+
duplicateCount > 5 ? `... and ${duplicateCount - 5} more` : ""
|
|
3987
|
+
].filter(Boolean),
|
|
3988
|
+
fixHints: [
|
|
3989
|
+
"-- Find all duplicates:",
|
|
3990
|
+
`SELECT source_id, COUNT(*), array_agg(id) as document_ids`,
|
|
3991
|
+
`FROM ${schema}.${tableNames.documents}`,
|
|
3992
|
+
`GROUP BY source_id HAVING COUNT(*) > 1;`,
|
|
3993
|
+
"",
|
|
3994
|
+
"-- Resolve duplicates by deleting extra rows for a given source_id.",
|
|
3995
|
+
"-- (Exact strategy depends on your app; pick which document_id to keep and delete the rest.)"
|
|
3996
|
+
],
|
|
3997
|
+
docsLink: docsUrl("/docs/getting-started/database#resolving-duplicates")
|
|
3998
|
+
};
|
|
3999
|
+
} catch (err) {
|
|
4000
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
4001
|
+
return {
|
|
4002
|
+
id: "db-sourceid-duplicates",
|
|
4003
|
+
title: "documents.source_id duplicates",
|
|
4004
|
+
status: "warn",
|
|
4005
|
+
summary: `Could not check for duplicates: ${message}`
|
|
4006
|
+
};
|
|
4007
|
+
}
|
|
4008
|
+
}
|
|
3086
4009
|
async function checkIndexes(client, schema, tableNames) {
|
|
3087
4010
|
const results = [];
|
|
3088
4011
|
try {
|
|
@@ -3500,14 +4423,14 @@ function resolveConfigPath(projectRoot, configPath) {
|
|
|
3500
4423
|
// cli/commands/doctor-setup.ts
|
|
3501
4424
|
import path14 from "node:path";
|
|
3502
4425
|
import {
|
|
3503
|
-
cancel as
|
|
3504
|
-
confirm as
|
|
3505
|
-
isCancel as
|
|
4426
|
+
cancel as cancel4,
|
|
4427
|
+
confirm as confirm4,
|
|
4428
|
+
isCancel as isCancel4,
|
|
3506
4429
|
multiselect,
|
|
3507
4430
|
outro as outro3,
|
|
3508
|
-
select as
|
|
4431
|
+
select as select3,
|
|
3509
4432
|
spinner,
|
|
3510
|
-
text as
|
|
4433
|
+
text as text3
|
|
3511
4434
|
} from "@clack/prompts";
|
|
3512
4435
|
var DEFAULT_CONFIG_PATH = ".unrag/doctor.json";
|
|
3513
4436
|
function parseSetupArgs(args) {
|
|
@@ -3582,7 +4505,7 @@ async function doctorSetupCommand(args) {
|
|
|
3582
4505
|
});
|
|
3583
4506
|
const tableNames = state.installDir ? await inferTableNames(path14.join(projectRoot, state.installDir), state.storeAdapter) : { documents: "documents", chunks: "chunks", embeddings: "embeddings" };
|
|
3584
4507
|
s.stop("Configuration detected.");
|
|
3585
|
-
const configPathAnswer = parsed.configPath ? parsed.configPath : nonInteractive ? DEFAULT_CONFIG_PATH : await
|
|
4508
|
+
const configPathAnswer = parsed.configPath ? parsed.configPath : nonInteractive ? DEFAULT_CONFIG_PATH : await text3({
|
|
3586
4509
|
message: "Config file path",
|
|
3587
4510
|
initialValue: DEFAULT_CONFIG_PATH,
|
|
3588
4511
|
validate: (v) => {
|
|
@@ -3593,20 +4516,20 @@ async function doctorSetupCommand(args) {
|
|
|
3593
4516
|
return;
|
|
3594
4517
|
}
|
|
3595
4518
|
});
|
|
3596
|
-
if (
|
|
3597
|
-
|
|
4519
|
+
if (isCancel4(configPathAnswer)) {
|
|
4520
|
+
cancel4("Cancelled.");
|
|
3598
4521
|
return;
|
|
3599
4522
|
}
|
|
3600
4523
|
const configPath = String(configPathAnswer).trim();
|
|
3601
4524
|
const configFullPath = path14.isAbsolute(configPath) ? configPath : path14.join(projectRoot, configPath);
|
|
3602
4525
|
if (await exists(configFullPath)) {
|
|
3603
4526
|
if (nonInteractive) {} else {
|
|
3604
|
-
const overwrite = await
|
|
4527
|
+
const overwrite = await confirm4({
|
|
3605
4528
|
message: `Config file ${configPath} already exists. Overwrite?`,
|
|
3606
4529
|
initialValue: false
|
|
3607
4530
|
});
|
|
3608
|
-
if (
|
|
3609
|
-
|
|
4531
|
+
if (isCancel4(overwrite)) {
|
|
4532
|
+
cancel4("Cancelled.");
|
|
3610
4533
|
return;
|
|
3611
4534
|
}
|
|
3612
4535
|
if (!overwrite) {
|
|
@@ -3615,7 +4538,7 @@ async function doctorSetupCommand(args) {
|
|
|
3615
4538
|
}
|
|
3616
4539
|
}
|
|
3617
4540
|
}
|
|
3618
|
-
const installDirAnswer = nonInteractive ? state.installDir ?? "lib/unrag" : await
|
|
4541
|
+
const installDirAnswer = nonInteractive ? state.installDir ?? "lib/unrag" : await text3({
|
|
3619
4542
|
message: "Unrag install directory",
|
|
3620
4543
|
initialValue: state.installDir ?? "lib/unrag",
|
|
3621
4544
|
validate: (v) => {
|
|
@@ -3624,8 +4547,8 @@ async function doctorSetupCommand(args) {
|
|
|
3624
4547
|
return;
|
|
3625
4548
|
}
|
|
3626
4549
|
});
|
|
3627
|
-
if (
|
|
3628
|
-
|
|
4550
|
+
if (isCancel4(installDirAnswer)) {
|
|
4551
|
+
cancel4("Cancelled.");
|
|
3629
4552
|
return;
|
|
3630
4553
|
}
|
|
3631
4554
|
const installDir = String(installDirAnswer).trim();
|
|
@@ -3648,13 +4571,13 @@ async function doctorSetupCommand(args) {
|
|
|
3648
4571
|
initialValues: DEFAULT_ENV_LOAD_FILES,
|
|
3649
4572
|
required: false
|
|
3650
4573
|
});
|
|
3651
|
-
if (
|
|
3652
|
-
|
|
4574
|
+
if (isCancel4(envFilesAnswer)) {
|
|
4575
|
+
cancel4("Cancelled.");
|
|
3653
4576
|
return;
|
|
3654
4577
|
}
|
|
3655
4578
|
const envFiles = envFilesAnswer;
|
|
3656
4579
|
const dbEnvVarDefault = state.inferredDbEnvVar ?? "DATABASE_URL";
|
|
3657
|
-
const dbEnvVarAnswer = nonInteractive ? dbEnvVarDefault : await
|
|
4580
|
+
const dbEnvVarAnswer = nonInteractive ? dbEnvVarDefault : await text3({
|
|
3658
4581
|
message: "Database URL environment variable name",
|
|
3659
4582
|
initialValue: dbEnvVarDefault,
|
|
3660
4583
|
validate: (v) => {
|
|
@@ -3665,80 +4588,80 @@ async function doctorSetupCommand(args) {
|
|
|
3665
4588
|
return;
|
|
3666
4589
|
}
|
|
3667
4590
|
});
|
|
3668
|
-
if (
|
|
3669
|
-
|
|
4591
|
+
if (isCancel4(dbEnvVarAnswer)) {
|
|
4592
|
+
cancel4("Cancelled.");
|
|
3670
4593
|
return;
|
|
3671
4594
|
}
|
|
3672
4595
|
const databaseUrlEnv = String(dbEnvVarAnswer).trim();
|
|
3673
|
-
const schemaAnswer = nonInteractive ? "public" : await
|
|
4596
|
+
const schemaAnswer = nonInteractive ? "public" : await text3({
|
|
3674
4597
|
message: "Database schema name",
|
|
3675
4598
|
initialValue: "public"
|
|
3676
4599
|
});
|
|
3677
|
-
if (
|
|
3678
|
-
|
|
4600
|
+
if (isCancel4(schemaAnswer)) {
|
|
4601
|
+
cancel4("Cancelled.");
|
|
3679
4602
|
return;
|
|
3680
4603
|
}
|
|
3681
4604
|
const schema = String(schemaAnswer).trim() || "public";
|
|
3682
|
-
const documentsTableAnswer = nonInteractive ? tableNames.documents : await
|
|
4605
|
+
const documentsTableAnswer = nonInteractive ? tableNames.documents : await text3({
|
|
3683
4606
|
message: "Documents table name",
|
|
3684
4607
|
initialValue: tableNames.documents
|
|
3685
4608
|
});
|
|
3686
|
-
if (
|
|
3687
|
-
|
|
4609
|
+
if (isCancel4(documentsTableAnswer)) {
|
|
4610
|
+
cancel4("Cancelled.");
|
|
3688
4611
|
return;
|
|
3689
4612
|
}
|
|
3690
4613
|
const documentsTable = String(documentsTableAnswer).trim() || "documents";
|
|
3691
|
-
const chunksTableAnswer = nonInteractive ? tableNames.chunks : await
|
|
4614
|
+
const chunksTableAnswer = nonInteractive ? tableNames.chunks : await text3({
|
|
3692
4615
|
message: "Chunks table name",
|
|
3693
4616
|
initialValue: tableNames.chunks
|
|
3694
4617
|
});
|
|
3695
|
-
if (
|
|
3696
|
-
|
|
4618
|
+
if (isCancel4(chunksTableAnswer)) {
|
|
4619
|
+
cancel4("Cancelled.");
|
|
3697
4620
|
return;
|
|
3698
4621
|
}
|
|
3699
4622
|
const chunksTable = String(chunksTableAnswer).trim() || "chunks";
|
|
3700
|
-
const embeddingsTableAnswer = nonInteractive ? tableNames.embeddings : await
|
|
4623
|
+
const embeddingsTableAnswer = nonInteractive ? tableNames.embeddings : await text3({
|
|
3701
4624
|
message: "Embeddings table name",
|
|
3702
4625
|
initialValue: tableNames.embeddings
|
|
3703
4626
|
});
|
|
3704
|
-
if (
|
|
3705
|
-
|
|
4627
|
+
if (isCancel4(embeddingsTableAnswer)) {
|
|
4628
|
+
cancel4("Cancelled.");
|
|
3706
4629
|
return;
|
|
3707
4630
|
}
|
|
3708
4631
|
const embeddingsTable = String(embeddingsTableAnswer).trim() || "embeddings";
|
|
3709
|
-
const scopeAnswer = nonInteractive ? "" : await
|
|
4632
|
+
const scopeAnswer = nonInteractive ? "" : await text3({
|
|
3710
4633
|
message: "Default scope prefix for dimension checks (optional, press enter to skip)",
|
|
3711
4634
|
initialValue: ""
|
|
3712
4635
|
});
|
|
3713
|
-
if (
|
|
3714
|
-
|
|
4636
|
+
if (isCancel4(scopeAnswer)) {
|
|
4637
|
+
cancel4("Cancelled.");
|
|
3715
4638
|
return;
|
|
3716
4639
|
}
|
|
3717
4640
|
const defaultScope = String(scopeAnswer).trim() || null;
|
|
3718
|
-
const strictAnswer = nonInteractive ? false : await
|
|
4641
|
+
const strictAnswer = nonInteractive ? false : await confirm4({
|
|
3719
4642
|
message: "Enable strict mode by default? (treat warnings as failures)",
|
|
3720
4643
|
initialValue: false
|
|
3721
4644
|
});
|
|
3722
|
-
if (
|
|
3723
|
-
|
|
4645
|
+
if (isCancel4(strictAnswer)) {
|
|
4646
|
+
cancel4("Cancelled.");
|
|
3724
4647
|
return;
|
|
3725
4648
|
}
|
|
3726
4649
|
const strictDefault = Boolean(strictAnswer);
|
|
3727
|
-
const ciIncludeDbAnswer = nonInteractive ? true : await
|
|
4650
|
+
const ciIncludeDbAnswer = nonInteractive ? true : await confirm4({
|
|
3728
4651
|
message: "Should CI script include database checks (--db)?",
|
|
3729
4652
|
initialValue: true
|
|
3730
4653
|
});
|
|
3731
|
-
if (
|
|
3732
|
-
|
|
4654
|
+
if (isCancel4(ciIncludeDbAnswer)) {
|
|
4655
|
+
cancel4("Cancelled.");
|
|
3733
4656
|
return;
|
|
3734
4657
|
}
|
|
3735
4658
|
const ciIncludeDb = Boolean(ciIncludeDbAnswer);
|
|
3736
|
-
const ciStrictAnswer = nonInteractive ? true : await
|
|
4659
|
+
const ciStrictAnswer = nonInteractive ? true : await confirm4({
|
|
3737
4660
|
message: "Should CI script use strict mode (--strict)?",
|
|
3738
4661
|
initialValue: true
|
|
3739
4662
|
});
|
|
3740
|
-
if (
|
|
3741
|
-
|
|
4663
|
+
if (isCancel4(ciStrictAnswer)) {
|
|
4664
|
+
cancel4("Cancelled.");
|
|
3742
4665
|
return;
|
|
3743
4666
|
}
|
|
3744
4667
|
const ciStrict = Boolean(ciStrictAnswer);
|
|
@@ -3791,7 +4714,7 @@ async function doctorSetupCommand(args) {
|
|
|
3791
4714
|
let scriptsToAdd = scripts;
|
|
3792
4715
|
if (conflictingScripts.length > 0 && !nonInteractive) {
|
|
3793
4716
|
for (const scriptName of conflictingScripts) {
|
|
3794
|
-
const action = await
|
|
4717
|
+
const action = await select3({
|
|
3795
4718
|
message: `Script "${scriptName}" already exists. What would you like to do?`,
|
|
3796
4719
|
options: [
|
|
3797
4720
|
{ value: "keep", label: "Keep existing", hint: existingScripts[scriptName] },
|
|
@@ -3804,14 +4727,14 @@ async function doctorSetupCommand(args) {
|
|
|
3804
4727
|
],
|
|
3805
4728
|
initialValue: "keep"
|
|
3806
4729
|
});
|
|
3807
|
-
if (
|
|
3808
|
-
|
|
4730
|
+
if (isCancel4(action)) {
|
|
4731
|
+
cancel4("Cancelled.");
|
|
3809
4732
|
return;
|
|
3810
4733
|
}
|
|
3811
4734
|
if (action === "keep") {
|
|
3812
4735
|
delete scriptsToAdd[scriptName];
|
|
3813
4736
|
} else if (action === "rename") {
|
|
3814
|
-
const newName = await
|
|
4737
|
+
const newName = await text3({
|
|
3815
4738
|
message: `New script name for ${scriptName}`,
|
|
3816
4739
|
initialValue: `${scriptName}:new`,
|
|
3817
4740
|
validate: (v) => {
|
|
@@ -3822,8 +4745,8 @@ async function doctorSetupCommand(args) {
|
|
|
3822
4745
|
return;
|
|
3823
4746
|
}
|
|
3824
4747
|
});
|
|
3825
|
-
if (
|
|
3826
|
-
|
|
4748
|
+
if (isCancel4(newName)) {
|
|
4749
|
+
cancel4("Cancelled.");
|
|
3827
4750
|
return;
|
|
3828
4751
|
}
|
|
3829
4752
|
const value = scriptsToAdd[scriptName];
|
|
@@ -4147,7 +5070,7 @@ function renderHelp() {
|
|
|
4147
5070
|
" --alias <@name> Import alias base (e.g. @unrag)",
|
|
4148
5071
|
" --preset <id|url> Install from a web-generated preset (non-interactive)",
|
|
4149
5072
|
" --overwrite <mode> skip | force (when files already exist)",
|
|
4150
|
-
" --rich-media Enable rich media setup (
|
|
5073
|
+
" --rich-media Enable rich media setup (extractors + assetProcessing flags)",
|
|
4151
5074
|
" --no-rich-media Disable rich media setup",
|
|
4152
5075
|
" --extractors <list> Comma-separated extractors (implies --rich-media)",
|
|
4153
5076
|
" --no-install Skip automatic dependency installation",
|