@mcptoolshop/research-os 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +134 -0
- package/README.es.md +25 -2
- package/README.fr.md +24 -1
- package/README.hi.md +39 -1
- package/README.it.md +25 -2
- package/README.ja.md +24 -1
- package/README.md +37 -1
- package/README.pt-BR.md +24 -1
- package/README.zh.md +25 -2
- package/dist/calibration/aggregate-receipt-schema.d.ts +509 -0
- package/dist/calibration/aggregate-receipt-schema.js +143 -0
- package/dist/calibration/aggregate-receipt-schema.js.map +1 -0
- package/dist/calibration/aggregate.d.ts +35 -0
- package/dist/calibration/aggregate.js +454 -0
- package/dist/calibration/aggregate.js.map +1 -0
- package/dist/calibration/receipt-schema.d.ts +317 -0
- package/dist/calibration/receipt-schema.js +68 -0
- package/dist/calibration/receipt-schema.js.map +1 -0
- package/dist/calibration/receipt.d.ts +31 -0
- package/dist/calibration/receipt.js +151 -0
- package/dist/calibration/receipt.js.map +1 -0
- package/dist/cli.js +136 -9
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +10 -1
- package/dist/index.js +13 -6
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -3922,7 +3922,11 @@ var init_schema9 = __esm({
|
|
|
3922
3922
|
finding_ids: z11.array(z11.string()),
|
|
3923
3923
|
reviewer: ReviewerNameSchema,
|
|
3924
3924
|
review_method: z11.string().min(1),
|
|
3925
|
-
created_at: z11.string()
|
|
3925
|
+
created_at: z11.string(),
|
|
3926
|
+
// v0.5: optional profile lineage. Additive-optional — pre-v0.5 records
|
|
3927
|
+
// without this field parse cleanly. Frozen packs unaffected (Zod .optional()
|
|
3928
|
+
// with no .default() leaves absent keys absent on round-trip).
|
|
3929
|
+
profile: z11.string().optional()
|
|
3926
3930
|
});
|
|
3927
3931
|
ReviewSnapshotSchema = z11.object({
|
|
3928
3932
|
section_id: z11.string().regex(/^[0-9]{2}-[a-z0-9-]+$/),
|
|
@@ -5925,7 +5929,7 @@ function pickHighestPriority(decisions) {
|
|
|
5925
5929
|
return "accepted_for_synthesis";
|
|
5926
5930
|
}
|
|
5927
5931
|
function deriveClaimReviews(args) {
|
|
5928
|
-
const { claims, findings, reviewer, reviewMethod, activeSectionWaivers } = args;
|
|
5932
|
+
const { claims, findings, reviewer, reviewMethod, activeSectionWaivers, profile } = args;
|
|
5929
5933
|
const reviews = [];
|
|
5930
5934
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
5931
5935
|
const monopolyWaived = Array.isArray(activeSectionWaivers) && activeSectionWaivers.some((w) => w.scope === "min_independent_publishers");
|
|
@@ -5940,7 +5944,8 @@ function deriveClaimReviews(args) {
|
|
|
5940
5944
|
finding_ids: [],
|
|
5941
5945
|
reviewer,
|
|
5942
5946
|
review_method: reviewMethod,
|
|
5943
|
-
created_at: now
|
|
5947
|
+
created_at: now,
|
|
5948
|
+
...profile !== void 0 ? { profile } : {}
|
|
5944
5949
|
});
|
|
5945
5950
|
continue;
|
|
5946
5951
|
}
|
|
@@ -5974,7 +5979,8 @@ function deriveClaimReviews(args) {
|
|
|
5974
5979
|
finding_ids: claimFindings.map((f) => f.finding_id),
|
|
5975
5980
|
reviewer,
|
|
5976
5981
|
review_method: reviewMethod,
|
|
5977
|
-
created_at: now
|
|
5982
|
+
created_at: now,
|
|
5983
|
+
...profile !== void 0 ? { profile } : {}
|
|
5978
5984
|
});
|
|
5979
5985
|
}
|
|
5980
5986
|
return reviews;
|
|
@@ -6470,7 +6476,8 @@ async function finalizeReview(args) {
|
|
|
6470
6476
|
findings: dedupedFindings,
|
|
6471
6477
|
reviewer: args.reviewer,
|
|
6472
6478
|
reviewMethod: args.reviewMethod,
|
|
6473
|
-
activeSectionWaivers
|
|
6479
|
+
activeSectionWaivers,
|
|
6480
|
+
profile: args.profile !== DEFAULT_PROFILE ? args.profile : void 0
|
|
6474
6481
|
});
|
|
6475
6482
|
const decisionCounts = {
|
|
6476
6483
|
accepted_for_synthesis: 0,
|
|
@@ -7543,8 +7550,8 @@ async function syncRepoKnowledge(options) {
|
|
|
7543
7550
|
};
|
|
7544
7551
|
}
|
|
7545
7552
|
const exportResult = await exportRepoKnowledge({ packPath });
|
|
7546
|
-
const { readFile:
|
|
7547
|
-
const text = await
|
|
7553
|
+
const { readFile: readFile27 } = await import("fs/promises");
|
|
7554
|
+
const text = await readFile27(exportResult.outPath, "utf8");
|
|
7548
7555
|
const facts = text.split(/\r?\n/).filter((l) => l.trim().length > 0).map((l) => JSON.parse(l));
|
|
7549
7556
|
try {
|
|
7550
7557
|
const r = await rk.ingestFacts({ facts, namespace: "research-os" });
|
|
@@ -12192,7 +12199,7 @@ var init_src = __esm({
|
|
|
12192
12199
|
init_triage();
|
|
12193
12200
|
init_discover();
|
|
12194
12201
|
init_errors();
|
|
12195
|
-
RESEARCH_OS_VERSION = "0.4.0";
|
|
12202
|
+
RESEARCH_OS_VERSION = "0.5.0";
|
|
12196
12203
|
}
|
|
12197
12204
|
});
|
|
12198
12205
|
|
|
@@ -13406,6 +13413,115 @@ async function applySourceCardOverrides(packPath, fromFile) {
|
|
|
13406
13413
|
// src/cli.ts
|
|
13407
13414
|
init_errors();
|
|
13408
13415
|
init_src();
|
|
13416
|
+
|
|
13417
|
+
// src/calibration/lookup.ts
|
|
13418
|
+
import { existsSync as existsSync33 } from "fs";
|
|
13419
|
+
import { readFile as readFile26 } from "fs/promises";
|
|
13420
|
+
import { join as join34 } from "path";
|
|
13421
|
+
|
|
13422
|
+
// src/calibration/receipt-schema.ts
|
|
13423
|
+
import { z as z25 } from "zod";
|
|
13424
|
+
// Zod schemas describing a calibration receipt (schema_version 1) and the
// nested records it is built from.

// Status label carried by a receipt.
var StatusLabelSchema = z25.enum(["trusted_baseline", "conditional_pass", "failed", "comparison_only"]);

// Reviewer architecture named by a receipt.
var ArchitectureSchema = z25.enum(["single-pass", "two-pass"]);

// A recall measurement: matched/total as non-negative integers plus a ratio
// constrained to the closed interval [0, 1].
var RecallSchema = z25.object({
  matched: z25.number().int().nonnegative(),
  total: z25.number().int().nonnegative(),
  ratio: z25.number().min(0).max(1)
});

// Recall measurements keyed by an arbitrary string.
var PerCategoryRecallSchema = z25.record(z25.string(), RecallSchema);

// One verdict per gate. Every gate accepts "PASS" plus the single non-passing
// value listed beside it; `latency_soft` is the only gate whose non-passing
// value is "WARN" rather than "FAIL". Entry order fixes the object key order.
var PassFailSchema = z25.object(
  Object.fromEntries(
    [
      ["fp_ceiling", "FAIL"],
      ["any_flag_recall_floor", "FAIL"],
      ["per_category_any_flag_floor", "FAIL"],
      ["strict_recall_floor", "FAIL"],
      ["decision_vocab_completeness", "FAIL"],
      ["latency_soft", "WARN"],
      ["latency_hard", "FAIL"],
      ["empty_or_malformed", "FAIL"],
      ["overall", "FAIL"]
    ].map(([gate, nonPassing]) => [gate, z25.enum(["PASS", nonPassing])])
  )
);

// Decision-vocabulary bar: required (positive) versus produced (non-negative)
// counts for a given architecture, with the boolean outcome.
var DecisionVocabBarSchema = z25.object({
  architecture: ArchitectureSchema,
  required: z25.number().int().positive(),
  produced: z25.number().int().nonnegative(),
  passed: z25.boolean()
});

// The full receipt. The field factories are scoped to this initializer so no
// additional top-level names are introduced; each call yields a fresh schema.
var CalibrationReceiptSchema = (() => {
  const text = () => z25.string();
  const count = () => z25.number().int().nonnegative();
  return z25.object({
    schema_version: z25.literal(1),
    profile_name: text(),
    status: StatusLabelSchema,
    model: text(),
    architecture: ArchitectureSchema,
    fixture: text(),
    fixture_total_claims: z25.number().int().positive(),
    fixture_good_claims: count(),
    fixture_bad_claims: count(),
    calibrated_at: text(),
    research_os_version: text(),
    runtime_ms: count(),
    good_fp_count: count(),
    any_flag_recall: RecallSchema,
    strict_recall: RecallSchema,
    per_category_any_flag: PerCategoryRecallSchema,
    per_category_strict: PerCategoryRecallSchema,
    decision_vocabulary: z25.record(text(), count()),
    decisions_produced_count: count(),
    decision_vocab_bar: DecisionVocabBarSchema,
    unreachable_decisions: z25.array(text()),
    empty_or_malformed_responses: count(),
    pass_fail: PassFailSchema,
    notes: z25.array(text())
  });
})();
|
|
13480
|
+
|
|
13481
|
+
// src/calibration/receipt.ts
|
|
13482
|
+
/**
 * Condense a calibration receipt into a summary of display strings.
 *
 * Counts are rendered as "matched/total (NN%)" with the percentage rounded
 * to the nearest integer.
 *
 * @param {object} receipt - Receipt object; the fields read here are
 *   `fixture`, `good_fp_count`, `fixture_good_claims`, `any_flag_recall`,
 *   `strict_recall`, `per_category_any_flag`, `status`, `model`,
 *   `architecture`, `pass_fail.overall` and `decisions_produced_count`.
 * @returns {{fixture: *, good_false_positive_rate: string,
 *   bad_any_flag_recall: string, strict_category_recall: string,
 *   unsupported_claim_recall: (string|null), notes: string}}
 *   `unsupported_claim_recall` is null when the receipt has no
 *   "unsupported_claim" entry in `per_category_any_flag`.
 */
function receiptToCalibrationSummary(receipt) {
  const toPercent = (fraction) => Math.round(fraction * 100);
  const describeRecall = (recall) => `${recall.matched}/${recall.total} (${toPercent(recall.ratio)}%)`;

  const falsePositives = receipt.good_fp_count;
  const goodClaims = receipt.fixture_good_claims;
  // With no good claims in the fixture the rate is reported as 0% rather
  // than dividing by zero.
  let falsePositivePct = 0;
  if (goodClaims > 0) {
    falsePositivePct = toPercent(falsePositives / goodClaims);
  }

  const anyFlag = receipt.any_flag_recall;
  const strict = receipt.strict_recall;
  const unsupportedRecall = receipt.per_category_any_flag["unsupported_claim"];

  const noteParts = [
    `status=${receipt.status}`,
    `model=${receipt.model}`,
    `arch=${receipt.architecture}`,
    `overall=${receipt.pass_fail.overall}`,
    `decisions=${receipt.decisions_produced_count}/6`
  ];

  return {
    fixture: receipt.fixture,
    good_false_positive_rate: `${falsePositives}/${goodClaims} (${falsePositivePct}%)`,
    bad_any_flag_recall: describeRecall(anyFlag),
    strict_category_recall: describeRecall(strict),
    unsupported_claim_recall: unsupportedRecall ? describeRecall(unsupportedRecall) : null,
    notes: noteParts.join(" ")
  };
}
|
|
13498
|
+
|
|
13499
|
+
// src/calibration/lookup.ts
|
|
13500
|
+
/**
 * Load the calibration receipt stored for `profile` under `packDir` and
 * condense it into a calibration summary.
 *
 * The file is read in a single step instead of probing for its existence
 * first, so a receipt that disappears between a check and the read is
 * reported as absent rather than as an invalid receipt, and no synchronous
 * filesystem call is made from this async function.
 *
 * @param {string} packDir - Pack directory passed to `receiptPathForPack`.
 * @param {string} profile - Profile name passed to `receiptPathForPack`.
 * @returns {Promise<object|null>} The result of `receiptToCalibrationSummary`
 *   for the parsed receipt, or `null` when no receipt exists at the path.
 * @throws {Error} "Invalid calibration receipt at <path>: <reason>" when the
 *   file cannot be read, is not valid JSON, or fails
 *   `CalibrationReceiptSchema`. The underlying error is attached as `cause`.
 */
async function loadReceiptForPack(packDir, profile) {
  const receiptPath = receiptPathForPack(packDir, profile);
  let raw;
  try {
    raw = JSON.parse(await readFile26(receiptPath, "utf8"));
  } catch (err) {
    // A missing file (or a missing/non-directory path component) means the
    // pack simply has no receipt for this profile; that is not an error.
    if (err?.code === "ENOENT" || err?.code === "ENOTDIR") return null;
    throw new Error(
      `Invalid calibration receipt at ${receiptPath}: ${err.message}`,
      { cause: err }
    );
  }
  const result = CalibrationReceiptSchema.safeParse(raw);
  if (!result.success) {
    // Keep the validation error reachable for callers, matching the
    // read/parse failure path above.
    throw new Error(
      `Invalid calibration receipt at ${receiptPath}: ${result.error.message}`,
      { cause: result.error }
    );
  }
  return receiptToCalibrationSummary(result.data);
}
|
|
13520
|
+
/**
 * Build the path of the calibration receipt file for a profile inside a pack:
 * `<packDir>/calibration/reviewer-profiles/<profile>/seeded-v1.json`.
 *
 * @param {string} packDir - Pack directory.
 * @param {string} profile - Profile name used as the directory segment.
 * @returns {string} The joined receipt path.
 */
function receiptPathForPack(packDir, profile) {
  const segments = [packDir, "calibration", "reviewer-profiles", profile, "seeded-v1.json"];
  return join34(...segments);
}
|
|
13523
|
+
|
|
13524
|
+
// src/cli.ts
|
|
13409
13525
|
function reportError(err) {
|
|
13410
13526
|
if (err instanceof ResearchOSError) {
|
|
13411
13527
|
process.stderr.write(`research-os: ${err.code}: ${err.message}
|
|
@@ -14435,7 +14551,8 @@ program.command("review-promote").description(
|
|
|
14435
14551
|
false
|
|
14436
14552
|
).action(async (section, opts) => {
|
|
14437
14553
|
try {
|
|
14438
|
-
const
|
|
14554
|
+
const explicitCalibration = opts.calibrationFixture || opts.goodFp || opts.anyFlagRecall || opts.strictCatRecall || opts.unsupportedRecall || opts.calibrationNotes;
|
|
14555
|
+
let calibration = explicitCalibration ? {
|
|
14439
14556
|
fixture: opts.calibrationFixture ?? null,
|
|
14440
14557
|
good_false_positive_rate: opts.goodFp ?? null,
|
|
14441
14558
|
bad_any_flag_recall: opts.anyFlagRecall ?? null,
|
|
@@ -14443,6 +14560,16 @@ program.command("review-promote").description(
|
|
|
14443
14560
|
unsupported_claim_recall: opts.unsupportedRecall ?? null,
|
|
14444
14561
|
notes: opts.calibrationNotes ?? null
|
|
14445
14562
|
} : null;
|
|
14563
|
+
if (!explicitCalibration) {
|
|
14564
|
+
const summary = await loadReceiptForPack(opts.pack, opts.profile);
|
|
14565
|
+
if (summary !== null) {
|
|
14566
|
+
calibration = summary;
|
|
14567
|
+
process.stdout.write(
|
|
14568
|
+
` [auto] calibration_summary populated from ${receiptPathForPack(opts.pack, opts.profile)}
|
|
14569
|
+
`
|
|
14570
|
+
);
|
|
14571
|
+
}
|
|
14572
|
+
}
|
|
14446
14573
|
const result = await promote({
|
|
14447
14574
|
sectionId: section,
|
|
14448
14575
|
packPath: opts.pack,
|