@mcptoolshop/research-os 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -3922,7 +3922,11 @@ var init_schema9 = __esm({
3922
3922
  finding_ids: z11.array(z11.string()),
3923
3923
  reviewer: ReviewerNameSchema,
3924
3924
  review_method: z11.string().min(1),
3925
- created_at: z11.string()
3925
+ created_at: z11.string(),
3926
+ // v0.5: optional profile lineage. Additive-optional — pre-v0.5 records
3927
+ // without this field parse cleanly. Frozen packs unaffected (Zod .optional()
3928
+ // with no .default() leaves absent keys absent on round-trip).
3929
+ profile: z11.string().optional()
3926
3930
  });
3927
3931
  ReviewSnapshotSchema = z11.object({
3928
3932
  section_id: z11.string().regex(/^[0-9]{2}-[a-z0-9-]+$/),
@@ -5925,7 +5929,7 @@ function pickHighestPriority(decisions) {
5925
5929
  return "accepted_for_synthesis";
5926
5930
  }
5927
5931
  function deriveClaimReviews(args) {
5928
- const { claims, findings, reviewer, reviewMethod, activeSectionWaivers } = args;
5932
+ const { claims, findings, reviewer, reviewMethod, activeSectionWaivers, profile } = args;
5929
5933
  const reviews = [];
5930
5934
  const now = (/* @__PURE__ */ new Date()).toISOString();
5931
5935
  const monopolyWaived = Array.isArray(activeSectionWaivers) && activeSectionWaivers.some((w) => w.scope === "min_independent_publishers");
@@ -5940,7 +5944,8 @@ function deriveClaimReviews(args) {
5940
5944
  finding_ids: [],
5941
5945
  reviewer,
5942
5946
  review_method: reviewMethod,
5943
- created_at: now
5947
+ created_at: now,
5948
+ ...profile !== void 0 ? { profile } : {}
5944
5949
  });
5945
5950
  continue;
5946
5951
  }
@@ -5974,7 +5979,8 @@ function deriveClaimReviews(args) {
5974
5979
  finding_ids: claimFindings.map((f) => f.finding_id),
5975
5980
  reviewer,
5976
5981
  review_method: reviewMethod,
5977
- created_at: now
5982
+ created_at: now,
5983
+ ...profile !== void 0 ? { profile } : {}
5978
5984
  });
5979
5985
  }
5980
5986
  return reviews;
@@ -6470,7 +6476,8 @@ async function finalizeReview(args) {
6470
6476
  findings: dedupedFindings,
6471
6477
  reviewer: args.reviewer,
6472
6478
  reviewMethod: args.reviewMethod,
6473
- activeSectionWaivers
6479
+ activeSectionWaivers,
6480
+ profile: args.profile !== DEFAULT_PROFILE ? args.profile : void 0
6474
6481
  });
6475
6482
  const decisionCounts = {
6476
6483
  accepted_for_synthesis: 0,
@@ -7543,8 +7550,8 @@ async function syncRepoKnowledge(options) {
7543
7550
  };
7544
7551
  }
7545
7552
  const exportResult = await exportRepoKnowledge({ packPath });
7546
- const { readFile: readFile26 } = await import("fs/promises");
7547
- const text = await readFile26(exportResult.outPath, "utf8");
7553
+ const { readFile: readFile27 } = await import("fs/promises");
7554
+ const text = await readFile27(exportResult.outPath, "utf8");
7548
7555
  const facts = text.split(/\r?\n/).filter((l) => l.trim().length > 0).map((l) => JSON.parse(l));
7549
7556
  try {
7550
7557
  const r = await rk.ingestFacts({ facts, namespace: "research-os" });
@@ -12192,7 +12199,7 @@ var init_src = __esm({
12192
12199
  init_triage();
12193
12200
  init_discover();
12194
12201
  init_errors();
12195
- RESEARCH_OS_VERSION = "0.4.0";
12202
+ RESEARCH_OS_VERSION = "0.5.0";
12196
12203
  }
12197
12204
  });
12198
12205
 
@@ -13406,6 +13413,115 @@ async function applySourceCardOverrides(packPath, fromFile) {
13406
13413
  // src/cli.ts
13407
13414
  init_errors();
13408
13415
  init_src();
13416
+
13417
+ // src/calibration/lookup.ts
13418
+ import { existsSync as existsSync33 } from "fs";
13419
+ import { readFile as readFile26 } from "fs/promises";
13420
+ import { join as join34 } from "path";
13421
+
13422
+ // src/calibration/receipt-schema.ts
13423
+ import { z as z25 } from "zod";
13424
/** Allowed values for a calibration receipt's `status` field. */
var StatusLabelSchema = z25.enum(["trusted_baseline", "conditional_pass", "failed", "comparison_only"]);

/** Allowed values for the `architecture` fields below. */
var ArchitectureSchema = z25.enum(["single-pass", "two-pass"]);

/** One recall measurement: integer `matched` / `total` (both >= 0) and a `ratio` within [0, 1]. */
var RecallSchema = z25.object({
  matched: z25.number().int().nonnegative(),
  total: z25.number().int().nonnegative(),
  ratio: z25.number().min(0).max(1)
});

/** Recall measurements keyed by an arbitrary string key. */
var PerCategoryRecallSchema = z25.record(z25.string(), RecallSchema);

/**
 * Per-gate verdicts. Every gate is "PASS" | "FAIL" except `latency_soft`,
 * which is "PASS" | "WARN".
 */
var PassFailSchema = (() => {
  // A fresh PASS/FAIL enum per gate, mirroring one schema instance per field.
  const passOrFail = () => z25.enum(["PASS", "FAIL"]);
  return z25.object({
    fp_ceiling: passOrFail(),
    any_flag_recall_floor: passOrFail(),
    per_category_any_flag_floor: passOrFail(),
    strict_recall_floor: passOrFail(),
    decision_vocab_completeness: passOrFail(),
    latency_soft: z25.enum(["PASS", "WARN"]),
    latency_hard: passOrFail(),
    empty_or_malformed: passOrFail(),
    overall: passOrFail()
  });
})();

/** Decision-vocabulary bar: positive integer `required`, non-negative integer `produced`, boolean `passed`. */
var DecisionVocabBarSchema = z25.object({
  architecture: ArchitectureSchema,
  required: z25.number().int().positive(),
  produced: z25.number().int().nonnegative(),
  passed: z25.boolean()
});

/**
 * Full calibration receipt document. `schema_version` must be the literal 1;
 * no field is optional.
 */
var CalibrationReceiptSchema = (() => {
  // Factories for the two field shapes that repeat throughout the receipt.
  const text = () => z25.string();
  const count = () => z25.number().int().nonnegative();
  return z25.object({
    schema_version: z25.literal(1),
    profile_name: text(),
    status: StatusLabelSchema,
    model: text(),
    architecture: ArchitectureSchema,
    fixture: text(),
    fixture_total_claims: z25.number().int().positive(),
    fixture_good_claims: count(),
    fixture_bad_claims: count(),
    calibrated_at: text(),
    research_os_version: text(),
    runtime_ms: count(),
    good_fp_count: count(),
    any_flag_recall: RecallSchema,
    strict_recall: RecallSchema,
    per_category_any_flag: PerCategoryRecallSchema,
    per_category_strict: PerCategoryRecallSchema,
    decision_vocabulary: z25.record(z25.string(), count()),
    decisions_produced_count: count(),
    decision_vocab_bar: DecisionVocabBarSchema,
    unreachable_decisions: z25.array(text()),
    empty_or_malformed_responses: count(),
    pass_fail: PassFailSchema,
    notes: z25.array(text())
  });
})();
13480
+
13481
+ // src/calibration/receipt.ts
13482
/**
 * Condense a calibration receipt into a flat summary of display strings.
 *
 * @param receipt A receipt object; reads `fixture`, `good_fp_count`,
 *   `fixture_good_claims`, `any_flag_recall`, `strict_recall`,
 *   `per_category_any_flag`, `status`, `model`, `architecture`,
 *   `pass_fail.overall` and `decisions_produced_count`.
 * @returns An object with `fixture`, `good_false_positive_rate`,
 *   `bad_any_flag_recall`, `strict_category_recall`,
 *   `unsupported_claim_recall` (null when the receipt has no
 *   "unsupported_claim" category) and a one-line `notes` string.
 */
function receiptToCalibrationSummary(receipt) {
  // Render a { matched, total, ratio } record as "matched/total (pct%)".
  const formatRecall = (recall) => `${recall.matched}/${recall.total} (${Math.round(recall.ratio * 100)}%)`;

  const falsePositives = receipt.good_fp_count;
  const goodClaims = receipt.fixture_good_claims;
  // A fixture with zero good claims reports 0% instead of dividing by zero.
  let falsePositivePct = 0;
  if (goodClaims > 0) {
    falsePositivePct = Math.round(falsePositives / goodClaims * 100);
  }

  const unsupportedRecall = receipt.per_category_any_flag["unsupported_claim"];

  const noteParts = [
    `status=${receipt.status}`,
    `model=${receipt.model}`,
    `arch=${receipt.architecture}`,
    `overall=${receipt.pass_fail.overall}`,
    `decisions=${receipt.decisions_produced_count}/6`
  ];

  return {
    fixture: receipt.fixture,
    good_false_positive_rate: `${falsePositives}/${goodClaims} (${falsePositivePct}%)`,
    bad_any_flag_recall: formatRecall(receipt.any_flag_recall),
    strict_category_recall: formatRecall(receipt.strict_recall),
    unsupported_claim_recall: unsupportedRecall ? formatRecall(unsupportedRecall) : null,
    notes: noteParts.join(" ")
  };
}
13498
+
13499
+ // src/calibration/lookup.ts
13500
/**
 * Load, validate and summarise the calibration receipt stored in a pack for
 * one reviewer profile.
 *
 * @param packDir Pack directory passed through to `receiptPathForPack`.
 * @param profile Profile name passed through to `receiptPathForPack`.
 * @returns The result of `receiptToCalibrationSummary` for the parsed
 *   receipt, or `null` when no receipt file exists at the computed path.
 * @throws Error with an "Invalid calibration receipt at <path>: ..." message
 *   when the file cannot be read, is not valid JSON, or fails
 *   `CalibrationReceiptSchema`; the underlying error is attached as `cause`.
 */
async function loadReceiptForPack(packDir, profile) {
  const receiptPath = receiptPathForPack(packDir, profile);
  if (!existsSync33(receiptPath)) return null;
  let raw;
  try {
    raw = JSON.parse(await readFile26(receiptPath, "utf8"));
  } catch (err) {
    // The file can disappear between the existence check and the read; that
    // is still "no receipt", not a malformed one.
    const code = typeof err === "object" && err !== null && "code" in err ? err.code : void 0;
    if (code === "ENOENT" || code === "ENOTDIR") return null;
    // Anything can be thrown in JavaScript; only read `.message` off real Errors.
    const detail = err instanceof Error ? err.message : String(err);
    throw new Error(
      `Invalid calibration receipt at ${receiptPath}: ${detail}`,
      { cause: err }
    );
  }
  const result = CalibrationReceiptSchema.safeParse(raw);
  if (!result.success) {
    // Keep the validation error reachable via `cause`, matching the parse-failure path.
    throw new Error(
      `Invalid calibration receipt at ${receiptPath}: ${result.error.message}`,
      { cause: result.error }
    );
  }
  return receiptToCalibrationSummary(result.data);
}
13520
/**
 * Build the path of a profile's calibration receipt inside a pack:
 * `<packDir>/calibration/reviewer-profiles/<profile>/seeded-v1.json`.
 *
 * @param packDir Pack directory used as the first path segment.
 * @param profile Profile name used as the directory under `reviewer-profiles`.
 * @returns The joined path; nothing is read from disk.
 */
function receiptPathForPack(packDir, profile) {
  const segments = ["calibration", "reviewer-profiles", profile, "seeded-v1.json"];
  return join34(packDir, ...segments);
}
13523
+
13524
+ // src/cli.ts
13409
13525
  function reportError(err) {
13410
13526
  if (err instanceof ResearchOSError) {
13411
13527
  process.stderr.write(`research-os: ${err.code}: ${err.message}
@@ -14435,7 +14551,8 @@ program.command("review-promote").description(
14435
14551
  false
14436
14552
  ).action(async (section, opts) => {
14437
14553
  try {
14438
- const calibration = opts.calibrationFixture || opts.goodFp || opts.anyFlagRecall || opts.strictCatRecall || opts.unsupportedRecall || opts.calibrationNotes ? {
14554
+ const explicitCalibration = opts.calibrationFixture || opts.goodFp || opts.anyFlagRecall || opts.strictCatRecall || opts.unsupportedRecall || opts.calibrationNotes;
14555
+ let calibration = explicitCalibration ? {
14439
14556
  fixture: opts.calibrationFixture ?? null,
14440
14557
  good_false_positive_rate: opts.goodFp ?? null,
14441
14558
  bad_any_flag_recall: opts.anyFlagRecall ?? null,
@@ -14443,6 +14560,16 @@ program.command("review-promote").description(
14443
14560
  unsupported_claim_recall: opts.unsupportedRecall ?? null,
14444
14561
  notes: opts.calibrationNotes ?? null
14445
14562
  } : null;
14563
+ if (!explicitCalibration) {
14564
+ const summary = await loadReceiptForPack(opts.pack, opts.profile);
14565
+ if (summary !== null) {
14566
+ calibration = summary;
14567
+ process.stdout.write(
14568
+ ` [auto] calibration_summary populated from ${receiptPathForPack(opts.pack, opts.profile)}
14569
+ `
14570
+ );
14571
+ }
14572
+ }
14446
14573
  const result = await promote({
14447
14574
  sectionId: section,
14448
14575
  packPath: opts.pack,