codemem 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +303 -7
- package/dist/index.js.map +1 -1
- package/package.json +4 -4
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { DEFAULT_COORDINATOR_DB_PATH, MemoryStore, ObserverClient, RawEventSweeper, SyncRetentionRunner, VERSION, applyBootstrapSnapshot, backfillTagsText, backfillVectors, buildAuthHeaders, buildBaseUrl, buildRawEventEnvelopeFromHook, compareMemoryRoleReports, connect, coordinatorCreateGroupAction, coordinatorCreateInviteAction, coordinatorDisableDeviceAction, coordinatorEnrollDeviceAction, coordinatorImportInviteAction, coordinatorListBootstrapGrantsAction, coordinatorListDevicesAction, coordinatorListGroupsAction, coordinatorListJoinRequestsAction, coordinatorRemoveDeviceAction, coordinatorRenameDeviceAction, coordinatorReviewJoinRequestAction, coordinatorRevokeBootstrapGrantAction, createBetterSqliteCoordinatorApp, deactivateLowSignalMemories, deactivateLowSignalObservations, ensureDeviceIdentity, exportMemories, fetchAllSnapshotPages, fingerprintPublicKey, getMemoryRoleReport, getRawEventRelinkPlan, getRawEventRelinkReport, getRawEventStatus, getWorkspaceCodememConfigPath, hasUnsyncedSharedMemoryChanges, importMemories, initDatabase, isEmbeddingDisabled, loadPublicKey, loadSqliteVec, planReplicationOpsAgePrune, pruneReplicationOpsUntilCaughtUp, rawEventsGate, readCodememConfigFile, readCodememConfigFileAtPath, readCoordinatorSyncConfig, readImportPayload, requestJson, resolveCodememConfigPath, resolveDbPath, resolveHookProject, resolveProject, retryRawEventFailures, runSyncDaemon, runSyncPass, schema, setPeerProjectFilter, stripJsonComments, stripPrivateObj, stripTrailingCommas, syncPassPreflight, updatePeerAddresses, vacuumDatabase, writeCodememConfigFile } from "@codemem/core";
|
|
2
|
+
import { DEFAULT_COORDINATOR_DB_PATH, MemoryStore, ObserverClient, RawEventSweeper, SyncRetentionRunner, VERSION, applyBootstrapSnapshot, backfillTagsText, backfillVectors, buildAuthHeaders, buildBaseUrl, buildRawEventEnvelopeFromHook, compareMemoryRoleReports, connect, coordinatorCreateGroupAction, coordinatorCreateInviteAction, coordinatorDisableDeviceAction, coordinatorEnrollDeviceAction, coordinatorImportInviteAction, coordinatorListBootstrapGrantsAction, coordinatorListDevicesAction, coordinatorListGroupsAction, coordinatorListJoinRequestsAction, coordinatorRemoveDeviceAction, coordinatorRenameDeviceAction, coordinatorReviewJoinRequestAction, coordinatorRevokeBootstrapGrantAction, createBetterSqliteCoordinatorApp, deactivateLowSignalMemories, deactivateLowSignalObservations, ensureDeviceIdentity, exportMemories, fetchAllSnapshotPages, fingerprintPublicKey, getExtractionBenchmarkProfile, getInjectionEvalScenarioPack, getInjectionEvalScenarioPrompts, getMemoryRoleReport, getRawEventRelinkPlan, getRawEventRelinkReport, getRawEventStatus, getSessionExtractionEval, getSessionExtractionEvalScenario, getWorkspaceCodememConfigPath, hasUnsyncedSharedMemoryChanges, importMemories, initDatabase, isEmbeddingDisabled, loadObserverConfig, loadPublicKey, loadSqliteVec, planReplicationOpsAgePrune, pruneReplicationOpsUntilCaughtUp, rawEventsGate, readCodememConfigFile, readCodememConfigFileAtPath, readCoordinatorSyncConfig, readImportPayload, replayBatchExtraction, replayBatchExtractionWithTierRouting, requestJson, resolveCodememConfigPath, resolveDbPath, resolveHookProject, resolveProject, retryRawEventFailures, runSyncDaemon, runSyncPass, schema, setPeerProjectFilter, stripJsonComments, stripPrivateObj, stripTrailingCommas, syncPassPreflight, updatePeerAddresses, vacuumDatabase, writeCodememConfigFile } from "@codemem/core";
|
|
3
3
|
import { Command, Option } from "commander";
|
|
4
4
|
import omelette from "omelette";
|
|
5
5
|
import { existsSync, mkdirSync, readFileSync, rmSync, statSync, writeFileSync } from "node:fs";
|
|
@@ -1767,16 +1767,19 @@ function createInjectMemoryCommand() {
|
|
|
1767
1767
|
return cmd;
|
|
1768
1768
|
}
|
|
1769
1769
|
function createMemoryRoleReportCommand() {
|
|
1770
|
-
const cmd = new Command("role-report").configureHelp(helpStyle).description("Analyze inferred memory roles in a DB snapshot").option("--project <project>", "project identifier (defaults to git repo root)").option("--all-projects", "analyze across all projects").option("--probe <query>", "run a retrieval probe query against the snapshot", (value, prev) => [...prev, value], []).option("--inactive", "include inactive memories");
|
|
1770
|
+
const cmd = new Command("role-report").configureHelp(helpStyle).description("Analyze inferred memory roles in a DB snapshot").option("--project <project>", "project identifier (defaults to git repo root)").option("--all-projects", "analyze across all projects").option("--probe <query>", "run a retrieval probe query against the snapshot", (value, prev) => [...prev, value], []).option("--scenario <id>", "run a named injection-first eval scenario pack (can be repeated)", (value, prev) => [...prev, value], []).option("--inactive", "include inactive memories");
|
|
1771
1771
|
addDbOption(cmd);
|
|
1772
1772
|
addJsonOption(cmd);
|
|
1773
1773
|
cmd.action((opts) => {
|
|
1774
1774
|
const project = opts.allProjects === true ? null : opts.project?.trim() || process.env.CODEMEM_PROJECT?.trim() || resolveProject(process.cwd(), null);
|
|
1775
|
+
const invalidScenario = (opts.scenario ?? []).find((id) => getInjectionEvalScenarioPack(id) == null);
|
|
1776
|
+
if (invalidScenario) throw new Error(`Unknown eval scenario pack: ${invalidScenario}`);
|
|
1777
|
+
const probes = [...opts.probe ?? [], ...getInjectionEvalScenarioPrompts(opts.scenario ?? [])];
|
|
1775
1778
|
const result = getMemoryRoleReport(resolveDbOpt(opts), {
|
|
1776
1779
|
project,
|
|
1777
1780
|
allProjects: opts.allProjects === true,
|
|
1778
1781
|
includeInactive: opts.inactive === true,
|
|
1779
|
-
probes
|
|
1782
|
+
probes
|
|
1780
1783
|
});
|
|
1781
1784
|
if (opts.json) {
|
|
1782
1785
|
console.log(JSON.stringify(result, null, 2));
|
|
@@ -1800,17 +1803,23 @@ function createMemoryRoleReportCommand() {
|
|
|
1800
1803
|
p.log.message(` summary_unmapped ${result.summary_mapping.unmapped}`);
|
|
1801
1804
|
p.log.info("Project quality:");
|
|
1802
1805
|
for (const [bucket, count] of Object.entries(result.project_quality)) p.log.message(` ${bucket.padEnd(12)} ${String(count)}`);
|
|
1806
|
+
p.log.info("Session classes:");
|
|
1807
|
+
for (const [bucket, count] of Object.entries(result.session_class_buckets)) p.log.message(` ${bucket.padEnd(20)} ${String(count)}`);
|
|
1808
|
+
p.log.info("Summary dispositions:");
|
|
1809
|
+
for (const [bucket, count] of Object.entries(result.summary_disposition_buckets)) p.log.message(` ${bucket.padEnd(20)} ${String(count)}`);
|
|
1803
1810
|
if (result.probe_results.length > 0) {
|
|
1804
1811
|
p.log.info("Probe results:");
|
|
1805
1812
|
for (const probe of result.probe_results) {
|
|
1806
1813
|
p.log.message(` query: ${probe.query}`);
|
|
1814
|
+
if (probe.scenario_id) p.log.message(` scenario: ${probe.scenario_id} (${probe.scenario_category ?? "unknown"})${probe.scenario_title ? ` — ${probe.scenario_title}` : ""}`);
|
|
1807
1815
|
p.log.message(` mode: ${probe.mode}`);
|
|
1808
1816
|
p.log.message(` top roles: durable=${probe.top_role_counts.durable} recap=${probe.top_role_counts.recap} ephemeral=${probe.top_role_counts.ephemeral} general=${probe.top_role_counts.general}`);
|
|
1809
1817
|
p.log.message(` top mapping: mapped=${probe.top_mapping_counts.mapped} unmapped=${probe.top_mapping_counts.unmapped}`);
|
|
1810
1818
|
p.log.message(` burden: recap_share=${probe.top_burden.recap_share.toFixed(2)} unmapped_share=${probe.top_burden.unmapped_share.toFixed(2)} recap_unmapped_share=${probe.top_burden.recap_unmapped_share.toFixed(2)}`);
|
|
1811
1819
|
if (probe.simulated_demoted_unmapped_recap) p.log.message(` simulated demote-unmapped-recap burden: recap_share=${probe.simulated_demoted_unmapped_recap.top_burden.recap_share.toFixed(2)} unmapped_share=${probe.simulated_demoted_unmapped_recap.top_burden.unmapped_share.toFixed(2)} recap_unmapped_share=${probe.simulated_demoted_unmapped_recap.top_burden.recap_unmapped_share.toFixed(2)}`);
|
|
1812
1820
|
if (probe.simulated_demoted_unmapped_recap_and_ephemeral) p.log.message(` simulated demote-unmapped-recap+ephemeral burden: recap_share=${probe.simulated_demoted_unmapped_recap_and_ephemeral.top_burden.recap_share.toFixed(2)} unmapped_share=${probe.simulated_demoted_unmapped_recap_and_ephemeral.top_burden.unmapped_share.toFixed(2)} recap_unmapped_share=${probe.simulated_demoted_unmapped_recap_and_ephemeral.top_burden.recap_unmapped_share.toFixed(2)}`);
|
|
1813
|
-
|
|
1821
|
+
if (probe.scenario_score) p.log.message(` scenario score: mode_match=${probe.scenario_score.mode_match ? "yes" : "no"} top1_primary=${probe.scenario_score.primary_in_top1 ? "yes" : "no"} top3_primary=${probe.scenario_score.primary_in_top3_count} top1_anti=${probe.scenario_score.anti_signal_in_top1 ? "yes" : "no"} primary=${probe.scenario_score.primary_match_count} anti=${probe.scenario_score.anti_signal_count} recap=${probe.scenario_score.recap_count} unmapped_recap=${probe.scenario_score.unmapped_recap_count} chatter=${probe.scenario_score.administrative_chatter_count} net=${probe.scenario_score.score}`);
|
|
1822
|
+
for (const item of probe.items.slice(0, 5)) p.log.message(` [${item.id}] (${item.kind}/${item.role}/${item.mapping}/${item.session_class}/${item.summary_disposition}) ${item.title} — ${item.role_reason}`);
|
|
1814
1823
|
}
|
|
1815
1824
|
}
|
|
1816
1825
|
p.outro("done");
|
|
@@ -1818,14 +1827,18 @@ function createMemoryRoleReportCommand() {
|
|
|
1818
1827
|
return cmd;
|
|
1819
1828
|
}
|
|
1820
1829
|
function createMemoryRoleCompareCommand() {
|
|
1821
|
-
const cmd = new Command("role-compare").configureHelp(helpStyle).description("Compare inferred memory-role and probe metrics across two DB snapshots").argument("<baseline_db>", "baseline sqlite database path").argument("<candidate_db>", "candidate sqlite database path").option("--project <project>", "project identifier (defaults to git repo root)").option("--all-projects", "analyze across all projects").option("--probe <query>", "run a retrieval probe query against both snapshots", (value, prev) => [...prev, value], []).option("--inactive", "include inactive memories");
|
|
1830
|
+
const cmd = new Command("role-compare").configureHelp(helpStyle).description("Compare inferred memory-role and probe metrics across two DB snapshots").argument("<baseline_db>", "baseline sqlite database path").argument("<candidate_db>", "candidate sqlite database path").option("--project <project>", "project identifier (defaults to git repo root)").option("--all-projects", "analyze across all projects").option("--probe <query>", "run a retrieval probe query against both snapshots", (value, prev) => [...prev, value], []).option("--scenario <id>", "run a named injection-first eval scenario pack (can be repeated)", (value, prev) => [...prev, value], []).option("--inactive", "include inactive memories");
|
|
1822
1831
|
addJsonOption(cmd);
|
|
1823
1832
|
cmd.action((baselineDb, candidateDb, opts) => {
|
|
1833
|
+
const project = opts.allProjects === true ? null : opts.project?.trim() || process.env.CODEMEM_PROJECT?.trim() || resolveProject(process.cwd(), null);
|
|
1834
|
+
const invalidScenario = (opts.scenario ?? []).find((id) => getInjectionEvalScenarioPack(id) == null);
|
|
1835
|
+
if (invalidScenario) throw new Error(`Unknown eval scenario pack: ${invalidScenario}`);
|
|
1836
|
+
const probes = [...opts.probe ?? [], ...getInjectionEvalScenarioPrompts(opts.scenario ?? [])];
|
|
1824
1837
|
const result = compareMemoryRoleReports(baselineDb, candidateDb, {
|
|
1825
|
-
project
|
|
1838
|
+
project,
|
|
1826
1839
|
allProjects: opts.allProjects === true,
|
|
1827
1840
|
includeInactive: opts.inactive === true,
|
|
1828
|
-
probes
|
|
1841
|
+
probes
|
|
1829
1842
|
});
|
|
1830
1843
|
if (opts.json) {
|
|
1831
1844
|
console.log(JSON.stringify(result, null, 2));
|
|
@@ -1843,6 +1856,10 @@ function createMemoryRoleCompareCommand() {
|
|
|
1843
1856
|
].join("\n"));
|
|
1844
1857
|
p.log.info("Role deltas:");
|
|
1845
1858
|
for (const [role, count] of Object.entries(result.delta.counts_by_role)) p.log.message(` ${role.padEnd(10)} ${String(count)}`);
|
|
1859
|
+
p.log.info("Session class deltas:");
|
|
1860
|
+
for (const [bucket, count] of Object.entries(result.delta.session_class_buckets)) p.log.message(` ${bucket.padEnd(20)} ${String(count)}`);
|
|
1861
|
+
p.log.info("Summary disposition deltas:");
|
|
1862
|
+
for (const [bucket, count] of Object.entries(result.delta.summary_disposition_buckets)) p.log.message(` ${bucket.padEnd(20)} ${String(count)}`);
|
|
1846
1863
|
if (result.probe_comparisons.length > 0) {
|
|
1847
1864
|
p.log.info("Probe comparisons:");
|
|
1848
1865
|
for (const probe of result.probe_comparisons) {
|
|
@@ -1851,12 +1868,288 @@ function createMemoryRoleCompareCommand() {
|
|
|
1851
1868
|
p.log.message(` overlap: shared_top_keys=${probe.shared_item_keys.length} baseline_top=${probe.baseline_item_ids.slice(0, 5).join(",") || "-"} candidate_top=${probe.candidate_item_ids.slice(0, 5).join(",") || "-"}`);
|
|
1852
1869
|
if (probe.delta_top_burden) p.log.message(` burden delta: recap_share=${probe.delta_top_burden.recap_share.toFixed(2)} unmapped_share=${probe.delta_top_burden.unmapped_share.toFixed(2)} recap_unmapped_share=${probe.delta_top_burden.recap_unmapped_share.toFixed(2)}`);
|
|
1853
1870
|
if (probe.delta_top_mapping_counts) p.log.message(` mapping delta: mapped=${probe.delta_top_mapping_counts.mapped} unmapped=${probe.delta_top_mapping_counts.unmapped}`);
|
|
1871
|
+
if (probe.baseline_scenario_score || probe.candidate_scenario_score) p.log.message(` scenario scores: baseline=${probe.baseline_scenario_score?.score ?? "-"} candidate=${probe.candidate_scenario_score?.score ?? "-"}`);
|
|
1872
|
+
if (probe.delta_scenario_score) p.log.message(` scenario delta: mode_match=${probe.delta_scenario_score.mode_match ?? "-"} top1_primary=${probe.delta_scenario_score.primary_in_top1 ?? "-"} top3_primary=${probe.delta_scenario_score.primary_in_top3_count ?? "-"} top1_anti=${probe.delta_scenario_score.anti_signal_in_top1 ?? "-"} primary=${probe.delta_scenario_score.primary_match_count ?? "-"} anti=${probe.delta_scenario_score.anti_signal_count ?? "-"} recap=${probe.delta_scenario_score.recap_count ?? "-"} unmapped_recap=${probe.delta_scenario_score.unmapped_recap_count ?? "-"} chatter=${probe.delta_scenario_score.administrative_chatter_count ?? "-"} net=${probe.delta_scenario_score.score ?? "-"}`);
|
|
1854
1873
|
}
|
|
1855
1874
|
}
|
|
1856
1875
|
p.outro("done");
|
|
1857
1876
|
});
|
|
1858
1877
|
return cmd;
|
|
1859
1878
|
}
|
|
1879
|
+
/**
 * Builds the `extraction-report` subcommand.
 *
 * The action validates that exactly one of `--session-id` / `--batch-id` was
 * supplied, resolves the required `--scenario`, calls
 * `getSessionExtractionEval` for the chosen target, and prints the result
 * either as JSON (`--json`) or as a formatted report through `p`.
 *
 * @returns {Command} the configured commander command.
 */
function createMemoryExtractionReportCommand() {
	const cmd = new Command("extraction-report")
		.configureHelp(helpStyle)
		.description("Score extracted memories for a session against a built-in extraction eval rubric")
		.option("--session-id <id>", "session ID to evaluate")
		.option("--batch-id <id>", "raw-event flush batch ID to evaluate")
		.requiredOption("--scenario <id>", "built-in extraction eval scenario ID")
		.option("--inactive", "include inactive memories");
	addDbOption(cmd);
	addJsonOption(cmd);
	cmd.action((opts) => {
		const sessionText = opts.sessionId?.trim() ?? "";
		const batchText = opts.batchId?.trim() ?? "";
		const wantsSession = sessionText.length > 0;
		const wantsBatch = batchText.length > 0;
		// Both-set and neither-set are rejected by the same comparison.
		if (wantsSession === wantsBatch) throw new Error("Provide exactly one of --session-id or --batch-id");

		let sessionId = null;
		if (wantsSession) {
			sessionId = parseStrictPositiveId(sessionText);
			if (sessionId === null) throw new Error(`Invalid session ID: ${sessionText}`);
		}
		let batchId = null;
		if (wantsBatch) {
			batchId = parseStrictPositiveId(batchText);
			if (batchId === null) throw new Error(`Invalid batch ID: ${batchText}`);
		}

		const scenarioId = opts.scenario?.trim() ?? "";
		const scenario = getSessionExtractionEvalScenario(scenarioId);
		// scenarioId can be empty after trimming, so fall back to the raw option value in the message.
		if (!scenario) throw new Error(`Unknown extraction eval scenario: ${scenarioId || opts.scenario}`);

		// Only the target key differs between the batch and session lookups.
		const target = batchId != null ? { batchId } : { sessionId };
		const report = getSessionExtractionEval(resolveDbOpt(opts), {
			...target,
			scenarioId: scenario.id,
			includeInactive: opts.inactive === true
		});

		if (opts.json) {
			console.log(JSON.stringify(report, null, 2));
			return;
		}

		p.intro("codemem memory extraction-report");
		const headerLines = [
			`Scenario: ${report.scenario.id} — ${report.scenario.title}`,
			`Target: ${report.target.type}${report.target.batchId != null ? ` #${report.target.batchId}` : ""}`,
			`Session: ${report.session.id} (${report.session.project ?? "no-project"})`,
			`Session class: ${report.session.sessionClass}`,
			`Summary disposition: ${report.session.summaryDisposition}`
		];
		p.log.info(headerLines.join("\n"));
		const scoreLines = [
			`Pass: ${report.pass ? "yes" : "no"}`,
			`Summary count: ${report.counts.summaries}`,
			`Observation count: ${report.counts.observations}`,
			`Summary thread coverage: ${report.coverage.summaryThreadCoverage}`,
			`Observation thread coverage: ${report.coverage.observationThreadCoverage}`,
			`Total thread coverage: ${report.coverage.totalThreadCoverage}`,
			`Duplicate observation threads: ${report.coverage.duplicateObservationThreads}`
		];
		p.log.info(scoreLines.join("\n"));
		if (report.failureReasons.length > 0) {
			p.log.warn("Failure reasons:");
			for (const reason of report.failureReasons) {
				p.log.message(` - ${reason}`);
			}
		}
		p.log.info("Thread coverage:");
		for (const thread of report.threads) {
			p.log.message(` ${thread.id.padEnd(22)} summary=${thread.summaryMatch ? "yes" : "no"} observations=${thread.observationMatch ? "yes" : "no"}`);
		}
		p.outro("done");
	});
	return cmd;
}
|
|
1936
|
+
/**
 * Builds the `extraction-replay` subcommand.
 *
 * The action validates `--batch-id`, `--scenario` and the optional numeric
 * overrides, layers the CLI overrides on top of `loadObserverConfig()`,
 * then runs either `replayBatchExtractionWithTierRouting` (when
 * `--observer-tier-routing` is set) or `replayBatchExtraction`. The result
 * is printed as JSON (`--json`) or as a formatted report through `p`.
 *
 * @returns {Command} the configured commander command.
 */
function createMemoryExtractionReplayCommand() {
	const cmd = new Command("extraction-replay")
		.configureHelp(helpStyle)
		.description("Re-run the observer on a historical flush batch without persisting, then score the fresh output")
		.requiredOption("--batch-id <id>", "raw-event flush batch ID to replay")
		.option("--transcript-budget <chars>", "override replay transcript budget in characters (replay only)")
		.option("--observer-tier-routing", "use replay-only benchmark-backed observer tier routing")
		.option("--observer-temperature <value>", "override observer temperature for replay only")
		.option("--openai-responses", "use OpenAI Responses API for replay only")
		.option("--reasoning-effort <level>", "set OpenAI reasoning.effort for replay only (responses path)")
		.option("--reasoning-summary <mode>", "set OpenAI reasoning.summary for replay only (responses path)")
		.option("--max-output-tokens <n>", "override OpenAI max_output_tokens for replay only (responses path)")
		.requiredOption("--scenario <id>", "built-in extraction eval scenario ID");
	addDbOption(cmd);
	addJsonOption(cmd);
	cmd.action(async (opts) => {
		const batchText = opts.batchId?.trim() ?? "";
		const batchId = parseStrictPositiveId(batchText);
		// batchText can be empty after trimming, so fall back to the raw option value in the message.
		if (batchId === null) throw new Error(`Invalid batch ID: ${batchText || opts.batchId}`);

		const scenarioId = opts.scenario?.trim() ?? "";
		const scenario = getSessionExtractionEvalScenario(scenarioId);
		if (!scenario) throw new Error(`Unknown extraction eval scenario: ${scenarioId || opts.scenario}`);

		const budgetText = opts.transcriptBudget?.trim() ?? "";
		let transcriptBudget = null;
		if (budgetText.length > 0) {
			transcriptBudget = parseStrictPositiveId(budgetText);
			if (transcriptBudget === null) throw new Error(`Invalid transcript budget: ${budgetText}`);
		}

		const temperatureText = opts.observerTemperature?.trim() ?? "";
		let observerTemperature;
		if (temperatureText.length > 0) {
			const parsedTemperature = Number(temperatureText);
			if (!Number.isFinite(parsedTemperature)) throw new Error(`Invalid observer temperature: ${temperatureText}`);
			observerTemperature = parsedTemperature;
		}

		const maxTokensText = opts.maxOutputTokens?.trim() ?? "";
		let maxOutputTokens = null;
		if (maxTokensText.length > 0) {
			maxOutputTokens = parseStrictPositiveId(maxTokensText);
			if (maxOutputTokens === null) throw new Error(`Invalid max output tokens: ${maxTokensText}`);
		}

		const baseConfig = loadObserverConfig();
		// Temperature and max tokens fall back to the loaded config; the
		// responses/reasoning fields are always taken from the CLI flags.
		const replayConfig = {
			...baseConfig,
			observerTemperature: observerTemperature ?? baseConfig.observerTemperature,
			observerOpenAIUseResponses: opts.openaiResponses === true,
			observerReasoningEffort: opts.reasoningEffort?.trim() || null,
			observerReasoningSummary: opts.reasoningSummary?.trim() || null,
			observerMaxOutputTokens: maxOutputTokens ?? baseConfig.observerMaxTokens
		};
		const observer = new ObserverClient(replayConfig);

		const replayOptions = {
			batchId,
			scenarioId: scenario.id,
			transcriptBudget: transcriptBudget ?? undefined
		};
		let replay;
		if (opts.observerTierRouting === true) {
			// The tier-routing variant receives the config rather than the client instance.
			replay = await replayBatchExtractionWithTierRouting(resolveDbOpt(opts), replayConfig, replayOptions);
		} else {
			replay = await replayBatchExtraction(resolveDbOpt(opts), observer, replayOptions);
		}

		if (opts.json) {
			console.log(JSON.stringify(replay, null, 2));
			return;
		}

		p.intro("codemem memory extraction-replay");
		const headerLines = [
			`Scenario: ${replay.scenario.id} — ${replay.scenario.title}`,
			`Batch: ${replay.target.batchId}`,
			`Session: ${replay.target.sessionId}`,
			`Observer: ${replay.observer.provider}/${replay.observer.model}`,
			`Tier: ${replay.observer.tier ?? "manual"}`,
			`OpenAI Responses: ${replay.observer.openaiUseResponses ? "yes" : "no"}`,
			`Reasoning effort: ${replay.observer.reasoningEffort ?? "none"}`,
			`Classification: ${replay.classification.status}`,
			`Pass: ${replay.evaluation.pass ? "yes" : "no"}`
		];
		p.log.info(headerLines.join("\n"));
		if (replay.classification.reason) {
			p.log.message(`Classification reason: ${replay.classification.reason}`);
		}
		if (replay.evaluation.failureReasons.length > 0) {
			p.log.warn("Failure reasons:");
			for (const reason of replay.evaluation.failureReasons) {
				p.log.message(` - ${reason}`);
			}
		}
		const coverageLines = [
			`Fresh summaries: ${replay.evaluation.counts.summaries}`,
			`Fresh observations: ${replay.evaluation.counts.observations}`,
			`Summary thread coverage: ${replay.evaluation.coverage.summaryThreadCoverage}`,
			`Observation thread coverage: ${replay.evaluation.coverage.observationThreadCoverage}`,
			`Total thread coverage: ${replay.evaluation.coverage.totalThreadCoverage}`
		];
		p.log.info(coverageLines.join("\n"));
		p.outro("done");
	});
	return cmd;
}
|
|
2011
|
+
function createMemoryExtractionBenchmarkCommand() {
|
|
2012
|
+
const cmd = new Command("extraction-benchmark").configureHelp(helpStyle).description("Run the formal extraction replay benchmark set and print a cost/quality scoreboard").requiredOption("--benchmark <id>", "benchmark profile id").option("--observer-provider <provider>", "override observer provider for this benchmark run").option("--observer-model <model>", "override observer model for this benchmark run").option("--observer-tier-routing", "use replay-only benchmark-backed observer tier routing").option("--openai-responses", "use OpenAI Responses API for this benchmark run").option("--reasoning-effort <level>", "set OpenAI reasoning.effort for this benchmark run (responses path)").option("--reasoning-summary <mode>", "set OpenAI reasoning.summary for this benchmark run (responses path)").option("--max-output-tokens <n>", "override OpenAI max_output_tokens for this benchmark run (responses path)").option("--observer-temperature <value>", "override observer temperature for this benchmark run").option("--transcript-budget <chars>", "override replay transcript budget in characters for this benchmark run");
|
|
2013
|
+
addDbOption(cmd);
|
|
2014
|
+
addJsonOption(cmd);
|
|
2015
|
+
cmd.action(async (opts) => {
|
|
2016
|
+
const benchmarkId = opts.benchmark?.trim() ?? "";
|
|
2017
|
+
const benchmark = getExtractionBenchmarkProfile(benchmarkId);
|
|
2018
|
+
if (!benchmark) throw new Error(`Unknown extraction benchmark: ${benchmarkId || opts.benchmark}`);
|
|
2019
|
+
const transcriptBudgetInput = opts.transcriptBudget?.trim() ?? "";
|
|
2020
|
+
const transcriptBudget = transcriptBudgetInput.length > 0 ? parseStrictPositiveId(transcriptBudgetInput) : null;
|
|
2021
|
+
if (transcriptBudgetInput.length > 0 && transcriptBudget === null) throw new Error(`Invalid transcript budget: ${transcriptBudgetInput || opts.transcriptBudget}`);
|
|
2022
|
+
const observerTemperatureInput = opts.observerTemperature?.trim() ?? "";
|
|
2023
|
+
let observerTemperature;
|
|
2024
|
+
if (observerTemperatureInput.length > 0) {
|
|
2025
|
+
const parsed = Number(observerTemperatureInput);
|
|
2026
|
+
if (!Number.isFinite(parsed)) throw new Error(`Invalid observer temperature: ${observerTemperatureInput || opts.observerTemperature}`);
|
|
2027
|
+
observerTemperature = parsed;
|
|
2028
|
+
}
|
|
2029
|
+
const maxOutputTokensInput = opts.maxOutputTokens?.trim() ?? "";
|
|
2030
|
+
const maxOutputTokens = maxOutputTokensInput.length > 0 ? parseStrictPositiveId(maxOutputTokensInput) : null;
|
|
2031
|
+
if (maxOutputTokensInput.length > 0 && maxOutputTokens === null) throw new Error(`Invalid max output tokens: ${maxOutputTokensInput || opts.maxOutputTokens}`);
|
|
2032
|
+
const observerConfig = loadObserverConfig();
|
|
2033
|
+
const observerConfigWithOverrides = {
|
|
2034
|
+
...observerConfig,
|
|
2035
|
+
observerProvider: opts.observerProvider?.trim() || observerConfig.observerProvider,
|
|
2036
|
+
observerModel: opts.observerModel?.trim() || observerConfig.observerModel,
|
|
2037
|
+
observerTemperature: observerTemperature ?? observerConfig.observerTemperature,
|
|
2038
|
+
observerOpenAIUseResponses: opts.openaiResponses === true,
|
|
2039
|
+
observerReasoningEffort: opts.reasoningEffort?.trim() || null,
|
|
2040
|
+
observerReasoningSummary: opts.reasoningSummary?.trim() || null,
|
|
2041
|
+
observerMaxOutputTokens: maxOutputTokens ?? observerConfig.observerMaxTokens
|
|
2042
|
+
};
|
|
2043
|
+
const observer = new ObserverClient(observerConfigWithOverrides);
|
|
2044
|
+
const runs = [];
|
|
2045
|
+
for (const batch of benchmark.batches) {
|
|
2046
|
+
const scenarioId = batch.scenarioId ?? benchmark.scenarioId;
|
|
2047
|
+
const result = opts.observerTierRouting === true ? await replayBatchExtractionWithTierRouting(resolveDbOpt(opts), observerConfigWithOverrides, {
|
|
2048
|
+
batchId: batch.batchId,
|
|
2049
|
+
scenarioId,
|
|
2050
|
+
transcriptBudget: transcriptBudget ?? void 0
|
|
2051
|
+
}) : await replayBatchExtraction(resolveDbOpt(opts), observer, {
|
|
2052
|
+
batchId: batch.batchId,
|
|
2053
|
+
scenarioId,
|
|
2054
|
+
transcriptBudget: transcriptBudget ?? void 0
|
|
2055
|
+
});
|
|
2056
|
+
runs.push({
|
|
2057
|
+
batchId: batch.batchId,
|
|
2058
|
+
sessionId: batch.sessionId,
|
|
2059
|
+
label: batch.label,
|
|
2060
|
+
purpose: batch.purpose,
|
|
2061
|
+
complexity: batch.complexity,
|
|
2062
|
+
scenarioId,
|
|
2063
|
+
expectedTier: batch.expectedTier ?? null,
|
|
2064
|
+
analysis: {
|
|
2065
|
+
eventSpan: result.analysis.eventSpan,
|
|
2066
|
+
promptCount: result.analysis.promptCount,
|
|
2067
|
+
toolCount: result.analysis.toolCount,
|
|
2068
|
+
transcriptLength: result.analysis.transcriptLength
|
|
2069
|
+
},
|
|
2070
|
+
status: result.classification.status,
|
|
2071
|
+
reason: result.classification.reason,
|
|
2072
|
+
tier: result.observer.tier ?? "manual",
|
|
2073
|
+
provider: result.observer.provider,
|
|
2074
|
+
model: result.observer.model,
|
|
2075
|
+
openaiUseResponses: result.observer.openaiUseResponses,
|
|
2076
|
+
reasoningEffort: result.observer.reasoningEffort,
|
|
2077
|
+
reasoningSummary: result.observer.reasoningSummary,
|
|
2078
|
+
maxOutputTokens: result.observer.maxOutputTokens,
|
|
2079
|
+
temperature: result.observer.temperature,
|
|
2080
|
+
summaries: result.evaluation.counts.summaries,
|
|
2081
|
+
observations: result.evaluation.counts.observations,
|
|
2082
|
+
repairApplied: result.observer.repairApplied
|
|
2083
|
+
});
|
|
2084
|
+
}
|
|
2085
|
+
const summary = {
|
|
2086
|
+
total: runs.length,
|
|
2087
|
+
shapeQualityTotal: runs.filter((run) => run.purpose === "shape_quality").length,
|
|
2088
|
+
shapeQualityPasses: runs.filter((run) => run.purpose === "shape_quality" && run.status === "pass").length,
|
|
2089
|
+
shapeQualityFails: runs.filter((run) => run.purpose === "shape_quality" && run.status === "shape_fail").length,
|
|
2090
|
+
expectedTierTotal: runs.filter((run) => run.expectedTier != null).length,
|
|
2091
|
+
expectedTierMatches: runs.filter((run) => run.expectedTier != null && run.expectedTier === run.tier).length,
|
|
2092
|
+
robustnessNoOutput: runs.filter((run) => run.status === "observer_no_output").length
|
|
2093
|
+
};
|
|
2094
|
+
const uniqueObserverKeys = Array.from(new Set(runs.map((run) => `${run.provider}::${run.model}::${run.openaiUseResponses ? "responses" : "chat"}`)));
|
|
2095
|
+
const observerSummary = opts.observerTierRouting === true ? {
|
|
2096
|
+
provider: uniqueObserverKeys.length === 1 ? runs[0]?.provider ?? observer.provider : "mixed",
|
|
2097
|
+
model: uniqueObserverKeys.length === 1 ? runs[0]?.model ?? observer.model : "mixed",
|
|
2098
|
+
tierRouting: true,
|
|
2099
|
+
openaiUseResponses: uniqueObserverKeys.length === 1 ? runs[0]?.openaiUseResponses ?? observer.openaiUseResponses : null,
|
|
2100
|
+
reasoningEffort: uniqueObserverKeys.length === 1 ? runs[0]?.reasoningEffort ?? observer.reasoningEffort : "mixed",
|
|
2101
|
+
reasoningSummary: uniqueObserverKeys.length === 1 ? runs[0]?.reasoningSummary ?? observer.reasoningSummary : "mixed",
|
|
2102
|
+
maxOutputTokens: uniqueObserverKeys.length === 1 ? runs[0]?.maxOutputTokens ?? observer.maxOutputTokens : null,
|
|
2103
|
+
temperature: uniqueObserverKeys.length === 1 ? runs[0]?.temperature ?? observer.temperature : null,
|
|
2104
|
+
transcriptBudget: transcriptBudget ?? null,
|
|
2105
|
+
selectedObservers: uniqueObserverKeys
|
|
2106
|
+
} : {
|
|
2107
|
+
provider: observer.provider,
|
|
2108
|
+
model: observer.model,
|
|
2109
|
+
tierRouting: false,
|
|
2110
|
+
openaiUseResponses: observer.openaiUseResponses,
|
|
2111
|
+
reasoningEffort: observer.reasoningEffort,
|
|
2112
|
+
reasoningSummary: observer.reasoningSummary,
|
|
2113
|
+
maxOutputTokens: observer.maxOutputTokens,
|
|
2114
|
+
temperature: observer.temperature,
|
|
2115
|
+
transcriptBudget: transcriptBudget ?? null,
|
|
2116
|
+
selectedObservers: uniqueObserverKeys
|
|
2117
|
+
};
|
|
2118
|
+
const output = {
|
|
2119
|
+
benchmark: {
|
|
2120
|
+
id: benchmark.id,
|
|
2121
|
+
title: benchmark.title,
|
|
2122
|
+
scenarioId: benchmark.scenarioId
|
|
2123
|
+
},
|
|
2124
|
+
observer: observerSummary,
|
|
2125
|
+
summary,
|
|
2126
|
+
runs
|
|
2127
|
+
};
|
|
2128
|
+
if (opts.json) {
|
|
2129
|
+
console.log(JSON.stringify(output, null, 2));
|
|
2130
|
+
return;
|
|
2131
|
+
}
|
|
2132
|
+
p.intro("codemem memory extraction-benchmark");
|
|
2133
|
+
p.log.info([
|
|
2134
|
+
`Benchmark: ${benchmark.id} — ${benchmark.title}`,
|
|
2135
|
+
`Observer: ${observerSummary.provider}/${observerSummary.model}`,
|
|
2136
|
+
`Tier routing: ${opts.observerTierRouting === true ? "yes" : "no"}`,
|
|
2137
|
+
`OpenAI Responses: ${observerSummary.openaiUseResponses === null ? "mixed" : observerSummary.openaiUseResponses ? "yes" : "no"}`,
|
|
2138
|
+
`Reasoning effort: ${observerSummary.reasoningEffort ?? "none"}`,
|
|
2139
|
+
`Reasoning summary: ${observerSummary.reasoningSummary ?? "none"}`,
|
|
2140
|
+
`Max output tokens: ${observerSummary.maxOutputTokens ?? "mixed"}`,
|
|
2141
|
+
`Temperature: ${observerSummary.temperature ?? "mixed"}`,
|
|
2142
|
+
`Transcript budget override: ${transcriptBudget ?? "default"}`,
|
|
2143
|
+
`Shape-quality passes: ${summary.shapeQualityPasses}/${summary.shapeQualityTotal}`,
|
|
2144
|
+
`Shape-quality fails: ${summary.shapeQualityFails}`,
|
|
2145
|
+
`Expected-tier matches: ${summary.expectedTierMatches}/${summary.expectedTierTotal}`,
|
|
2146
|
+
`Observer no-output cases: ${summary.robustnessNoOutput}`
|
|
2147
|
+
].join("\n"));
|
|
2148
|
+
for (const run of runs) p.log.message(` [${run.batchId}] ${run.status.padEnd(18)} ${run.complexity.padEnd(10)} tier=${run.tier.padEnd(6)} expected=${(run.expectedTier ?? "n/a").padEnd(6)} span=${String(run.analysis.eventSpan).padEnd(3)} prompts=${run.analysis.promptCount} tools=${String(run.analysis.toolCount).padEnd(2)} transcript=${run.analysis.transcriptLength} ${run.provider}/${run.model}${run.openaiUseResponses ? " [responses]" : ""} summaries=${run.summaries} observations=${run.observations} repair=${run.repairApplied ? "yes" : "no"} — ${run.label}`);
|
|
2149
|
+
p.outro("done");
|
|
2150
|
+
});
|
|
2151
|
+
return cmd;
|
|
2152
|
+
}
|
|
1860
2153
|
function createMemoryRelinkReportCommand() {
|
|
1861
2154
|
const cmd = new Command("relink-report").configureHelp(helpStyle).description("Analyze dry-run raw-event session relinking and compaction opportunities").option("--project <project>", "project identifier (defaults to git repo root)").option("--all-projects", "analyze across all projects").option("--limit <n>", "max groups to print", "25");
|
|
1862
2155
|
addDbOption(cmd);
|
|
@@ -1935,6 +2228,9 @@ memoryCommand.addCommand(createRememberMemoryCommand());
|
|
|
1935
2228
|
memoryCommand.addCommand(createInjectMemoryCommand());
|
|
1936
2229
|
memoryCommand.addCommand(createMemoryRoleReportCommand());
|
|
1937
2230
|
memoryCommand.addCommand(createMemoryRoleCompareCommand());
|
|
2231
|
+
memoryCommand.addCommand(createMemoryExtractionReportCommand());
|
|
2232
|
+
memoryCommand.addCommand(createMemoryExtractionReplayCommand());
|
|
2233
|
+
memoryCommand.addCommand(createMemoryExtractionBenchmarkCommand());
|
|
1938
2234
|
memoryCommand.addCommand(createMemoryRelinkReportCommand());
|
|
1939
2235
|
memoryCommand.addCommand(createMemoryRelinkPlanCommand());
|
|
1940
2236
|
//#endregion
|