@veewo/gitnexus 1.5.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. package/dist/benchmark/agent-context/runner.js +3 -0
  2. package/dist/benchmark/agent-context/runner.test.js +22 -0
  3. package/dist/benchmark/agent-context/tool-runner.d.ts +7 -6
  4. package/dist/benchmark/agent-safe-query-context/io.d.ts +2 -0
  5. package/dist/benchmark/agent-safe-query-context/io.js +86 -0
  6. package/dist/benchmark/agent-safe-query-context/io.test.d.ts +1 -0
  7. package/dist/benchmark/agent-safe-query-context/io.test.js +13 -0
  8. package/dist/benchmark/agent-safe-query-context/report.d.ts +57 -0
  9. package/dist/benchmark/agent-safe-query-context/report.js +159 -0
  10. package/dist/benchmark/agent-safe-query-context/report.test.d.ts +1 -0
  11. package/dist/benchmark/agent-safe-query-context/report.test.js +362 -0
  12. package/dist/benchmark/agent-safe-query-context/runner.d.ts +44 -0
  13. package/dist/benchmark/agent-safe-query-context/runner.js +406 -0
  14. package/dist/benchmark/agent-safe-query-context/runner.test.d.ts +1 -0
  15. package/dist/benchmark/agent-safe-query-context/runner.test.js +290 -0
  16. package/dist/benchmark/agent-safe-query-context/semantic-tuple.d.ts +20 -0
  17. package/dist/benchmark/agent-safe-query-context/semantic-tuple.js +225 -0
  18. package/dist/benchmark/agent-safe-query-context/semantic-tuple.test.d.ts +1 -0
  19. package/dist/benchmark/agent-safe-query-context/semantic-tuple.test.js +122 -0
  20. package/dist/benchmark/agent-safe-query-context/subagent-live.d.ts +47 -0
  21. package/dist/benchmark/agent-safe-query-context/subagent-live.js +128 -0
  22. package/dist/benchmark/agent-safe-query-context/subagent-live.test.d.ts +1 -0
  23. package/dist/benchmark/agent-safe-query-context/subagent-live.test.js +155 -0
  24. package/dist/benchmark/agent-safe-query-context/telemetry-tool.d.ts +9 -0
  25. package/dist/benchmark/agent-safe-query-context/telemetry-tool.js +77 -0
  26. package/dist/benchmark/agent-safe-query-context/types.d.ts +61 -0
  27. package/dist/benchmark/agent-safe-query-context/types.js +8 -0
  28. package/dist/benchmark/runtime-poc/provenance-artifact.d.ts +47 -0
  29. package/dist/benchmark/runtime-poc/provenance-artifact.js +89 -0
  30. package/dist/benchmark/runtime-poc/runner.d.ts +31 -0
  31. package/dist/benchmark/runtime-poc/runner.js +163 -0
  32. package/dist/benchmark/u2-e2e/hydration-policy-repeatability-runner.d.ts +8 -0
  33. package/dist/benchmark/u2-e2e/hydration-policy-repeatability-runner.js +21 -0
  34. package/dist/benchmark/u2-e2e/phase2-runtime-claim-acceptance-runner.d.ts +0 -1
  35. package/dist/benchmark/u2-e2e/phase2-runtime-claim-acceptance-runner.js +53 -51
  36. package/dist/benchmark/u2-e2e/phase2-runtime-claim-acceptance-runner.test.js +0 -1
  37. package/dist/benchmark/u2-e2e/phase5-rule-lab-acceptance-runner.d.ts +1 -1
  38. package/dist/benchmark/u2-e2e/phase5-rule-lab-acceptance-runner.js +82 -18
  39. package/dist/benchmark/u2-e2e/phase5-rule-lab-acceptance-runner.test.js +1 -2
  40. package/dist/benchmark/u2-e2e/retrieval-runner.js +15 -7
  41. package/dist/benchmark/u2-e2e/retrieval-runner.test.js +46 -0
  42. package/dist/cli/ai-context.js +2 -12
  43. package/dist/cli/ai-context.test.js +8 -0
  44. package/dist/cli/analyze-runtime-summary.js +1 -0
  45. package/dist/cli/analyze-runtime-summary.test.js +2 -0
  46. package/dist/cli/analyze-summary.d.ts +2 -0
  47. package/dist/cli/analyze-summary.js +24 -0
  48. package/dist/cli/analyze-summary.test.js +65 -1
  49. package/dist/cli/analyze.js +5 -1
  50. package/dist/cli/benchmark-agent-safe-query-context.d.ts +20 -0
  51. package/dist/cli/benchmark-agent-safe-query-context.js +39 -0
  52. package/dist/cli/benchmark-agent-safe-query-context.test.d.ts +1 -0
  53. package/dist/cli/benchmark-agent-safe-query-context.test.js +271 -0
  54. package/dist/cli/benchmark.d.ts +29 -0
  55. package/dist/cli/benchmark.js +55 -0
  56. package/dist/cli/index.js +23 -0
  57. package/dist/cli/rule-lab.d.ts +3 -7
  58. package/dist/cli/rule-lab.js +13 -22
  59. package/dist/cli/rule-lab.test.js +23 -3
  60. package/dist/cli/tool.d.ts +2 -0
  61. package/dist/cli/tool.js +2 -0
  62. package/dist/core/config/unity-config.d.ts +0 -1
  63. package/dist/core/config/unity-config.js +0 -1
  64. package/dist/core/ingestion/pipeline.js +35 -6
  65. package/dist/core/ingestion/unity-lifecycle-synthetic-calls.test.js +18 -20
  66. package/dist/core/ingestion/unity-parity-seed.d.ts +2 -1
  67. package/dist/core/ingestion/unity-parity-seed.js +8 -0
  68. package/dist/core/ingestion/unity-resource-processor.d.ts +11 -0
  69. package/dist/core/ingestion/unity-resource-processor.js +102 -0
  70. package/dist/core/ingestion/unity-resource-processor.test.js +449 -0
  71. package/dist/core/ingestion/unity-runtime-binding-rules.d.ts +15 -0
  72. package/dist/core/ingestion/unity-runtime-binding-rules.js +178 -30
  73. package/dist/core/lbug/csv-generator.test.js +2 -2
  74. package/dist/core/unity/doc-contract.test.d.ts +1 -0
  75. package/dist/core/unity/doc-contract.test.js +30 -0
  76. package/dist/core/unity/prefab-source-scan.d.ts +25 -0
  77. package/dist/core/unity/prefab-source-scan.js +152 -0
  78. package/dist/core/unity/prefab-source-scan.test.d.ts +1 -0
  79. package/dist/core/unity/prefab-source-scan.test.js +70 -0
  80. package/dist/core/unity/scan-context.d.ts +12 -0
  81. package/dist/core/unity/scan-context.js +50 -2
  82. package/dist/core/unity/scan-context.test.js +74 -0
  83. package/dist/mcp/local/agent-safe-response.d.ts +10 -0
  84. package/dist/mcp/local/agent-safe-response.js +639 -0
  85. package/dist/mcp/local/derived-process-reader.js +1 -1
  86. package/dist/mcp/local/local-backend.d.ts +18 -1
  87. package/dist/mcp/local/local-backend.js +319 -125
  88. package/dist/mcp/local/process-confidence.d.ts +1 -2
  89. package/dist/mcp/local/process-confidence.js +0 -3
  90. package/dist/mcp/local/process-confidence.test.js +4 -2
  91. package/dist/mcp/local/process-evidence.d.ts +1 -8
  92. package/dist/mcp/local/process-evidence.js +1 -23
  93. package/dist/mcp/local/process-evidence.test.js +2 -16
  94. package/dist/mcp/local/process-ref.d.ts +1 -1
  95. package/dist/mcp/local/runtime-chain-closure-evaluator.d.ts +33 -0
  96. package/dist/mcp/local/runtime-chain-closure-evaluator.js +273 -0
  97. package/dist/mcp/local/runtime-chain-graph-candidates.d.ts +23 -0
  98. package/dist/mcp/local/runtime-chain-graph-candidates.js +131 -0
  99. package/dist/mcp/local/runtime-chain-verify.d.ts +1 -1
  100. package/dist/mcp/local/runtime-chain-verify.js +149 -138
  101. package/dist/mcp/local/runtime-chain-verify.test.js +126 -68
  102. package/dist/mcp/local/runtime-claim-rule-registry.d.ts +4 -0
  103. package/dist/mcp/local/runtime-claim-rule-registry.js +4 -0
  104. package/dist/mcp/local/runtime-claim-rule-registry.test.js +37 -4
  105. package/dist/mcp/local/runtime-claim.d.ts +11 -0
  106. package/dist/mcp/local/runtime-claim.js +28 -0
  107. package/dist/mcp/local/unity-evidence-view.d.ts +1 -1
  108. package/dist/mcp/local/unity-evidence-view.js +1 -1
  109. package/dist/mcp/local/unity-evidence-view.test.js +22 -0
  110. package/dist/mcp/tools.js +51 -21
  111. package/dist/rule-lab/analyze.d.ts +2 -1
  112. package/dist/rule-lab/analyze.js +94 -59
  113. package/dist/rule-lab/analyze.test.js +238 -20
  114. package/dist/rule-lab/curate.d.ts +2 -1
  115. package/dist/rule-lab/curate.js +24 -3
  116. package/dist/rule-lab/curate.test.js +65 -0
  117. package/dist/rule-lab/curation-input-builder.d.ts +45 -0
  118. package/dist/rule-lab/curation-input-builder.js +133 -0
  119. package/dist/rule-lab/promote.js +80 -7
  120. package/dist/rule-lab/promote.test.js +150 -0
  121. package/dist/rule-lab/review-pack.d.ts +3 -0
  122. package/dist/rule-lab/review-pack.js +41 -1
  123. package/dist/rule-lab/review-pack.test.js +67 -0
  124. package/dist/rule-lab/types.d.ts +29 -0
  125. package/dist/types/pipeline.d.ts +3 -0
  126. package/package.json +4 -3
  127. package/scripts/run-node-tests.mjs +61 -0
  128. package/skills/_shared/unity-rule-authoring-contract.md +64 -0
  129. package/skills/_shared/unity-runtime-process-contract.md +16 -0
  130. package/skills/gitnexus-cli.md +8 -0
  131. package/skills/gitnexus-debugging.md +9 -0
  132. package/skills/gitnexus-exploring.md +66 -18
  133. package/skills/gitnexus-guide.md +42 -3
  134. package/skills/gitnexus-impact-analysis.md +8 -0
  135. package/skills/gitnexus-pr-review.md +8 -0
  136. package/skills/gitnexus-refactoring.md +8 -0
  137. package/skills/gitnexus-unity-rule-gen.md +66 -312
@@ -0,0 +1,47 @@
1
+ import { type LiveFailureClass } from './semantic-tuple.js';
2
+ import type { AgentSafeBenchmarkCase, SemanticTuple } from './types.js';
3
/** One wrapper-tool invocation as recorded in a run's `telemetry.jsonl`. */
export interface TelemetryStep {
    /** Tool that was invoked; only these three names are accepted. */
    tool: 'query' | 'context' | 'cypher';
    /** Arguments the tool was called with. */
    input: Record<string, unknown>;
    /** Raw tool output; its shape is tool-specific. */
    output: unknown;
    /** Elapsed time of the call, in milliseconds. */
    durationMs: number;
    /** Estimated token count attributed to the call. */
    totalTokensEst: number;
    /** Non-empty timestamp string recorded with the row. */
    timestamp: string;
}

/**
 * Final answer a subagent writes to `result.json`.
 * Every field is optional and the proof fields are untyped because the
 * payload is agent-authored and only normalised later during scoring.
 */
export interface SubagentFinalResult {
    resource_anchor?: string;
    symbol_anchor?: string;
    proof_edge?: unknown;
    proof_edges?: unknown;
    closure_status?: SemanticTuple['closure_status'];
    summary?: string;
}

/** Scored outcome of one live subagent case run. */
export interface SubagentLiveResult {
    /** Prompt text that was given to the subagent. */
    prompt: string;
    /** Location of the prompt artifact. */
    prompt_path: string;
    /** Location of the agent-written final result. */
    result_path: string;
    /** Location of the telemetry log. */
    telemetry_path: string;
    /** Parsed contents of the result file. */
    final_result: SubagentFinalResult;
    /** Validated telemetry rows, in file order. */
    steps: TelemetryStep[];
    /** Normalised tuple produced by scoring. */
    semantic_tuple: SemanticTuple;
    normalized_tuple_pass: boolean;
    evidence_validation_pass: boolean;
    /** Present when scoring classified a failure. */
    failure_class?: LiveFailureClass;
    /** True only when both the tuple and the evidence checks pass. */
    semantic_tuple_pass: boolean;
    /** Number of telemetry rows. */
    tool_calls_to_completion: number;
    /** Sum of `totalTokensEst` over all telemetry rows. */
    tokens_to_completion: number;
    stop_reason: 'semantic_tuple_satisfied' | 'agent_result_incomplete';
}

/** Renders the instruction prompt for a benchmarked subagent run. */
export declare function buildSubagentPrompt(benchmarkCase: AgentSafeBenchmarkCase, options: {
    repo: string;
    runDir: string;
    resultPath: string;
}): string;

/** Creates the run directory and writes the prompt artifact into it. */
export declare function prepareSubagentCaseRun(runDir: string, benchmarkCase: AgentSafeBenchmarkCase, options: {
    repo: string;
}): Promise<{
    promptPath: string;
    resultPath: string;
    prompt: string;
}>;

/** Loads, validates and scores the artifacts of a finished run directory. */
export declare function loadSubagentLiveCaseResult(runDir: string, benchmarkCase: AgentSafeBenchmarkCase): Promise<SubagentLiveResult>;
@@ -0,0 +1,128 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { scoreLiveTuple } from './semantic-tuple.js';
4
+ const ALLOWED_TOOLS = new Set(['query', 'context', 'cypher']);
5
+ export function buildSubagentPrompt(benchmarkCase, options) {
6
+ const wrapperCommand = [
7
+ 'node gitnexus/dist/benchmark/agent-safe-query-context/telemetry-tool.js',
8
+ `--run-dir "${options.runDir}"`,
9
+ '--tool <query|context|cypher>',
10
+ `--input '<JSON>'`,
11
+ ].join(' ');
12
+ return [
13
+ 'You are running a benchmarked GitNexus investigation.',
14
+ '',
15
+ `Case: ${benchmarkCase.label}`,
16
+ `Repo: ${options.repo}`,
17
+ `Goal category: ${benchmarkCase.live_task.objective}`,
18
+ '',
19
+ 'Starting seeds:',
20
+ `- Symbol/class seed: ${benchmarkCase.live_task.symbol_seed}`,
21
+ `- Resource seed: ${benchmarkCase.live_task.resource_seed}`,
22
+ '',
23
+ 'Use only this wrapper command for benchmarked evidence collection:',
24
+ wrapperCommand,
25
+ '',
26
+ 'Rules:',
27
+ '- Investigate normally from the seeds. Do not assume the answer.',
28
+ '- Stay within the goal category and avoid switching to unrelated relation categories.',
29
+ '- For benchmarked GitNexus evidence collection, use only query/context/cypher through the wrapper command above.',
30
+ '- Stop when you have enough evidence to return your best result.',
31
+ '',
32
+ `Write your final result as JSON to: ${options.resultPath}`,
33
+ 'Final JSON schema:',
34
+ '{',
35
+ ' "resource_anchor": "string",',
36
+ ' "symbol_anchor": "string",',
37
+ ' "proof_edge": "string (optional)",',
38
+ ' "proof_edges": ["string"] (optional),',
39
+ ' "closure_status": "not_verified_full|verified_partial|verified_full|failed",',
40
+ ' "summary": "short explanation"',
41
+ '}',
42
+ '',
43
+ 'Do not include any extra wrapper calls after you have enough evidence.',
44
+ ].join('\n');
45
+ }
46
/**
 * Creates the run directory and writes the subagent prompt into it.
 *
 * The prompt is checked against the prompt contract before it is written, so
 * a prompt that violates the contract never reaches disk.
 *
 * @param runDir Directory for this case run (created if missing).
 * @param benchmarkCase Case definition used to render the prompt.
 * @param options Carries the `repo` name shown in the prompt.
 * @returns Paths of the prompt and (future) result file, plus the prompt text.
 */
export async function prepareSubagentCaseRun(runDir, benchmarkCase, options) {
    await fs.mkdir(runDir, { recursive: true });
    const resultPath = path.join(runDir, 'result.json');
    const prompt = buildSubagentPrompt(benchmarkCase, { repo: options.repo, runDir, resultPath });
    assertPromptContract(prompt, benchmarkCase.semantic_tuple);
    const promptPath = path.join(runDir, 'prompt.txt');
    await fs.writeFile(promptPath, prompt, 'utf-8');
    return { promptPath, resultPath, prompt };
}
59
/**
 * Loads the artifacts of a finished subagent run and scores them.
 *
 * Reads `prompt.txt`, `telemetry.jsonl` and `result.json` from `runDir`,
 * re-checks the prompt contract, validates every telemetry row, and scores the
 * final result against the case's canonical tuple using the tool outputs.
 *
 * @param runDir Directory containing the three run artifacts.
 * @param benchmarkCase Case whose `semantic_tuple` is the scoring target.
 * @returns The scored run; `semantic_tuple_pass` requires both the tuple and
 *          the evidence checks to pass.
 * @throws If an artifact is unreadable or malformed, the prompt violates the
 *         contract, a telemetry row is invalid, or there are no rows at all.
 */
export async function loadSubagentLiveCaseResult(runDir, benchmarkCase) {
    const [promptPath, telemetryPath, resultPath] = ['prompt.txt', 'telemetry.jsonl', 'result.json']
        .map((fileName) => path.join(runDir, fileName));
    const prompt = await fs.readFile(promptPath, 'utf-8');
    assertPromptContract(prompt, benchmarkCase.semantic_tuple);
    // telemetry.jsonl holds one JSON document per line; blank lines are ignored.
    const steps = [];
    for (const rawLine of (await fs.readFile(telemetryPath, 'utf-8')).split('\n')) {
        const line = rawLine.trim();
        if (line) {
            steps.push(validateTelemetryRow(JSON.parse(line)));
        }
    }
    if (steps.length === 0) {
        throw new Error(`missing telemetry rows: ${runDir}`);
    }
    const finalResult = JSON.parse(await fs.readFile(resultPath, 'utf-8'));
    const toolOutputs = steps.map((step) => step.output);
    const scoring = scoreLiveTuple(benchmarkCase.semantic_tuple, finalResult, toolOutputs, { toolCalls: steps.length });
    const passed = scoring.normalized_tuple_pass && scoring.evidence_validation_pass;
    let tokenTotal = 0;
    for (const step of steps) {
        tokenTotal = tokenTotal + step.totalTokensEst;
    }
    return {
        prompt,
        prompt_path: promptPath,
        result_path: resultPath,
        telemetry_path: telemetryPath,
        final_result: finalResult,
        steps,
        semantic_tuple: scoring.normalized_tuple,
        normalized_tuple_pass: scoring.normalized_tuple_pass,
        evidence_validation_pass: scoring.evidence_validation_pass,
        failure_class: scoring.failure_class,
        semantic_tuple_pass: passed,
        tool_calls_to_completion: steps.length,
        tokens_to_completion: tokenTotal,
        stop_reason: passed ? 'semantic_tuple_satisfied' : 'agent_result_incomplete',
    };
}
94
/**
 * Throws when a subagent prompt violates the benchmark prompt contract.
 *
 * The prompt must mention the telemetry wrapper and the final JSON schema,
 * must not use open-ended "strongest relation" wording, and must not contain
 * any canonical proof edge of the expected tuple.
 *
 * @param prompt Prompt text to check.
 * @param tuple Canonical tuple; `proof_edge` and `proof_edges` are read.
 * @throws Error describing the first violated rule.
 */
function assertPromptContract(prompt, tuple) {
    if (!prompt.includes('telemetry-tool.js')) {
        throw new Error('prompt missing wrapper command');
    }
    if (!prompt.includes('Final JSON schema:')) {
        throw new Error('prompt missing final JSON schema');
    }
    if (prompt.includes('strongest supported relation') || prompt.includes('strongest validated runtime relation')) {
        throw new Error('prompt uses open-ended strongest-relation objective wording');
    }
    if (tuple.proof_edge && prompt.includes(tuple.proof_edge)) {
        throw new Error('prompt leaks canonical proof_edge');
    }
    // A single leaked edge already gives away part of the answer, so flag ANY
    // match. Empty or non-string entries are skipped: `includes('')` is always
    // true and would otherwise report a leak that is not there (as would an
    // empty list under the previous `every` check).
    const edges = Array.isArray(tuple.proof_edges) ? tuple.proof_edges : [];
    const leaked = edges.some((edge) => typeof edge === 'string' && edge.length > 0 && prompt.includes(edge));
    if (leaked) {
        throw new Error('prompt leaks canonical proof_edges');
    }
}
111
/**
 * Checks that a parsed telemetry row has the expected shape.
 *
 * @param row Value parsed from one line of the telemetry log.
 * @returns The same `row`, unchanged, when every check passes.
 * @throws Error naming the first failed check: not an object, tool not
 *         allowlisted, missing input/output, non-numeric duration or token
 *         estimate, or missing/empty timestamp.
 */
function validateTelemetryRow(row) {
    const isObject = Boolean(row) && typeof row === 'object';
    if (!isObject) {
        throw new Error('invalid telemetry row');
    }
    if (ALLOWED_TOOLS.has(row.tool) === false) {
        throw new Error(`telemetry row contains non-allowlisted tool: ${String(row.tool)}`);
    }
    const hasPayload = 'input' in row && 'output' in row;
    if (!hasPayload) {
        throw new Error('telemetry row missing input/output');
    }
    const hasMetrics = typeof row.durationMs === 'number' && typeof row.totalTokensEst === 'number';
    if (!hasMetrics) {
        throw new Error('telemetry row missing duration/token estimates');
    }
    const hasTimestamp = typeof row.timestamp === 'string' && row.timestamp.length > 0;
    if (!hasTimestamp) {
        throw new Error('telemetry row missing timestamp');
    }
    return row;
}
@@ -0,0 +1,155 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import fs from 'node:fs/promises';
4
+ import os from 'node:os';
5
+ import path from 'node:path';
6
+ import { buildSubagentPrompt, loadSubagentLiveCaseResult, prepareSubagentCaseRun } from './subagent-live.js';
7
// Canonical benchmark case shared by every test in this file.
const fakeCase = {
    label: 'weapon_powerup',
    start_query: 'weapon powerup equip chain',
    retry_query: 'retry',
    proof_contexts: ['HoldPickup', 'EquipWithEvent'],
    proof_cypher: "MATCH (src)-[:CodeRelation {type: 'CALLS'}]->(dst) WHERE (src.name = 'HoldPickup' AND dst.name = 'PickItUp') OR (src.name = 'EquipWithEvent' AND dst.name = 'Equip') RETURN src.name, dst.name",
    tool_plan: [{ tool: 'query', input: { query: 'WeaponPowerUp' } }],
    live_task: {
        objective: 'pickup/equip bridge proof',
        symbol_seed: 'WeaponPowerUp',
        resource_seed: 'Assets/NEON/DataAssets/Powerups/1_newWeapon/0_pick/法器_Orb/1_weapon_orb_key.asset',
    },
    semantic_tuple: {
        resource_anchor: 'Assets/NEON/DataAssets/Powerups/1_newWeapon/0_pick/法器_Orb/1_weapon_orb_key.asset',
        symbol_anchor: 'WeaponPowerUp',
        proof_edges: [
            'HoldPickup -> WeaponPowerUp.PickItUp',
            'EquipWithEvent -> WeaponPowerUp.Equip',
        ],
        closure_status: 'not_verified_full',
    },
};
const REPO = 'neonspark-core';
// Telemetry row for the seed query; reused by the passing and the evidence-missing cases.
const seedQueryRow = {
    tool: 'query',
    input: { query: 'WeaponPowerUp', repo: REPO },
    output: {
        candidates: [{ name: 'WeaponPowerUp' }],
        resource_hints: [{ target: fakeCase.semantic_tuple.resource_anchor }],
    },
    durationMs: 12,
    totalTokensEst: 120,
    timestamp: '2026-04-08T00:00:00.000Z',
};
/**
 * Runs `body` with a fresh temporary run directory and removes the directory
 * afterwards, whether the body passes or throws, so test runs do not leave
 * `agent-safe-run-*` directories behind in the OS temp folder.
 */
async function withRunDir(body) {
    const runDir = await fs.mkdtemp(path.join(os.tmpdir(), 'agent-safe-run-'));
    try {
        return await body(runDir);
    }
    finally {
        await fs.rm(runDir, { recursive: true, force: true });
    }
}
/** Writes prompt.txt, result.json and telemetry.jsonl (one row per line) into `runDir`. */
async function writeRunArtifacts(runDir, finalResult, telemetryRows) {
    const resultPath = path.join(runDir, 'result.json');
    const prompt = buildSubagentPrompt(fakeCase, { repo: REPO, runDir, resultPath });
    await fs.writeFile(path.join(runDir, 'prompt.txt'), prompt, 'utf-8');
    await fs.writeFile(resultPath, JSON.stringify(finalResult, null, 2), 'utf-8');
    const telemetryText = telemetryRows.map((row) => JSON.stringify(row)).join('\n');
    await fs.writeFile(path.join(runDir, 'telemetry.jsonl'), telemetryText, 'utf-8');
}
test('buildSubagentPrompt includes wrapper command and final JSON schema without leaking canonical proof edges', () => {
    const prompt = buildSubagentPrompt(fakeCase, {
        repo: REPO,
        runDir: '/tmp/run',
        resultPath: '/tmp/run/result.json',
    });
    assert.equal(prompt.includes('telemetry-tool.js'), true);
    assert.equal(prompt.includes('Final JSON schema:'), true);
    assert.equal(prompt.includes('strongest supported relation'), false);
    assert.equal(prompt.includes('pickup/equip bridge proof'), true);
    assert.equal(prompt.includes('HoldPickup -> WeaponPowerUp.PickItUp'), false);
    assert.equal(prompt.includes('EquipWithEvent -> WeaponPowerUp.Equip'), false);
});
test('prepareSubagentCaseRun writes prompt artifact', () => withRunDir(async (runDir) => {
    const prepared = await prepareSubagentCaseRun(runDir, fakeCase, { repo: REPO });
    const prompt = await fs.readFile(prepared.promptPath, 'utf-8');
    assert.equal(prompt.includes('WeaponPowerUp'), true);
    assert.equal(prompt.includes('telemetry-tool.js'), true);
}));
test('loadSubagentLiveCaseResult validates telemetry rows and derives semantic tuple from tool evidence', () => withRunDir(async (runDir) => {
    await writeRunArtifacts(runDir, {
        resource_anchor: fakeCase.semantic_tuple.resource_anchor,
        symbol_anchor: fakeCase.semantic_tuple.symbol_anchor,
        proof_edges: fakeCase.semantic_tuple.proof_edges,
        closure_status: 'not_verified_full',
        summary: 'Found supporting pickup/equip evidence.',
    }, [
        seedQueryRow,
        {
            tool: 'cypher',
            input: { query: fakeCase.proof_cypher, repo: REPO },
            output: {
                markdown: '| src.name | dst.name |\n| --- | --- |\n| HoldPickup | PickItUp |\n| EquipWithEvent | Equip |',
                row_count: 2,
            },
            durationMs: 8,
            totalTokensEst: 80,
            timestamp: '2026-04-08T00:00:01.000Z',
        },
    ]);
    const result = await loadSubagentLiveCaseResult(runDir, fakeCase);
    assert.equal(result.normalized_tuple_pass, true);
    assert.equal(result.evidence_validation_pass, true);
    assert.equal(result.failure_class, undefined);
    assert.equal(result.semantic_tuple_pass, true);
    assert.equal(result.tool_calls_to_completion, 2);
    assert.equal(result.tokens_to_completion, 200);
}));
test('loadSubagentLiveCaseResult keeps case non-passing when evidence validation fails', () => withRunDir(async (runDir) => {
    // The final answer normalises to the canonical tuple, but the telemetry
    // contains no call that evidences the proof edges.
    await writeRunArtifacts(runDir, {
        resource_anchor: fakeCase.semantic_tuple.resource_anchor,
        symbol_anchor: 'Game.Runtime.WeaponPowerUp',
        proof_edges: [
            { caller: 'HoldPickup', callee: 'WeaponPowerUp.PickItUp' },
            { caller: 'EquipWithEvent', callee: 'WeaponPowerUp.Equip' },
        ],
        closure_status: 'not_verified_full',
        summary: 'Normalized tuple inferred from final response.',
    }, [seedQueryRow]);
    const result = await loadSubagentLiveCaseResult(runDir, fakeCase);
    assert.equal(result.normalized_tuple_pass, true);
    assert.equal(result.evidence_validation_pass, false);
    assert.equal(result.semantic_tuple_pass, false);
    assert.equal(result.failure_class, 'evidence_missing');
}));
test('loadSubagentLiveCaseResult rejects non-allowlisted tools', () => withRunDir(async (runDir) => {
    await writeRunArtifacts(runDir, { summary: 'noop' }, [
        {
            tool: 'impact',
            input: {},
            output: {},
            durationMs: 1,
            totalTokensEst: 1,
            timestamp: '2026-04-08T00:00:00.000Z',
        },
    ]);
    await assert.rejects(() => loadSubagentLiveCaseResult(runDir, fakeCase), /non-allowlisted tool/);
}));
@@ -0,0 +1,9 @@
1
/** Tool names the telemetry wrapper accepts. */
type TelemetryToolName = 'query' | 'context' | 'cypher';

/** Arguments for a single recorded tool invocation. */
interface TelemetryToolOptions {
    /** Directory whose `telemetry.jsonl` receives the recorded row. */
    runDir: string;
    /** Tool to invoke. */
    tool: TelemetryToolName;
    /** Arguments forwarded to the tool. */
    input: Record<string, unknown>;
}

/** Invokes one tool, appends a telemetry row, and resolves with the tool output. */
export declare function invokeTelemetryTool(options: TelemetryToolOptions): Promise<unknown>;

/** CLI entry point: parses `argv`, invokes the tool, and prints the output as JSON. */
export declare function telemetryToolMain(argv: string[]): Promise<void>;

export {};
@@ -0,0 +1,77 @@
1
import fs from 'node:fs/promises';
import { writeSync } from 'node:fs';
import path from 'node:path';
import { pathToFileURL } from 'node:url';
import { closeLbug } from '../../mcp/core/lbug-adapter.js';
import { LocalBackend } from '../../mcp/local/local-backend.js';
import { estimateTokens } from '../u2-e2e/metrics.js';
7
/**
 * Invokes one backend tool and records the call in the run's telemetry log.
 *
 * A row with the tool name, input, output, elapsed milliseconds (one decimal),
 * an estimated token count for input plus output, and an ISO timestamp is
 * appended as a single JSON line to `<runDir>/telemetry.jsonl`.
 *
 * @param options `tool`, `input` and `runDir` for this invocation.
 * @returns The raw tool output.
 * @throws Error when the backend reports no indexed repositories; tool and
 *         file-system errors propagate. `closeLbug()` runs once the tool call
 *         has been attempted, whether or not it succeeded.
 */
export async function invokeTelemetryTool(options) {
    const backend = new LocalBackend();
    if (!(await backend.init())) {
        throw new Error('No indexed repositories found. Run analyze first.');
    }
    const startedAt = performance.now();
    try {
        const output = await backend.callTool(options.tool, options.input);
        const elapsedMs = performance.now() - startedAt;
        const inputTokens = estimateTokens(JSON.stringify(options.input));
        const outputTokens = estimateTokens(JSON.stringify(output));
        const telemetryLine = JSON.stringify({
            tool: options.tool,
            input: options.input,
            output,
            durationMs: Number(elapsedMs.toFixed(1)),
            totalTokensEst: inputTokens + outputTokens,
            timestamp: new Date().toISOString(),
        });
        await fs.mkdir(options.runDir, { recursive: true });
        const logPath = path.join(options.runDir, 'telemetry.jsonl');
        await fs.appendFile(logPath, `${telemetryLine}\n`, 'utf-8');
        return output;
    }
    finally {
        await closeLbug();
    }
}
33
/**
 * CLI entry point for the telemetry wrapper.
 *
 * Parses the command-line arguments, runs the requested tool, and writes the
 * tool output to file descriptor 1 as pretty-printed JSON plus a newline.
 *
 * @param argv Command-line arguments without the node/script prefix.
 */
export async function telemetryToolMain(argv) {
    const result = await invokeTelemetryTool(parseArgs(argv));
    const rendered = `${JSON.stringify(result, null, 2)}\n`;
    writeSync(1, rendered);
}
38
+ function parseArgs(argv) {
39
+ let runDir = '';
40
+ let tool = '';
41
+ let inputText = '';
42
+ for (let index = 0; index < argv.length; index += 1) {
43
+ const token = argv[index];
44
+ if (token === '--run-dir') {
45
+ runDir = argv[index + 1] || '';
46
+ index += 1;
47
+ continue;
48
+ }
49
+ if (token === '--tool') {
50
+ tool = (argv[index + 1] || '');
51
+ index += 1;
52
+ continue;
53
+ }
54
+ if (token === '--input') {
55
+ inputText = argv[index + 1] || '';
56
+ index += 1;
57
+ continue;
58
+ }
59
+ }
60
+ if (!runDir || !tool || !inputText) {
61
+ throw new Error('Usage: telemetry-tool --run-dir <dir> --tool <query|context|cypher> --input <json>');
62
+ }
63
+ if (!['query', 'context', 'cypher'].includes(tool)) {
64
+ throw new Error(`Unsupported tool: ${tool}`);
65
+ }
66
+ return {
67
+ runDir,
68
+ tool,
69
+ input: JSON.parse(inputText),
70
+ };
71
+ }
72
// Run the CLI only when this module is the process entry point.
// `import.meta.url` is a percent-encoded file URL, so the script path is
// converted with pathToFileURL() before comparing. Plain `file://` + path
// concatenation does not match for paths containing spaces or non-ASCII
// characters, nor for Windows drive paths, and the tool would then exit
// silently without doing anything.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
    telemetryToolMain(process.argv.slice(2)).catch((error) => {
        // Report the failure on stderr and signal it through the exit code.
        process.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
        process.exitCode = 1;
    });
}
@@ -0,0 +1,61 @@
1
+ import type { AgentContextToolStep } from '../agent-context/types.js';
2
/** Keys of the benchmark cases in a suite. */
export declare const AGENT_SAFE_CASE_KEYS: readonly ["weapon_powerup", "reload"];
export type AgentSafeCaseKey = (typeof AGENT_SAFE_CASE_KEYS)[number];

/** Keys of the benchmark tracks. */
export declare const AGENT_SAFE_TRACK_KEYS: readonly ["workflow_replay_full", "workflow_replay_slim", "same_script_full", "same_script_slim", "subagent_live"];
export type AgentSafeTrackKey = (typeof AGENT_SAFE_TRACK_KEYS)[number];

/** Canonical answer a case is scored against. */
export interface SemanticTuple {
    resource_anchor: string;
    symbol_anchor: string;
    /** Single canonical proof edge, when the case has exactly one. */
    proof_edge?: string;
    /** Canonical proof edges, when the case has several. */
    proof_edges?: string[];
    closure_status: 'not_verified_full' | 'verified_partial' | 'verified_full' | 'failed';
}

/** Per-run drift signals; how each one is computed is not defined in this file. */
export interface SemanticDriftMetrics {
    anchor_top1_pass: boolean;
    recommended_follow_up_hit: boolean;
    post_narrowing_anchor_pass: boolean;
    post_narrowing_follow_up_hit: boolean;
    ambiguity_detour_count: number;
    placeholder_leak_detected: boolean;
    heuristic_top_summary_detected: boolean;
    live_tool_evidence_pass: boolean;
    freeze_ready: boolean;
    guid_invariance_pass: boolean;
    tier_envelope: {
        facts_present: boolean;
        closure_present: boolean;
        clues_present: boolean;
        semantic_order_pass: boolean;
        summary_source: string;
    };
}

/** Task description given to a live subagent. */
export interface AgentSafeLiveTask {
    /** Goal category shown in the prompt. */
    objective: string;
    /** Symbol/class starting seed. */
    symbol_seed: string;
    /** Resource starting seed. */
    resource_seed: string;
}

/** Definition of one benchmark case. */
export interface AgentSafeBenchmarkCase {
    label: string;
    start_query: string;
    retry_query: string;
    start_query_input?: Record<string, unknown>;
    retry_query_input?: Record<string, unknown>;
    proof_contexts: string[];
    proof_cypher: string;
    tool_plan: AgentContextToolStep[];
    live_task: AgentSafeLiveTask;
    semantic_tuple: SemanticTuple;
}

/** Thresholds of a benchmark suite. */
export interface AgentSafeBenchmarkThresholds {
    workflowReplay: {
        maxSteps: number;
    };
    /** One token-reduction threshold per case key. */
    tokenReduction: {
        weapon_powerup: number;
        reload: number;
    };
}

/** A full suite: thresholds plus one case per case key. */
export interface AgentSafeBenchmarkSuite {
    thresholds: AgentSafeBenchmarkThresholds;
    cases: Record<AgentSafeCaseKey, AgentSafeBenchmarkCase>;
}
@@ -0,0 +1,8 @@
1
/** Keys of the benchmark cases, in declaration order. */
export const AGENT_SAFE_CASE_KEYS = [
    'weapon_powerup',
    'reload',
];

/** Keys of the benchmark tracks, in declaration order. */
export const AGENT_SAFE_TRACK_KEYS = [
    'workflow_replay_full',
    'workflow_replay_slim',
    'same_script_full',
    'same_script_slim',
    'subagent_live',
];
@@ -0,0 +1,47 @@
1
/** One scenario's runtime-claim outcome as stored in a provenance artifact. */
export interface RuntimeProvenanceInputRecord {
    scenario_id: string;
    query_text: string;
    symbol_name?: string;
    resource_seed_path?: string;
    mapped_seed_targets?: string[];
    runtime_claim: {
        status: string;
        evidence_level: string;
        reason?: string;
        hops_count?: number;
        gaps_count?: number;
    };
}

/** Artifact document written to disk for one benchmark run. */
export interface RuntimeProvenanceArtifact {
    /** Generation timestamp. */
    generated_at: string;
    repo: string;
    mode: 'offline_provenance_only';
    records: RuntimeProvenanceInputRecord[];
}

/** Index row describing one written artifact. */
export interface RuntimeProvenanceIndexEntry {
    generated_at: string;
    repo: string;
    /** Path of the artifact file. */
    artifact_path: string;
    /** Hex SHA-256 digest recorded for the artifact. */
    sha256: string;
    /** Number of records in the artifact. */
    record_count: number;
    generator: 'runtime-poc-provenance-v1';
}

/** Contents of `provenance-index.json`. */
export interface RuntimeProvenanceIndex {
    version: '1.0.0';
    entries: RuntimeProvenanceIndexEntry[];
}

/** Builds the in-memory artifact from raw records, normalising each record. */
export declare function buildRuntimeProvenanceArtifact(input: {
    repo: string;
    records: RuntimeProvenanceInputRecord[];
    generatedAt?: string;
}): RuntimeProvenanceArtifact;

/** Writes the artifact file and updates the provenance index in `reportDir`. */
export declare function writeRuntimeProvenanceArtifact(input: {
    reportDir: string;
    repo: string;
    records: RuntimeProvenanceInputRecord[];
}): Promise<{
    artifactPath: string;
    indexPath: string;
    sha256: string;
    artifact: RuntimeProvenanceArtifact;
}>;
@@ -0,0 +1,89 @@
1
+ import crypto from 'node:crypto';
2
+ import fs from 'node:fs/promises';
3
+ import path from 'node:path';
4
+ function normalizeRecord(input) {
5
+ return {
6
+ scenario_id: String(input.scenario_id || '').trim(),
7
+ query_text: String(input.query_text || '').trim(),
8
+ ...(String(input.symbol_name || '').trim() ? { symbol_name: String(input.symbol_name).trim() } : {}),
9
+ ...(String(input.resource_seed_path || '').trim()
10
+ ? { resource_seed_path: String(input.resource_seed_path).trim() }
11
+ : {}),
12
+ mapped_seed_targets: Array.isArray(input.mapped_seed_targets)
13
+ ? input.mapped_seed_targets.map((value) => String(value || '').trim()).filter(Boolean)
14
+ : [],
15
+ runtime_claim: {
16
+ status: String(input.runtime_claim?.status || 'failed').trim(),
17
+ evidence_level: String(input.runtime_claim?.evidence_level || 'none').trim(),
18
+ ...(String(input.runtime_claim?.reason || '').trim()
19
+ ? { reason: String(input.runtime_claim?.reason).trim() }
20
+ : {}),
21
+ ...(Number.isFinite(Number(input.runtime_claim?.hops_count))
22
+ ? { hops_count: Number(input.runtime_claim?.hops_count) }
23
+ : {}),
24
+ ...(Number.isFinite(Number(input.runtime_claim?.gaps_count))
25
+ ? { gaps_count: Number(input.runtime_claim?.gaps_count) }
26
+ : {}),
27
+ },
28
+ };
29
+ }
30
/**
 * Formats a date as an ISO-8601 UTC timestamp without milliseconds.
 *
 * @param date Date to format; defaults to the current time.
 * @returns For example `2026-04-08T01:02:03Z`.
 */
function toIsoStamp(date = new Date()) {
    const withMillis = date.toISOString();
    // Drop the ".sss" fraction and keep the trailing "Z".
    const stamp = withMillis.replace(/\.\d{3}Z$/, 'Z');
    return stamp;
}
33
/**
 * Builds the in-memory provenance artifact for a run.
 *
 * @param input `repo` name, raw `records`, and an optional `generatedAt`
 *        timestamp; when it is omitted the current time is used, formatted
 *        without milliseconds.
 * @returns Artifact in `offline_provenance_only` mode with every record
 *          normalised.
 */
export function buildRuntimeProvenanceArtifact(input) {
    const generatedAt = String(input.generatedAt || toIsoStamp());
    const repo = String(input.repo || '').trim();
    const records = (input.records || []).map((record) => normalizeRecord(record));
    return {
        generated_at: generatedAt,
        repo,
        mode: 'offline_provenance_only',
        records,
    };
}
41
/**
 * Computes the SHA-256 digest of `raw`.
 *
 * @param raw Data to hash.
 * @returns Lower-case hex digest.
 */
function buildSha256(raw) {
    const hasher = crypto.createHash('sha256');
    hasher.update(raw);
    return hasher.digest('hex');
}
44
/**
 * Writes a provenance artifact file and records it in the provenance index.
 *
 * The artifact is written to `<reportDir>/provenance-<stamp>.json`, with the
 * colons of the timestamp replaced by dashes, and an entry for it is placed at
 * the front of `<reportDir>/provenance-index.json`. An existing entry for the
 * same artifact path is replaced.
 *
 * @param input `reportDir` (created if missing), `repo` and raw `records`.
 * @returns Paths of the artifact and the index, the artifact's SHA-256, and
 *          the artifact object itself.
 */
export async function writeRuntimeProvenanceArtifact(input) {
    const reportDir = path.resolve(input.reportDir);
    await fs.mkdir(reportDir, { recursive: true });
    const artifact = buildRuntimeProvenanceArtifact({
        repo: input.repo,
        records: input.records,
    });
    const stampForFile = artifact.generated_at.replace(/[:]/g, '-');
    const artifactPath = path.join(reportDir, `provenance-${stampForFile}.json`);
    // Hash exactly what is written, trailing newline included, so the digest
    // in the index matches a checksum taken over the file on disk.
    const artifactFileText = `${JSON.stringify(artifact, null, 2)}\n`;
    await fs.writeFile(artifactPath, artifactFileText, 'utf-8');
    const sha256 = buildSha256(artifactFileText);
    const indexPath = path.join(reportDir, 'provenance-index.json');
    let index = {
        version: '1.0.0',
        entries: [],
    };
    try {
        const existing = JSON.parse(await fs.readFile(indexPath, 'utf-8'));
        if (existing && Array.isArray(existing.entries)) {
            index = {
                version: '1.0.0',
                entries: existing.entries,
            };
        }
    }
    catch {
        // Best effort: a missing or unreadable index starts over from empty.
    }
    const entry = {
        generated_at: artifact.generated_at,
        repo: artifact.repo,
        artifact_path: artifactPath,
        sha256,
        record_count: artifact.records.length,
        generator: 'runtime-poc-provenance-v1',
    };
    // `row?.` keeps a null or otherwise malformed row in an existing index
    // from crashing the write.
    const otherEntries = index.entries.filter((row) => String(row?.artifact_path || '') !== artifactPath);
    index.entries = [entry, ...otherEntries];
    await fs.writeFile(indexPath, `${JSON.stringify(index, null, 2)}\n`, 'utf-8');
    return {
        artifactPath,
        indexPath,
        sha256,
        artifact,
    };
}
@@ -0,0 +1,31 @@
1
type RuntimeStatus = 'verified_full' | 'verified_partial' | 'failed';
type RuntimeEvidenceLevel = 'verified_chain' | 'verified_segment' | 'clue' | 'none';

/** Outcome recorded for one evaluation mode of a case. */
interface RuntimePocOutcome {
    status: RuntimeStatus;
    evidence_level: RuntimeEvidenceLevel;
    reason?: string;
}

/** One benchmark case with its outcome in both evaluation modes. */
export interface RuntimePocCase {
    case_id: string;
    query_text: string;
    symbol_name?: string;
    resource_seed_path?: string;
    mapped_seed_targets?: string[];
    baseline: RuntimePocOutcome;
    graph_only: RuntimePocOutcome;
}

/** Paths of the files produced by a benchmark run. */
export interface RuntimePocBenchmarkResult {
    comparisonPath: string;
    summaryPath: string;
    provenanceArtifactPath: string;
    provenanceIndexPath: string;
}

/** Runs the runtime PoC benchmark and resolves with the paths of its outputs. */
export declare function runRuntimePocBenchmark(input: {
    repo: string;
    reportDir: string;
    casesPath?: string;
}): Promise<RuntimePocBenchmarkResult>;

export {};