@veewo/gitnexus 1.5.0 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/agent-context/runner.js +3 -0
- package/dist/benchmark/agent-context/runner.test.js +22 -0
- package/dist/benchmark/agent-context/tool-runner.d.ts +7 -6
- package/dist/benchmark/agent-safe-query-context/io.d.ts +2 -0
- package/dist/benchmark/agent-safe-query-context/io.js +86 -0
- package/dist/benchmark/agent-safe-query-context/io.test.d.ts +1 -0
- package/dist/benchmark/agent-safe-query-context/io.test.js +13 -0
- package/dist/benchmark/agent-safe-query-context/report.d.ts +57 -0
- package/dist/benchmark/agent-safe-query-context/report.js +159 -0
- package/dist/benchmark/agent-safe-query-context/report.test.d.ts +1 -0
- package/dist/benchmark/agent-safe-query-context/report.test.js +362 -0
- package/dist/benchmark/agent-safe-query-context/runner.d.ts +44 -0
- package/dist/benchmark/agent-safe-query-context/runner.js +406 -0
- package/dist/benchmark/agent-safe-query-context/runner.test.d.ts +1 -0
- package/dist/benchmark/agent-safe-query-context/runner.test.js +290 -0
- package/dist/benchmark/agent-safe-query-context/semantic-tuple.d.ts +20 -0
- package/dist/benchmark/agent-safe-query-context/semantic-tuple.js +225 -0
- package/dist/benchmark/agent-safe-query-context/semantic-tuple.test.d.ts +1 -0
- package/dist/benchmark/agent-safe-query-context/semantic-tuple.test.js +122 -0
- package/dist/benchmark/agent-safe-query-context/subagent-live.d.ts +47 -0
- package/dist/benchmark/agent-safe-query-context/subagent-live.js +128 -0
- package/dist/benchmark/agent-safe-query-context/subagent-live.test.d.ts +1 -0
- package/dist/benchmark/agent-safe-query-context/subagent-live.test.js +155 -0
- package/dist/benchmark/agent-safe-query-context/telemetry-tool.d.ts +9 -0
- package/dist/benchmark/agent-safe-query-context/telemetry-tool.js +77 -0
- package/dist/benchmark/agent-safe-query-context/types.d.ts +61 -0
- package/dist/benchmark/agent-safe-query-context/types.js +8 -0
- package/dist/benchmark/runtime-poc/provenance-artifact.d.ts +47 -0
- package/dist/benchmark/runtime-poc/provenance-artifact.js +89 -0
- package/dist/benchmark/runtime-poc/runner.d.ts +31 -0
- package/dist/benchmark/runtime-poc/runner.js +163 -0
- package/dist/benchmark/u2-e2e/hydration-policy-repeatability-runner.d.ts +8 -0
- package/dist/benchmark/u2-e2e/hydration-policy-repeatability-runner.js +21 -0
- package/dist/benchmark/u2-e2e/phase2-runtime-claim-acceptance-runner.d.ts +0 -1
- package/dist/benchmark/u2-e2e/phase2-runtime-claim-acceptance-runner.js +53 -51
- package/dist/benchmark/u2-e2e/phase2-runtime-claim-acceptance-runner.test.js +0 -1
- package/dist/benchmark/u2-e2e/phase5-rule-lab-acceptance-runner.d.ts +1 -1
- package/dist/benchmark/u2-e2e/phase5-rule-lab-acceptance-runner.js +82 -18
- package/dist/benchmark/u2-e2e/phase5-rule-lab-acceptance-runner.test.js +1 -2
- package/dist/benchmark/u2-e2e/retrieval-runner.js +15 -7
- package/dist/benchmark/u2-e2e/retrieval-runner.test.js +46 -0
- package/dist/cli/ai-context.js +2 -12
- package/dist/cli/ai-context.test.js +8 -0
- package/dist/cli/analyze-runtime-summary.js +1 -0
- package/dist/cli/analyze-runtime-summary.test.js +2 -0
- package/dist/cli/analyze-summary.d.ts +2 -0
- package/dist/cli/analyze-summary.js +24 -0
- package/dist/cli/analyze-summary.test.js +65 -1
- package/dist/cli/analyze.js +5 -1
- package/dist/cli/benchmark-agent-safe-query-context.d.ts +20 -0
- package/dist/cli/benchmark-agent-safe-query-context.js +39 -0
- package/dist/cli/benchmark-agent-safe-query-context.test.d.ts +1 -0
- package/dist/cli/benchmark-agent-safe-query-context.test.js +271 -0
- package/dist/cli/benchmark.d.ts +29 -0
- package/dist/cli/benchmark.js +55 -0
- package/dist/cli/index.js +23 -0
- package/dist/cli/rule-lab.d.ts +3 -7
- package/dist/cli/rule-lab.js +13 -22
- package/dist/cli/rule-lab.test.js +23 -3
- package/dist/cli/tool.d.ts +2 -0
- package/dist/cli/tool.js +2 -0
- package/dist/core/config/unity-config.d.ts +0 -1
- package/dist/core/config/unity-config.js +0 -1
- package/dist/core/ingestion/pipeline.js +35 -6
- package/dist/core/ingestion/unity-lifecycle-synthetic-calls.test.js +18 -20
- package/dist/core/ingestion/unity-parity-seed.d.ts +2 -1
- package/dist/core/ingestion/unity-parity-seed.js +8 -0
- package/dist/core/ingestion/unity-resource-processor.d.ts +11 -0
- package/dist/core/ingestion/unity-resource-processor.js +102 -0
- package/dist/core/ingestion/unity-resource-processor.test.js +449 -0
- package/dist/core/ingestion/unity-runtime-binding-rules.d.ts +15 -0
- package/dist/core/ingestion/unity-runtime-binding-rules.js +178 -30
- package/dist/core/lbug/csv-generator.test.js +2 -2
- package/dist/core/unity/doc-contract.test.d.ts +1 -0
- package/dist/core/unity/doc-contract.test.js +30 -0
- package/dist/core/unity/prefab-source-scan.d.ts +25 -0
- package/dist/core/unity/prefab-source-scan.js +152 -0
- package/dist/core/unity/prefab-source-scan.test.d.ts +1 -0
- package/dist/core/unity/prefab-source-scan.test.js +70 -0
- package/dist/core/unity/scan-context.d.ts +12 -0
- package/dist/core/unity/scan-context.js +50 -2
- package/dist/core/unity/scan-context.test.js +74 -0
- package/dist/mcp/local/agent-safe-response.d.ts +10 -0
- package/dist/mcp/local/agent-safe-response.js +639 -0
- package/dist/mcp/local/derived-process-reader.js +1 -1
- package/dist/mcp/local/local-backend.d.ts +18 -1
- package/dist/mcp/local/local-backend.js +319 -125
- package/dist/mcp/local/process-confidence.d.ts +1 -2
- package/dist/mcp/local/process-confidence.js +0 -3
- package/dist/mcp/local/process-confidence.test.js +4 -2
- package/dist/mcp/local/process-evidence.d.ts +1 -8
- package/dist/mcp/local/process-evidence.js +1 -23
- package/dist/mcp/local/process-evidence.test.js +2 -16
- package/dist/mcp/local/process-ref.d.ts +1 -1
- package/dist/mcp/local/runtime-chain-closure-evaluator.d.ts +33 -0
- package/dist/mcp/local/runtime-chain-closure-evaluator.js +273 -0
- package/dist/mcp/local/runtime-chain-graph-candidates.d.ts +23 -0
- package/dist/mcp/local/runtime-chain-graph-candidates.js +131 -0
- package/dist/mcp/local/runtime-chain-verify.d.ts +1 -1
- package/dist/mcp/local/runtime-chain-verify.js +149 -138
- package/dist/mcp/local/runtime-chain-verify.test.js +126 -68
- package/dist/mcp/local/runtime-claim-rule-registry.d.ts +4 -0
- package/dist/mcp/local/runtime-claim-rule-registry.js +4 -0
- package/dist/mcp/local/runtime-claim-rule-registry.test.js +37 -4
- package/dist/mcp/local/runtime-claim.d.ts +11 -0
- package/dist/mcp/local/runtime-claim.js +28 -0
- package/dist/mcp/local/unity-evidence-view.d.ts +1 -1
- package/dist/mcp/local/unity-evidence-view.js +1 -1
- package/dist/mcp/local/unity-evidence-view.test.js +22 -0
- package/dist/mcp/tools.js +51 -21
- package/dist/rule-lab/analyze.d.ts +2 -1
- package/dist/rule-lab/analyze.js +94 -59
- package/dist/rule-lab/analyze.test.js +238 -20
- package/dist/rule-lab/curate.d.ts +2 -1
- package/dist/rule-lab/curate.js +24 -3
- package/dist/rule-lab/curate.test.js +65 -0
- package/dist/rule-lab/curation-input-builder.d.ts +45 -0
- package/dist/rule-lab/curation-input-builder.js +133 -0
- package/dist/rule-lab/promote.js +80 -7
- package/dist/rule-lab/promote.test.js +150 -0
- package/dist/rule-lab/review-pack.d.ts +3 -0
- package/dist/rule-lab/review-pack.js +41 -1
- package/dist/rule-lab/review-pack.test.js +67 -0
- package/dist/rule-lab/types.d.ts +29 -0
- package/dist/types/pipeline.d.ts +3 -0
- package/package.json +4 -3
- package/scripts/run-node-tests.mjs +61 -0
- package/skills/_shared/unity-rule-authoring-contract.md +64 -0
- package/skills/_shared/unity-runtime-process-contract.md +16 -0
- package/skills/gitnexus-cli.md +8 -0
- package/skills/gitnexus-debugging.md +9 -0
- package/skills/gitnexus-exploring.md +66 -18
- package/skills/gitnexus-guide.md +42 -3
- package/skills/gitnexus-impact-analysis.md +8 -0
- package/skills/gitnexus-pr-review.md +8 -0
- package/skills/gitnexus-refactoring.md +8 -0
- package/skills/gitnexus-unity-rule-gen.md +66 -312
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { estimateTokens } from '../u2-e2e/metrics.js';
|
|
3
|
+
import { createAgentContextToolRunner } from '../agent-context/tool-runner.js';
|
|
4
|
+
import { deriveSemanticTuple, semanticTuplePass } from './semantic-tuple.js';
|
|
5
|
+
// Follow-up text treated as a leaked placeholder: detectPlaceholderLeak
// lower-cases it and searches for it inside the signal texts of tool outputs.
const PLACEHOLDER_FOLLOW_UP = 'Reload NEON.Game.Graph.Nodes.Reloads';
|
|
6
|
+
/**
 * Replays a benchmark case against a tool runner and summarises the outcome.
 *
 * Step order: the start query, an optional retry query (only when an anchor is
 * still missing), one `context` call per entry of `proof_contexts`, and finally
 * one `cypher` call. After every step the semantic tuple is re-derived from all
 * outputs collected so far; follow-up steps stop as soon as the tuple passes or
 * `maxSteps` calls have been made. The start query always runs.
 *
 * @param {object} benchmarkCase - Case definition; reads `start_query(_input)`,
 *   `retry_query(_input)`, `proof_contexts`, `proof_cypher`, `semantic_tuple`.
 * @param {object} runner - Object exposing async `query`, `context`, `cypher`.
 * @param {object} [options] - `maxSteps` (default 5), `repo`, `responseProfile`.
 * @returns {Promise<object>} Replay report (steps, drift metrics, GUID
 *   invariance, confirmed chain, retry breakdown, stop reason).
 */
export async function runWorkflowReplay(benchmarkCase, runner, options = {}) {
    const maxSteps = options.maxSteps ?? 5;
    const steps = [];
    let semanticTuple;
    let passed = false;

    // Runs one tool call, records it, then re-evaluates the semantic tuple.
    const runStep = async (tool, rawInput) => {
        const input = withReplayInput(rawInput, options.repo, options.responseProfile, tool);
        // Call through the runner so methods that rely on `this` keep their
        // receiver; passing `runner.query` directly would detach it.
        await pushStep(steps, tool, input, (payload) => runner[tool](payload));
        semanticTuple = deriveSemanticTuple(benchmarkCase.semantic_tuple, steps.map((step) => step.output));
        passed = semanticTuplePass(semanticTuple, benchmarkCase.semantic_tuple);
    };
    const budgetLeft = () => steps.length < maxSteps;

    await runStep('query', benchmarkCase.start_query_input || { query: benchmarkCase.start_query });
    if (!passed && budgetLeft() && shouldRetryQuery(semanticTuple)) {
        await runStep('query', benchmarkCase.retry_query_input || { query: benchmarkCase.retry_query });
    }
    // A case without proof contexts simply skips the context phase.
    for (const contextName of benchmarkCase.proof_contexts ?? []) {
        if (passed || !budgetLeft()) {
            break;
        }
        await runStep('context', { name: contextName });
    }
    if (!passed && budgetLeft()) {
        await runStep('cypher', { query: benchmarkCase.proof_cypher });
    }

    const guidInvariance = computeGuidInvariance(benchmarkCase, steps);
    const driftMetrics = computeSemanticDriftMetrics(benchmarkCase, steps);
    const confirmedChainSteps = deriveConfirmedChainSteps({
        steps,
        semanticTuple,
        placeholderLeakDetected: driftMetrics.placeholder_leak_detected,
    });
    const liveToolEvidencePass = countLiveToolEvidence(steps) > 0;
    const freezeReady = confirmedChainSteps.length > 0
        && !driftMetrics.placeholder_leak_detected
        && liveToolEvidencePass
        && guidInvariance.guid_invariance_pass;
    // Every call of a tool beyond its first counts as a retry.
    const retriesFor = (tool) => Math.max(0, steps.filter((step) => step.tool === tool).length - 1);

    return {
        steps,
        // The drift metrics carry placeholder `false` values for the three
        // flags below; the explicit keys after the spread overwrite them.
        ...driftMetrics,
        base: guidInvariance.base,
        guid_variant: guidInvariance.guid_variant,
        guid_invariance_pass: guidInvariance.guid_invariance_pass,
        confirmed_chain: { steps: confirmedChainSteps },
        live_tool_evidence_pass: liveToolEvidencePass,
        freeze_ready: freezeReady,
        semantic_tuple: semanticTuple,
        semantic_tuple_pass: passed,
        tool_calls_to_completion: steps.length,
        tokens_to_completion: steps.reduce((sum, step) => sum + step.totalTokensEst, 0),
        retry_breakdown: {
            query_retry_count: retriesFor('query'),
            context_retry_count: retriesFor('context'),
            cypher_retry_count: retriesFor('cypher'),
        },
        stop_reason: passed ? 'semantic_tuple_satisfied' : 'max_steps_reached',
    };
}
|
|
63
|
+
/**
 * Convenience wrapper: creates a tool runner via createAgentContextToolRunner,
 * replays the case with it, and always closes the runner afterwards.
 *
 * @param {object} benchmarkCase - Case forwarded to runWorkflowReplay.
 * @param {object} [options] - Options forwarded to runWorkflowReplay.
 * @returns {Promise<object>} Whatever runWorkflowReplay resolves to.
 */
export async function runWorkflowReplayWithDefaultRunner(benchmarkCase, options = {}) {
    const defaultRunner = await createAgentContextToolRunner();
    let replayResult;
    try {
        replayResult = await runWorkflowReplay(benchmarkCase, defaultRunner, options);
    }
    finally {
        // Runs on success and on failure alike, so the runner is never left open.
        await defaultRunner.close();
    }
    return replayResult;
}
|
|
72
|
+
/**
 * Executes one tool call, times it, and appends a step record to `steps`.
 *
 * @param {Array<object>} steps - Accumulator; mutated by pushing the new step.
 * @param {string} tool - Tool label stored on the step.
 * @param {object} input - Payload passed to `executor`.
 * @param {(input: object) => Promise<unknown>} executor - Performs the call.
 * @returns {Promise<void>}
 */
async function pushStep(steps, tool, input, executor) {
    const started = performance.now();
    const output = await executor(input);
    const durationMs = Number((performance.now() - started).toFixed(1));
    // JSON.stringify yields `undefined` (not a string) for `undefined` values,
    // e.g. when a tool resolves with nothing. Normalise to '' so the token
    // estimator is always handed a string.
    const serialize = (value) => JSON.stringify(value) ?? '';
    const totalTokensEst = estimateTokens(serialize(input)) + estimateTokens(serialize(output));
    steps.push({
        tool,
        input,
        output,
        durationMs,
        totalTokensEst,
    });
}
|
|
85
|
+
/**
 * Decides whether the query should be retried: true unless the tuple has both
 * a truthy `resource_anchor` and a truthy `symbol_anchor`.
 *
 * @param {object} tuple - Derived semantic tuple.
 * @returns {boolean}
 */
function shouldRetryQuery(tuple) {
    const hasBothAnchors = Boolean(tuple.resource_anchor && tuple.symbol_anchor);
    return !hasBothAnchors;
}
|
|
88
|
+
/**
 * Builds the payload for a replayed tool call from a shallow copy of `input`.
 * `repo` is added when truthy; `response_profile` is added only when a profile
 * is given and the tool is `query` or `context`.
 *
 * @param {object} input - Base tool input; never mutated.
 * @param {string} [repo] - Repository name to attach.
 * @param {string} [responseProfile] - Response profile to attach.
 * @param {string} [tool] - Tool the payload is for.
 * @returns {object} New payload object.
 */
function withReplayInput(input, repo, responseProfile, tool) {
    const payload = { ...input };
    if (repo) {
        payload.repo = repo;
    }
    const acceptsProfile = tool === 'query' || tool === 'context';
    if (responseProfile && acceptsProfile) {
        payload.response_profile = responseProfile;
    }
    return payload;
}
|
|
98
|
+
/**
 * Computes drift metrics by comparing the first step's output, and the output
 * chosen by pickPostNarrowingQueryOutput, with the case's expected anchors.
 *
 * `live_tool_evidence_pass`, `freeze_ready` and `guid_invariance_pass` are
 * always returned as `false` from here.
 *
 * @param {object} benchmarkCase - Case providing `semantic_tuple` anchors.
 * @param {Array<object>} steps - Recorded replay steps.
 * @returns {object} Drift metric record.
 */
function computeSemanticDriftMetrics(benchmarkCase, steps) {
    const initialOutput = steps[0]?.output;
    const narrowedOutput = pickPostNarrowingQueryOutput(benchmarkCase, steps);

    // True when the output's primary candidate is the expected symbol anchor.
    const anchorsOnSymbol = (output) => stringsEqual(extractPrimaryCandidate(output), benchmarkCase.semantic_tuple.symbol_anchor);
    // Uses the recommended follow-up when there is one; otherwise falls back to
    // checking every resource target found in the output.
    const pointsAtResource = (output) => {
        const followUp = extractRecommendedFollowUp(output);
        if (followUp) {
            return matchesResourceAnchor(followUp, benchmarkCase.semantic_tuple.resource_anchor);
        }
        return extractResourceTargets(output)
            .some((target) => matchesResourceAnchor(target, benchmarkCase.semantic_tuple.resource_anchor));
    };

    const leakDetected = detectPlaceholderLeak({
        benchmarkCase,
        firstOutput: initialOutput,
        postNarrowingOutput: narrowedOutput,
    });
    const heuristicSummaryDetected = detectHeuristicTopSummary({
        benchmarkCase,
        output: initialOutput,
    });
    const tierEnvelope = readTierEnvelopeMetrics(initialOutput);

    let ambiguousSteps = 0;
    for (const step of steps) {
        if (isAmbiguousOutput(step.output)) {
            ambiguousSteps += 1;
        }
    }

    return {
        anchor_top1_pass: anchorsOnSymbol(initialOutput),
        recommended_follow_up_hit: pointsAtResource(initialOutput),
        post_narrowing_anchor_pass: anchorsOnSymbol(narrowedOutput),
        post_narrowing_follow_up_hit: pointsAtResource(narrowedOutput),
        ambiguity_detour_count: ambiguousSteps,
        placeholder_leak_detected: leakDetected,
        heuristic_top_summary_detected: heuristicSummaryDetected,
        live_tool_evidence_pass: false,
        freeze_ready: false,
        guid_invariance_pass: false,
        tier_envelope: tierEnvelope,
    };
}
|
|
133
|
+
/**
 * Reads tier-envelope metrics from a tool output, preferring the explicit
 * `tier_envelope` fields and otherwise inferring them from the output.
 *
 * @param {object|undefined} output - Tool output to inspect.
 * @returns {{facts_present: boolean, closure_present: boolean,
 *   clues_present: boolean, semantic_order_pass: boolean,
 *   summary_source: string}}
 */
function readTierEnvelopeMetrics(output) {
    const envelope = output?.tier_envelope;
    // `typeof null` is 'object', so null has to be excluded explicitly;
    // otherwise `facts: null` would be reported as a present tier.
    const isObjectValue = (value) => value !== null && typeof value === 'object';
    const tierPresent = (flag, tier) => flag === true || isObjectValue(tier);

    const summarySource = String(envelope?.summary_source || '').trim()
        || inferSummarySourceFromOutput(output);
    // An explicit boolean wins; otherwise the order only fails when the summary
    // came from clues although a strong lead exists outside the clues.
    const semanticOrderPass = typeof envelope?.semantic_order_pass === 'boolean'
        ? envelope.semantic_order_pass
        : (summarySource !== 'clues' || !hasStrongLeadOutsideClues(output));

    return {
        facts_present: tierPresent(envelope?.facts_present, output?.facts),
        closure_present: tierPresent(envelope?.closure_present, output?.closure),
        clues_present: tierPresent(envelope?.clues_present, output?.clues),
        semantic_order_pass: semanticOrderPass,
        summary_source: summarySource || 'fallback',
    };
}
|
|
155
|
+
/**
 * Infers which tier the output's `summary` text was taken from by looking for
 * an exact (trimmed) match, checked in this order: fact candidate names, fact
 * process-hint summaries, the closure runtime-preview status, clue process-hint
 * summaries.
 *
 * @param {object|undefined} output - Tool output to inspect.
 * @returns {'facts'|'closure'|'clues'|'fallback'} Matching tier, or 'fallback'.
 */
function inferSummarySourceFromOutput(output) {
    if (!output)
        return 'fallback';
    const summary = String(output.summary || '').trim();
    // Without summary text there is nothing to attribute. Bail out here so an
    // empty summary cannot "match" rows that merely lack a name or summary.
    if (!summary)
        return 'fallback';
    const listOf = (value) => (Array.isArray(value) ? value : []);
    const matchesSummary = (value) => String(value || '').trim() === summary;

    const facts = output.facts;
    if (listOf(facts?.candidates).some((row) => matchesSummary(row?.name)))
        return 'facts';
    if (listOf(facts?.process_hints).some((row) => matchesSummary(row?.summary)))
        return 'facts';
    if (matchesSummary(output.closure?.runtime_preview?.status))
        return 'closure';
    if (listOf(output.clues?.process_hints).some((row) => matchesSummary(row?.summary)))
        return 'clues';
    return 'fallback';
}
|
|
177
|
+
/**
 * Reports whether the output's `facts` tier holds a strong lead: any fact
 * candidate at all, or a fact process hint with high/medium confidence whose
 * evidence mode is not `resource_heuristic`.
 *
 * @param {object|undefined} output - Tool output to inspect.
 * @returns {boolean}
 */
function hasStrongLeadOutsideClues(output) {
    if (!output) {
        return false;
    }
    const factsTier = output.facts;
    if (Array.isArray(factsTier?.candidates) && factsTier.candidates.length > 0) {
        return true;
    }
    const hints = Array.isArray(factsTier?.process_hints) ? factsTier.process_hints : [];
    for (const hint of hints) {
        const confidence = String(hint?.confidence || '').trim().toLowerCase();
        const evidenceMode = String(hint?.evidence_mode || '').trim().toLowerCase();
        const isConfident = confidence === 'high' || confidence === 'medium';
        if (isConfident && evidenceMode !== 'resource_heuristic') {
            return true;
        }
    }
    return false;
}
|
|
191
|
+
/**
 * Builds the confirmed chain as a de-duplicated list of at most 8 entries.
 *
 * Returns an empty list when a placeholder leak was detected. Otherwise it
 * uses `src -> dst` edges from the rows of cypher steps; only when there are
 * none does it fall back to the tuple's `proof_edges` followed by `proof_edge`.
 *
 * @param {{steps: Array<object>, semanticTuple: object,
 *   placeholderLeakDetected: boolean}} input
 * @returns {string[]}
 */
function deriveConfirmedChainSteps(input) {
    if (input.placeholderLeakDetected) {
        return [];
    }
    const clean = (value) => String(value || '').trim();
    const cypherEdges = input.steps
        .filter((step) => step.tool === 'cypher')
        .flatMap((step) => (Array.isArray(step.output?.rows) ? step.output.rows : []))
        .map((row) => [clean(row?.src), clean(row?.dst)])
        // Rows missing either endpoint are not edges.
        .filter(([src, dst]) => src && dst)
        .map(([src, dst]) => `${src} -> ${dst}`);
    const uniqueEdges = [...new Set(cypherEdges)];
    if (uniqueEdges.length > 0) {
        return uniqueEdges.slice(0, 8);
    }
    const tuple = input.semanticTuple;
    const declaredEdges = Array.isArray(tuple.proof_edges) ? tuple.proof_edges : [];
    const fallbackEdges = [...declaredEdges, tuple.proof_edge]
        .map(clean)
        .filter((edge) => edge.length > 0);
    return [...new Set(fallbackEdges)].slice(0, 8);
}
|
|
218
|
+
/**
 * Scores how much concrete evidence the step outputs contain. Each object
 * output earns one point per signal: a positive `row_count`, a non-empty
 * `rows`, `candidates`, `process_hints`, `processes` or `resource_hints`
 * array, and an object-valued `symbol`.
 *
 * @param {Array<object>} steps - Recorded replay steps.
 * @returns {number} Total score across all steps.
 */
function countLiveToolEvidence(steps) {
    const listSignals = ['rows', 'candidates', 'process_hints', 'processes', 'resource_hints'];
    let total = 0;
    for (const step of steps) {
        const result = step.output;
        if (!result || typeof result !== 'object') {
            continue;
        }
        if (Number(result.row_count || 0) > 0) {
            total += 1;
        }
        total += listSignals
            .filter((key) => Array.isArray(result[key]) && result[key].length > 0)
            .length;
        if (result.symbol && typeof result.symbol === 'object') {
            total += 1;
        }
    }
    return total;
}
|
|
241
|
+
/**
 * Compares the base query output with a GUID-flavoured query output.
 *
 * The base is the output chosen by pickPostNarrowingQueryOutput, or else the
 * first truthy query output. The GUID variant is the first other query output
 * accepted by isGuidVariantOutput; when there is none, the base is compared
 * with itself.
 *
 * @param {object} benchmarkCase - Case forwarded to pickPostNarrowingQueryOutput.
 * @param {Array<object>} steps - Recorded replay steps.
 * @returns {{base: object, guid_variant: object, guid_invariance_pass: boolean}}
 */
function computeGuidInvariance(benchmarkCase, steps) {
    const queryOutputs = [];
    for (const step of steps) {
        if (step.tool === 'query' && step.output) {
            queryOutputs.push(step.output);
        }
    }
    const baseOutput = pickPostNarrowingQueryOutput(benchmarkCase, steps) || queryOutputs[0];
    let guidVariantOutput = baseOutput;
    for (const candidate of queryOutputs) {
        if (candidate !== baseOutput && isGuidVariantOutput(candidate)) {
            guidVariantOutput = candidate;
            break;
        }
    }
    // Reduce an output to the two fields the comparison cares about.
    const snapshot = (output) => ({
        primary_candidate: extractPrimaryCandidate(output),
        recommended_follow_up: extractRecommendedFollowUp(output),
    });
    const base = snapshot(baseOutput);
    const guidVariant = snapshot(guidVariantOutput);
    const sameCandidate = stringsEqual(base.primary_candidate, guidVariant.primary_candidate);
    const sameFollowUp = stringsEqual(
        normalizeAssetPath(base.recommended_follow_up),
        normalizeAssetPath(guidVariant.recommended_follow_up),
    );
    return {
        base,
        guid_variant: guidVariant,
        guid_invariance_pass: sameCandidate && sameFollowUp,
    };
}
|
|
265
|
+
/**
 * Detects whether the placeholder follow-up text appears in the signal texts
 * of the first or post-narrowing output. Never reports a leak when the case's
 * start query or symbol anchor itself mentions "reload".
 *
 * @param {{benchmarkCase: object, firstOutput: object|undefined,
 *   postNarrowingOutput: object|undefined}} input
 * @returns {boolean}
 */
function detectPlaceholderLeak(input) {
    const { benchmarkCase, firstOutput, postNarrowingOutput } = input;
    const intent = `${benchmarkCase.start_query} ${benchmarkCase.semantic_tuple.symbol_anchor}`.toLowerCase();
    if (intent.includes('reload')) {
        return false;
    }
    const needle = PLACEHOLDER_FOLLOW_UP.toLowerCase();
    for (const signal of collectSignalTexts([firstOutput, postNarrowingOutput])) {
        if (signal.includes(needle)) {
            return true;
        }
    }
    return false;
}
|
|
272
|
+
/**
 * Flags an output whose lead looks heuristic although stronger evidence is
 * available in the same output.
 *
 * The lead looks heuristic when the summary contains "heuristic clue" or the
 * first process hint is a low-confidence `resource_heuristic`. Stronger
 * evidence is a high/medium-confidence process hint that is not a resource
 * heuristic, or a candidate matching the case's symbol anchor.
 *
 * @param {{benchmarkCase: object, output: object|undefined}} input
 * @returns {boolean}
 */
function detectHeuristicTopSummary(input) {
    const { output, benchmarkCase } = input;
    if (!output) {
        return false;
    }
    const lower = (value) => String(value || '').trim().toLowerCase();
    // Hints may carry either the short or the `process_`-prefixed field name.
    const modeOf = (row) => lower(row?.evidence_mode || row?.process_evidence_mode);
    const confidenceOf = (row) => lower(row?.confidence || row?.process_confidence);

    let hints = [];
    if (Array.isArray(output.process_hints)) {
        hints = output.process_hints;
    }
    else if (Array.isArray(output.processes)) {
        hints = output.processes;
    }
    const leadHint = hints[0];
    const leadIsWeakHeuristic = modeOf(leadHint) === 'resource_heuristic' && confidenceOf(leadHint) === 'low';
    if (!lower(output.summary).includes('heuristic clue') && !leadIsWeakHeuristic) {
        return false;
    }

    const strongerHintExists = hints.some((hint) => {
        const confidence = confidenceOf(hint);
        return (confidence === 'high' || confidence === 'medium') && modeOf(hint) !== 'resource_heuristic';
    });
    const anchor = benchmarkCase.semantic_tuple.symbol_anchor;
    const candidateList = Array.isArray(output.candidates) ? output.candidates : [];
    const anchorCandidateExists = stringsEqual(extractPrimaryCandidate(output), anchor)
        || candidateList.some((candidate) => stringsEqual(String(candidate?.name || ''), anchor));
    return strongerHintExists || anchorCandidateExists;
}
|
|
300
|
+
/**
 * Gathers the lower-cased, trimmed, non-empty guidance strings from a list of
 * tool outputs. Per output, in order: `summary`,
 * `decision.recommended_follow_up`, `runtime_preview.next_action`,
 * `runtime_claim.next_action`, then `next_command` and `param_delta` of each
 * `upgrade_hints` entry. Falsy outputs are skipped.
 *
 * @param {Array<object|null|undefined>} outputs - Tool outputs to scan.
 * @returns {string[]} Collected texts, in discovery order.
 */
function collectSignalTexts(outputs) {
    const texts = [];
    // Normalises a value and records it only when something is left.
    const collect = (value) => {
        const text = String(value || '').trim();
        if (text) {
            texts.push(text.toLowerCase());
        }
    };
    for (const output of outputs) {
        if (!output)
            continue;
        collect(output.summary);
        collect(output.decision?.recommended_follow_up);
        collect(output.runtime_preview?.next_action);
        collect(output.runtime_claim?.next_action);
        const upgradeHints = Array.isArray(output.upgrade_hints) ? output.upgrade_hints : [];
        for (const hint of upgradeHints) {
            // Optional chaining: a null or undefined entry is skipped rather
            // than raising a TypeError.
            collect(hint?.next_command);
            collect(hint?.param_delta);
        }
    }
    return texts;
}
|
|
333
|
+
/**
 * Returns the first `query` step output that lines up with the case's expected
 * anchors: its primary candidate equals the symbol anchor, or one of its
 * resource targets or its recommended follow-up matches the resource anchor.
 *
 * @param {object} benchmarkCase - Case providing `semantic_tuple` anchors.
 * @param {Array<object>} steps - Recorded replay steps.
 * @returns {object|undefined} Matching output, or undefined when none match.
 */
function pickPostNarrowingQueryOutput(benchmarkCase, steps) {
    for (const step of steps) {
        if (step.tool !== 'query') {
            continue;
        }
        const candidateOutput = step.output;
        const anchoredOnSymbol = stringsEqual(extractPrimaryCandidate(candidateOutput), benchmarkCase.semantic_tuple.symbol_anchor);
        if (anchoredOnSymbol) {
            return candidateOutput;
        }
        const anchoredOnTarget = extractResourceTargets(candidateOutput)
            .some((target) => matchesResourceAnchor(target, benchmarkCase.semantic_tuple.resource_anchor));
        if (anchoredOnTarget) {
            return candidateOutput;
        }
        if (matchesResourceAnchor(extractRecommendedFollowUp(candidateOutput), benchmarkCase.semantic_tuple.resource_anchor)) {
            return candidateOutput;
        }
    }
    return undefined;
}
|
|
349
|
+
/**
 * Picks the output's primary candidate name: `decision.primary_candidate`,
 * else the first candidate's `name`, else `symbol.name`, else ''.
 *
 * @param {object|undefined} output - Tool output to inspect.
 * @returns {string} Trimmed candidate name, possibly empty.
 */
function extractPrimaryCandidate(output) {
    const fromDecision = output?.decision?.primary_candidate;
    if (fromDecision) {
        return String(fromDecision).trim();
    }
    const leadCandidate = Array.isArray(output?.candidates) ? output.candidates[0] : undefined;
    if (leadCandidate?.name) {
        return String(leadCandidate.name).trim();
    }
    const symbolName = output?.symbol?.name;
    return symbolName ? String(symbolName).trim() : '';
}
|
|
358
|
+
/**
 * Reads `decision.recommended_follow_up` from an output as a trimmed string.
 *
 * @param {object|undefined} output - Tool output to inspect.
 * @returns {string} The follow-up text, or '' when absent or falsy.
 */
function extractRecommendedFollowUp(output) {
    const followUp = output?.decision?.recommended_follow_up;
    return followUp ? String(followUp).trim() : '';
}
|
|
362
|
+
/**
 * Lists the distinct, non-empty targets named by an output's `resource_hints`
 * and `next_hops` rows, taking each row's `target` or else its `path`.
 *
 * @param {object|undefined} output - Tool output to inspect.
 * @returns {string[]} Unique trimmed targets in first-seen order.
 */
function extractResourceTargets(output) {
    const rows = [output?.resource_hints, output?.next_hops]
        .flatMap((list) => (Array.isArray(list) ? list : []));
    const named = rows
        .map((row) => String(row?.target || row?.path || '').trim())
        .filter((target) => target.length > 0);
    return [...new Set(named)];
}
|
|
373
|
+
/**
 * Tells whether an output signals ambiguity: its `status` is "ambiguous" or
 * its `message` mentions "disambiguate" (both compared case-insensitively).
 *
 * @param {unknown} output - Tool output to inspect.
 * @returns {boolean} False for non-object or falsy outputs.
 */
function isAmbiguousOutput(output) {
    const isObjectOutput = Boolean(output) && typeof output === 'object';
    if (!isObjectOutput) {
        return false;
    }
    const normalize = (value) => String(value || '').trim().toLowerCase();
    if (normalize(output.status) === 'ambiguous') {
        return true;
    }
    return normalize(output.message).includes('disambiguate');
}
|
|
381
|
+
/**
 * Checks whether a candidate resource string refers to the canonical resource
 * anchor. Both values are normalised with normalizeAssetPath. The candidate
 * matches when it contains the canonical path, or the canonical path's parent
 * directory.
 *
 * @param {string} candidate - Resource path or follow-up text to test.
 * @param {string} canonical - Expected resource anchor.
 * @returns {boolean} False when either value normalises to an empty string.
 */
function matchesResourceAnchor(candidate, canonical) {
    const normalizedCandidate = normalizeAssetPath(candidate);
    const normalizedCanonical = normalizeAssetPath(canonical);
    if (!normalizedCandidate || !normalizedCanonical)
        return false;
    if (normalizedCandidate.includes(normalizedCanonical))
        return true;
    const canonicalDir = path.posix.dirname(normalizedCanonical);
    // dirname() yields '.' for a bare file name and '/' for a root-level path.
    // Neither identifies a directory, and a substring test against them would
    // match nearly every candidate, so they do not count as a match.
    if (canonicalDir === '.' || canonicalDir === '/')
        return false;
    return normalizedCandidate.includes(canonicalDir);
}
|
|
391
|
+
/**
 * Tells whether an output looks GUID-related: one of its signal texts mentions
 * "guid" or contains a run of 32 lowercase hexadecimal characters.
 *
 * @param {object|undefined} output - Tool output to inspect.
 * @returns {boolean}
 */
function isGuidVariantOutput(output) {
    const hexRun = /[a-f0-9]{32}/;
    return collectSignalTexts([output])
        .some((text) => text.includes('guid') || hexRun.test(text));
}
|
|
397
|
+
/**
 * Normalises an asset path for comparison: trims it, drops a leading
 * `resource_path_prefix=`, strips leading and trailing double quotes, and
 * converts backslashes to forward slashes.
 *
 * @param {unknown} value - Raw path; falsy values become ''.
 * @returns {string} Normalised path.
 */
function normalizeAssetPath(value) {
    const trimmed = String(value || '').trim();
    const withoutPrefix = trimmed.replace(/^resource_path_prefix=/, '');
    const unquoted = withoutPrefix.replace(/^"+|"+$/g, '');
    return unquoted.replace(/\\/g, '/');
}
|
|
404
|
+
/**
 * Compares two values as strings, ignoring surrounding whitespace and letter
 * case. Falsy values are treated as the empty string.
 *
 * @param {unknown} left - First value.
 * @param {unknown} right - Second value.
 * @returns {boolean}
 */
function stringsEqual(left, right) {
    const fold = (value) => String(value || '').trim().toLowerCase();
    return fold(left) === fold(right);
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|