@veewo/gitnexus 1.5.0-rc.4 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/dist/benchmark/agent-context/runner.js +3 -0
  2. package/dist/benchmark/agent-context/runner.test.js +22 -0
  3. package/dist/benchmark/agent-context/tool-runner.d.ts +7 -6
  4. package/dist/benchmark/agent-safe-query-context/io.d.ts +2 -0
  5. package/dist/benchmark/agent-safe-query-context/io.js +86 -0
  6. package/dist/benchmark/agent-safe-query-context/io.test.d.ts +1 -0
  7. package/dist/benchmark/agent-safe-query-context/io.test.js +13 -0
  8. package/dist/benchmark/agent-safe-query-context/report.d.ts +57 -0
  9. package/dist/benchmark/agent-safe-query-context/report.js +159 -0
  10. package/dist/benchmark/agent-safe-query-context/report.test.d.ts +1 -0
  11. package/dist/benchmark/agent-safe-query-context/report.test.js +362 -0
  12. package/dist/benchmark/agent-safe-query-context/runner.d.ts +44 -0
  13. package/dist/benchmark/agent-safe-query-context/runner.js +406 -0
  14. package/dist/benchmark/agent-safe-query-context/runner.test.d.ts +1 -0
  15. package/dist/benchmark/agent-safe-query-context/runner.test.js +290 -0
  16. package/dist/benchmark/agent-safe-query-context/semantic-tuple.d.ts +20 -0
  17. package/dist/benchmark/agent-safe-query-context/semantic-tuple.js +225 -0
  18. package/dist/benchmark/agent-safe-query-context/semantic-tuple.test.d.ts +1 -0
  19. package/dist/benchmark/agent-safe-query-context/semantic-tuple.test.js +122 -0
  20. package/dist/benchmark/agent-safe-query-context/subagent-live.d.ts +47 -0
  21. package/dist/benchmark/agent-safe-query-context/subagent-live.js +128 -0
  22. package/dist/benchmark/agent-safe-query-context/subagent-live.test.d.ts +1 -0
  23. package/dist/benchmark/agent-safe-query-context/subagent-live.test.js +155 -0
  24. package/dist/benchmark/agent-safe-query-context/telemetry-tool.d.ts +9 -0
  25. package/dist/benchmark/agent-safe-query-context/telemetry-tool.js +77 -0
  26. package/dist/benchmark/agent-safe-query-context/types.d.ts +61 -0
  27. package/dist/benchmark/agent-safe-query-context/types.js +8 -0
  28. package/dist/benchmark/analyze-runner.d.ts +1 -1
  29. package/dist/benchmark/analyze-runner.js +4 -3
  30. package/dist/benchmark/analyze-runner.test.js +7 -0
  31. package/dist/benchmark/runtime-poc/provenance-artifact.d.ts +47 -0
  32. package/dist/benchmark/runtime-poc/provenance-artifact.js +89 -0
  33. package/dist/benchmark/runtime-poc/runner.d.ts +31 -0
  34. package/dist/benchmark/runtime-poc/runner.js +163 -0
  35. package/dist/benchmark/u2-e2e/hydration-policy-repeatability-runner.d.ts +8 -0
  36. package/dist/benchmark/u2-e2e/hydration-policy-repeatability-runner.js +21 -0
  37. package/dist/benchmark/u2-e2e/phase2-runtime-claim-acceptance-runner.d.ts +0 -1
  38. package/dist/benchmark/u2-e2e/phase2-runtime-claim-acceptance-runner.js +53 -51
  39. package/dist/benchmark/u2-e2e/phase2-runtime-claim-acceptance-runner.test.js +0 -1
  40. package/dist/benchmark/u2-e2e/phase5-rule-lab-acceptance-runner.d.ts +1 -1
  41. package/dist/benchmark/u2-e2e/phase5-rule-lab-acceptance-runner.js +82 -18
  42. package/dist/benchmark/u2-e2e/phase5-rule-lab-acceptance-runner.test.js +1 -2
  43. package/dist/benchmark/u2-e2e/retrieval-runner.js +15 -7
  44. package/dist/benchmark/u2-e2e/retrieval-runner.test.js +46 -0
  45. package/dist/cli/ai-context.d.ts +0 -1
  46. package/dist/cli/ai-context.js +5 -6
  47. package/dist/cli/ai-context.test.js +8 -0
  48. package/dist/cli/analyze-options.js +58 -34
  49. package/dist/cli/analyze-options.test.js +57 -0
  50. package/dist/cli/analyze-runtime-summary.js +2 -0
  51. package/dist/cli/analyze-runtime-summary.test.js +12 -0
  52. package/dist/cli/analyze-summary.d.ts +4 -0
  53. package/dist/cli/analyze-summary.js +43 -0
  54. package/dist/cli/analyze-summary.test.js +65 -1
  55. package/dist/cli/analyze.d.ts +11 -0
  56. package/dist/cli/analyze.js +34 -5
  57. package/dist/cli/analyze.test.d.ts +1 -0
  58. package/dist/cli/analyze.test.js +25 -0
  59. package/dist/cli/benchmark-agent-context.js +1 -1
  60. package/dist/cli/benchmark-agent-safe-query-context.d.ts +20 -0
  61. package/dist/cli/benchmark-agent-safe-query-context.js +39 -0
  62. package/dist/cli/benchmark-agent-safe-query-context.test.d.ts +1 -0
  63. package/dist/cli/benchmark-agent-safe-query-context.test.js +271 -0
  64. package/dist/cli/benchmark-unity.js +1 -1
  65. package/dist/cli/benchmark-unity.test.js +5 -1
  66. package/dist/cli/benchmark.d.ts +29 -0
  67. package/dist/cli/benchmark.js +55 -0
  68. package/dist/cli/index.js +27 -2
  69. package/dist/cli/rule-lab.d.ts +3 -7
  70. package/dist/cli/rule-lab.js +13 -22
  71. package/dist/cli/rule-lab.test.js +23 -3
  72. package/dist/cli/scope-manifest-config.d.ts +9 -0
  73. package/dist/cli/scope-manifest-config.js +37 -0
  74. package/dist/cli/setup.js +40 -41
  75. package/dist/cli/setup.test.js +14 -14
  76. package/dist/cli/sync-manifest.d.ts +27 -0
  77. package/dist/cli/sync-manifest.js +200 -0
  78. package/dist/cli/sync-manifest.test.d.ts +1 -0
  79. package/dist/cli/sync-manifest.test.js +88 -0
  80. package/dist/cli/tool.d.ts +2 -0
  81. package/dist/cli/tool.js +2 -0
  82. package/dist/core/config/unity-config.d.ts +1 -1
  83. package/dist/core/config/unity-config.js +1 -1
  84. package/dist/core/ingestion/call-processor.d.ts +2 -1
  85. package/dist/core/ingestion/call-processor.js +28 -6
  86. package/dist/core/ingestion/heritage-processor.d.ts +2 -1
  87. package/dist/core/ingestion/heritage-processor.js +30 -7
  88. package/dist/core/ingestion/import-processor.d.ts +2 -1
  89. package/dist/core/ingestion/import-processor.js +28 -6
  90. package/dist/core/ingestion/parsing-processor.d.ts +5 -3
  91. package/dist/core/ingestion/parsing-processor.js +46 -13
  92. package/dist/core/ingestion/pipeline.js +100 -19
  93. package/dist/core/ingestion/unity-lifecycle-synthetic-calls.test.js +18 -20
  94. package/dist/core/ingestion/unity-parity-seed.d.ts +2 -1
  95. package/dist/core/ingestion/unity-parity-seed.js +8 -0
  96. package/dist/core/ingestion/unity-resource-processor.d.ts +11 -0
  97. package/dist/core/ingestion/unity-resource-processor.js +102 -0
  98. package/dist/core/ingestion/unity-resource-processor.test.js +449 -0
  99. package/dist/core/ingestion/unity-runtime-binding-rules.d.ts +16 -1
  100. package/dist/core/ingestion/unity-runtime-binding-rules.js +193 -42
  101. package/dist/core/ingestion/workers/parse-worker.d.ts +2 -0
  102. package/dist/core/ingestion/workers/parse-worker.js +50 -6
  103. package/dist/core/lbug/csv-generator.test.js +2 -2
  104. package/dist/core/tree-sitter/csharp-define-profile.d.ts +6 -0
  105. package/dist/core/tree-sitter/csharp-define-profile.js +43 -0
  106. package/dist/core/tree-sitter/csharp-preproc-normalizer.d.ts +14 -0
  107. package/dist/core/tree-sitter/csharp-preproc-normalizer.js +261 -0
  108. package/dist/core/tree-sitter/parser-loader.d.ts +10 -0
  109. package/dist/core/tree-sitter/parser-loader.js +19 -0
  110. package/dist/core/unity/doc-contract.test.d.ts +1 -0
  111. package/dist/core/unity/doc-contract.test.js +30 -0
  112. package/dist/core/unity/prefab-source-scan.d.ts +25 -0
  113. package/dist/core/unity/prefab-source-scan.js +152 -0
  114. package/dist/core/unity/prefab-source-scan.test.d.ts +1 -0
  115. package/dist/core/unity/prefab-source-scan.test.js +70 -0
  116. package/dist/core/unity/scan-context.d.ts +12 -0
  117. package/dist/core/unity/scan-context.js +50 -2
  118. package/dist/core/unity/scan-context.test.js +74 -0
  119. package/dist/mcp/local/agent-safe-response.d.ts +10 -0
  120. package/dist/mcp/local/agent-safe-response.js +639 -0
  121. package/dist/mcp/local/derived-process-reader.js +1 -1
  122. package/dist/mcp/local/local-backend.d.ts +18 -1
  123. package/dist/mcp/local/local-backend.js +319 -125
  124. package/dist/mcp/local/process-confidence.d.ts +1 -2
  125. package/dist/mcp/local/process-confidence.js +0 -3
  126. package/dist/mcp/local/process-confidence.test.js +4 -2
  127. package/dist/mcp/local/process-evidence.d.ts +1 -8
  128. package/dist/mcp/local/process-evidence.js +1 -23
  129. package/dist/mcp/local/process-evidence.test.js +2 -16
  130. package/dist/mcp/local/process-ref.d.ts +1 -1
  131. package/dist/mcp/local/runtime-chain-closure-evaluator.d.ts +33 -0
  132. package/dist/mcp/local/runtime-chain-closure-evaluator.js +273 -0
  133. package/dist/mcp/local/runtime-chain-graph-candidates.d.ts +23 -0
  134. package/dist/mcp/local/runtime-chain-graph-candidates.js +131 -0
  135. package/dist/mcp/local/runtime-chain-verify.d.ts +1 -1
  136. package/dist/mcp/local/runtime-chain-verify.js +149 -138
  137. package/dist/mcp/local/runtime-chain-verify.test.js +126 -68
  138. package/dist/mcp/local/runtime-claim-rule-registry.d.ts +4 -0
  139. package/dist/mcp/local/runtime-claim-rule-registry.js +4 -0
  140. package/dist/mcp/local/runtime-claim-rule-registry.test.js +37 -4
  141. package/dist/mcp/local/runtime-claim.d.ts +11 -0
  142. package/dist/mcp/local/runtime-claim.js +28 -0
  143. package/dist/mcp/local/unity-evidence-view.d.ts +1 -1
  144. package/dist/mcp/local/unity-evidence-view.js +1 -1
  145. package/dist/mcp/local/unity-evidence-view.test.js +22 -0
  146. package/dist/mcp/tools.js +51 -21
  147. package/dist/rule-lab/analyze.d.ts +2 -1
  148. package/dist/rule-lab/analyze.js +94 -59
  149. package/dist/rule-lab/analyze.test.js +238 -20
  150. package/dist/rule-lab/curate.d.ts +2 -1
  151. package/dist/rule-lab/curate.js +24 -3
  152. package/dist/rule-lab/curate.test.js +65 -0
  153. package/dist/rule-lab/curation-input-builder.d.ts +45 -0
  154. package/dist/rule-lab/curation-input-builder.js +133 -0
  155. package/dist/rule-lab/promote.js +80 -7
  156. package/dist/rule-lab/promote.test.js +150 -0
  157. package/dist/rule-lab/review-pack.d.ts +3 -0
  158. package/dist/rule-lab/review-pack.js +41 -1
  159. package/dist/rule-lab/review-pack.test.js +67 -0
  160. package/dist/rule-lab/types.d.ts +29 -0
  161. package/dist/types/pipeline.d.ts +16 -0
  162. package/package.json +14 -13
  163. package/scripts/check-sync-manifest-traceability.mjs +203 -0
  164. package/scripts/run-node-tests.mjs +61 -0
  165. package/scripts/tree-sitter-audit-classify.mjs +172 -0
  166. package/skills/_shared/unity-rule-authoring-contract.md +64 -0
  167. package/skills/_shared/unity-runtime-process-contract.md +16 -0
  168. package/skills/gitnexus-cli.md +44 -4
  169. package/skills/gitnexus-debugging.md +9 -0
  170. package/skills/gitnexus-exploring.md +66 -18
  171. package/skills/gitnexus-guide.md +42 -3
  172. package/skills/gitnexus-impact-analysis.md +8 -0
  173. package/skills/gitnexus-pr-review.md +8 -0
  174. package/skills/gitnexus-refactoring.md +8 -0
  175. package/skills/gitnexus-unity-rule-gen.md +66 -312
@@ -0,0 +1,290 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import { runWorkflowReplay } from './runner.js';
4
+ const fakeCase = {
5
+ label: 'weapon_powerup',
6
+ start_query: 'weapon powerup equip chain',
7
+ retry_query: '1_weapon_orb_key.asset WeaponPowerUp HoldPickup EquipWithEvent Equip',
8
+ proof_contexts: ['WeaponPowerUp'],
9
+ proof_cypher: "MATCH (src)-[:CodeRelation {type: 'CALLS'}]->(dst) WHERE src.name IN ['HoldPickup', 'EquipWithEvent'] RETURN src.name, dst.name",
10
+ tool_plan: [
11
+ { tool: 'query', input: { query: 'weapon powerup equip chain' } },
12
+ { tool: 'context', input: { name: 'WeaponPowerUp' } },
13
+ { tool: 'cypher', input: { query: "MATCH (src)-[:CodeRelation {type: 'CALLS'}]->(dst) RETURN src.name, dst.name" } },
14
+ ],
15
+ live_task: {
16
+ objective: 'Investigate WeaponPowerUp from the provided asset seed and return the strongest supported pickup/equip runtime relation.',
17
+ symbol_seed: 'WeaponPowerUp',
18
+ resource_seed: 'Assets/NEON/DataAssets/Powerups/1_newWeapon/0_pick/法器_Orb/1_weapon_orb_key.asset',
19
+ },
20
+ semantic_tuple: {
21
+ resource_anchor: 'Assets/NEON/DataAssets/Powerups/1_newWeapon/0_pick/法器_Orb/1_weapon_orb_key.asset',
22
+ symbol_anchor: 'WeaponPowerUp',
23
+ proof_edges: [
24
+ 'HoldPickup -> WeaponPowerUp.PickItUp',
25
+ 'EquipWithEvent -> WeaponPowerUp.Equip',
26
+ ],
27
+ closure_status: 'not_verified_full',
28
+ },
29
+ };
30
+ test('workflow replay narrows query only when retry triggers fire', async () => {
31
+ const calls = [];
32
+ let queryCount = 0;
33
+ const fakeRunner = {
34
+ async query(input) {
35
+ calls.push({ tool: 'query', input });
36
+ queryCount += 1;
37
+ if (queryCount === 1) {
38
+ return {
39
+ candidates: [{ name: 'FallbackCandidate' }],
40
+ resource_hints: [{ path: 'Assets/Other/OffTarget.asset' }],
41
+ };
42
+ }
43
+ return {
44
+ candidates: [{ name: 'WeaponPowerUp' }],
45
+ resource_hints: [
46
+ {
47
+ path: 'Assets/NEON/DataAssets/Powerups/1_newWeapon/0_pick/法器_Orb/1_weapon_orb_key.asset',
48
+ },
49
+ ],
50
+ };
51
+ },
52
+ async context(input) {
53
+ calls.push({ tool: 'context', input });
54
+ return {
55
+ symbol: { name: 'WeaponPowerUp' },
56
+ incoming: {
57
+ CALLS: [{ name: 'HoldPickup' }, { name: 'EquipWithEvent' }],
58
+ },
59
+ outgoing: {
60
+ CALLS: [{ name: 'PickItUp' }, { name: 'Equip' }],
61
+ },
62
+ };
63
+ },
64
+ async cypher(input) {
65
+ calls.push({ tool: 'cypher', input });
66
+ return {
67
+ row_count: 2,
68
+ rows: [
69
+ { src: 'HoldPickup', dst: 'PickItUp' },
70
+ { src: 'EquipWithEvent', dst: 'Equip' },
71
+ ],
72
+ };
73
+ },
74
+ };
75
+ const result = await runWorkflowReplay(fakeCase, fakeRunner);
76
+ assert.equal(result.tool_calls_to_completion, 4);
77
+ assert.equal(result.retry_breakdown.query_retry_count, 1);
78
+ assert.equal(result.retry_breakdown.context_retry_count, 0);
79
+ assert.equal(result.semantic_tuple_pass, true);
80
+ assert.equal(result.stop_reason, 'semantic_tuple_satisfied');
81
+ assert.equal(calls.map((entry) => entry.tool).join(','), 'query,query,context,cypher');
82
+ });
83
+ test('workflow replay applies response_profile to query and context calls', async () => {
84
+ const calls = [];
85
+ const fakeRunner = {
86
+ async query(input) {
87
+ calls.push({ tool: 'query', input });
88
+ return {
89
+ candidates: [{ name: 'WeaponPowerUp' }],
90
+ resource_hints: [{ path: fakeCase.semantic_tuple.resource_anchor }],
91
+ };
92
+ },
93
+ async context(input) {
94
+ calls.push({ tool: 'context', input });
95
+ return {
96
+ symbol: { name: 'WeaponPowerUp' },
97
+ incoming: {
98
+ CALLS: [{ name: 'HoldPickup' }, { name: 'EquipWithEvent' }],
99
+ },
100
+ outgoing: {
101
+ CALLS: [{ name: 'PickItUp' }, { name: 'Equip' }],
102
+ },
103
+ };
104
+ },
105
+ async cypher(input) {
106
+ calls.push({ tool: 'cypher', input });
107
+ return {
108
+ row_count: 2,
109
+ rows: [
110
+ { src: 'HoldPickup', dst: 'PickItUp' },
111
+ { src: 'EquipWithEvent', dst: 'Equip' },
112
+ ],
113
+ };
114
+ },
115
+ };
116
+ const result = await runWorkflowReplay(fakeCase, fakeRunner, { responseProfile: 'slim' });
117
+ const queryCalls = calls.filter((entry) => entry.tool === 'query');
118
+ const contextCalls = calls.filter((entry) => entry.tool === 'context');
119
+ assert.equal(queryCalls.every((entry) => entry.input.response_profile === 'slim'), true);
120
+ assert.equal(contextCalls.every((entry) => entry.input.response_profile === 'slim'), true);
121
+ assert.equal(result.guid_invariance_pass, true);
122
+ assert.equal(result.guid_variant?.primary_candidate, result.base?.primary_candidate);
123
+ assert.equal(result.guid_variant?.recommended_follow_up, result.base?.recommended_follow_up);
124
+ });
125
+ test('workflow replay exposes drift-sensitive metrics from the first-hop output and ambiguity detours', async () => {
126
+ const fakeRunner = {
127
+ async query() {
128
+ return {
129
+ decision: {
130
+ primary_candidate: 'FallbackCandidate',
131
+ recommended_follow_up: 'resource_path_prefix=Assets/Other/OffTarget.asset',
132
+ },
133
+ candidates: [{ name: 'FallbackCandidate' }],
134
+ resource_hints: [{ target: 'Assets/Other/OffTarget.asset' }],
135
+ };
136
+ },
137
+ async context() {
138
+ return {
139
+ status: 'ambiguous',
140
+ candidates: [
141
+ { name: 'WeaponPowerUp', uid: 'Class:A' },
142
+ { name: 'WeaponPowerUp', uid: 'Class:B' },
143
+ ],
144
+ };
145
+ },
146
+ async cypher() {
147
+ return {
148
+ row_count: 2,
149
+ rows: [
150
+ { src: 'HoldPickup', dst: 'PickItUp' },
151
+ { src: 'EquipWithEvent', dst: 'Equip' },
152
+ ],
153
+ };
154
+ },
155
+ };
156
+ const result = await runWorkflowReplay(fakeCase, fakeRunner, { maxSteps: 3, responseProfile: 'slim' });
157
+ assert.equal(result.anchor_top1_pass, false);
158
+ assert.equal(result.recommended_follow_up_hit, false);
159
+ assert.equal(result.post_narrowing_anchor_pass, false);
160
+ assert.equal(result.post_narrowing_follow_up_hit, false);
161
+ assert.equal(result.ambiguity_detour_count, 1);
162
+ });
163
+ test('workflow replay tracks post-narrowing convergence separately from first-hop drift', async () => {
164
+ let queryCount = 0;
165
+ const fakeRunner = {
166
+ async query() {
167
+ queryCount += 1;
168
+ if (queryCount === 1) {
169
+ return {
170
+ decision: {
171
+ primary_candidate: 'FallbackCandidate',
172
+ recommended_follow_up: 'resource_path_prefix=Assets/Other/OffTarget.asset',
173
+ },
174
+ candidates: [{ name: 'FallbackCandidate' }],
175
+ resource_hints: [{ target: 'Assets/Other/OffTarget.asset' }],
176
+ };
177
+ }
178
+ return {
179
+ decision: {
180
+ primary_candidate: 'WeaponPowerUp',
181
+ recommended_follow_up: `resource_path_prefix=${fakeCase.semantic_tuple.resource_anchor}`,
182
+ },
183
+ candidates: [{ name: 'WeaponPowerUp' }],
184
+ resource_hints: [{ target: fakeCase.semantic_tuple.resource_anchor }],
185
+ };
186
+ },
187
+ async context() {
188
+ return {
189
+ status: 'ambiguous',
190
+ candidates: [{ name: 'WeaponPowerUp', uid: 'Class:A' }],
191
+ };
192
+ },
193
+ async cypher() {
194
+ return {
195
+ row_count: 2,
196
+ rows: [
197
+ { src: 'HoldPickup', dst: 'PickItUp' },
198
+ { src: 'EquipWithEvent', dst: 'Equip' },
199
+ ],
200
+ };
201
+ },
202
+ };
203
+ const result = await runWorkflowReplay(fakeCase, fakeRunner, { maxSteps: 4, responseProfile: 'slim' });
204
+ assert.equal(result.anchor_top1_pass, false);
205
+ assert.equal(result.recommended_follow_up_hit, false);
206
+ assert.equal(result.post_narrowing_anchor_pass, true);
207
+ assert.equal(result.post_narrowing_follow_up_hit, true);
208
+ });
209
// Verifies that a first-hop `recommended_follow_up` pointing at an unrelated
// placeholder is flagged as a leak, and that such a run is not freeze-ready
// even though the semantic tuple and live tool evidence both pass.
test('workflow replay flags unrelated placeholder follow-up leakage', async () => {
  const fakeRunner = {
    async query() {
      return {
        decision: {
          primary_candidate: 'WeaponPowerUp',
          recommended_follow_up: 'resource_path_prefix=Reload NEON.Game.Graph.Nodes.Reloads',
        },
        summary: 'WeaponPowerUp flow',
        candidates: [{ name: 'WeaponPowerUp' }],
        resource_hints: [{ target: fakeCase.semantic_tuple.resource_anchor }],
      };
    },
    async context() {
      return {
        symbol: { name: 'WeaponPowerUp' },
      };
    },
    async cypher() {
      return {
        row_count: 2,
        rows: [
          { src: 'HoldPickup', dst: 'PickItUp' },
          { src: 'EquipWithEvent', dst: 'Equip' },
        ],
      };
    },
  };
  const result = await runWorkflowReplay(fakeCase, fakeRunner, { maxSteps: 4, responseProfile: 'slim' });
  assert.equal(result.semantic_tuple_pass, true);
  assert.equal(result.placeholder_leak_detected, true);
  assert.equal(result.live_tool_evidence_pass, true);
  // The previous assertion compared freeze_ready against
  //   (confirmed steps > 0) && !placeholder_leak_detected && live_tool_evidence_pass && guid_invariance_pass
  // With placeholder_leak_detected already asserted true, that expression is
  // always false, so the expected value is stated directly instead of
  // re-deriving the formula inside the test.
  assert.equal(result.freeze_ready, false);
  assert.equal((result.confirmed_chain?.steps.length ?? 0) > 0, false);
});
247
+ test('workflow replay surfaces heuristic first-screen drift separately from semantic tuple pass', async () => {
248
+ const fakeRunner = {
249
+ async query() {
250
+ return {
251
+ summary: 'runtime heuristic clue',
252
+ decision: {
253
+ primary_candidate: 'WeaponPowerUp',
254
+ recommended_follow_up: `resource_path_prefix=${fakeCase.semantic_tuple.resource_anchor}`,
255
+ },
256
+ process_hints: [
257
+ {
258
+ summary: 'runtime heuristic clue',
259
+ confidence: 'low',
260
+ evidence_mode: 'resource_heuristic',
261
+ },
262
+ {
263
+ summary: 'Unity-runtime-root -> WeaponPowerUp',
264
+ confidence: 'high',
265
+ evidence_mode: 'direct_step',
266
+ },
267
+ ],
268
+ candidates: [{ name: 'WeaponPowerUp' }],
269
+ resource_hints: [{ target: fakeCase.semantic_tuple.resource_anchor }],
270
+ };
271
+ },
272
+ async context() {
273
+ return {
274
+ symbol: { name: 'WeaponPowerUp' },
275
+ };
276
+ },
277
+ async cypher() {
278
+ return {
279
+ row_count: 2,
280
+ rows: [
281
+ { src: 'HoldPickup', dst: 'PickItUp' },
282
+ { src: 'EquipWithEvent', dst: 'Equip' },
283
+ ],
284
+ };
285
+ },
286
+ };
287
+ const result = await runWorkflowReplay(fakeCase, fakeRunner, { maxSteps: 4, responseProfile: 'slim' });
288
+ assert.equal(result.semantic_tuple_pass, true);
289
+ assert.equal(result.heuristic_top_summary_detected, true);
290
+ });
@@ -0,0 +1,20 @@
1
+ import type { SemanticTuple } from './types.js';
2
+ export type LiveFailureClass = 'semantic_drift' | 'evidence_missing' | 'expression_mismatch' | 'over_investigated';
3
+ export interface LiveTupleScore {
4
+ normalized_tuple: SemanticTuple;
5
+ normalized_tuple_pass: boolean;
6
+ evidence_validation_pass: boolean;
7
+ failure_class?: LiveFailureClass;
8
+ }
9
+ export declare function semanticTuplePass(actual: SemanticTuple, expected: SemanticTuple): boolean;
10
+ export declare function scoreLiveTuple(expected: SemanticTuple, finalResult: {
11
+ resource_anchor?: unknown;
12
+ symbol_anchor?: unknown;
13
+ proof_edge?: unknown;
14
+ proof_edges?: unknown;
15
+ closure_status?: unknown;
16
+ }, outputs: unknown[], options?: {
17
+ toolCalls?: number;
18
+ overInvestigatedThreshold?: number;
19
+ }): LiveTupleScore;
20
+ export declare function deriveSemanticTuple(expected: SemanticTuple, outputs: unknown[]): SemanticTuple;
@@ -0,0 +1,225 @@
1
/**
 * Reports whether two semantic tuples are equivalent.
 *
 * Both tuples are first passed through `normalizeTuple` and then compared by
 * their JSON serialization, so fields whose value is `undefined` are ignored
 * and the order of `proof_edges` does not matter.
 *
 * @param {object} actual - Tuple produced by a run.
 * @param {object} expected - Tuple the run is scored against.
 * @returns {boolean} `true` when the normalized serializations are identical.
 */
export function semanticTuplePass(actual, expected) {
  const [actualJson, expectedJson] = [actual, expected].map((tuple) => JSON.stringify(normalizeTuple(tuple)));
  return actualJson === expectedJson;
}
4
/**
 * Scores the final answer of a live run against the expected semantic tuple.
 *
 * Two independent checks are made: the tuple reported in `finalResult`
 * (after normalization) must match `expected`, and the raw tool `outputs`
 * must themselves contain evidence for the expected tuple.
 *
 * @param {object} expected - Expected semantic tuple.
 * @param {object} finalResult - Fields reported by the agent (anchors, proof edges, closure status).
 * @param {unknown[]} outputs - Tool outputs collected during the run.
 * @param {{toolCalls?: number, overInvestigatedThreshold?: number}} [options] - Inputs for failure classification.
 * @returns {{normalized_tuple: object, normalized_tuple_pass: boolean, evidence_validation_pass: boolean, failure_class: (string|undefined)}}
 */
export function scoreLiveTuple(expected, finalResult, outputs, options = {}) {
  const reported = normalizeLiveTuple(expected, finalResult);
  const reportedMatches = semanticTuplePass(reported, expected);
  const evidenceMatches = semanticTuplePass(deriveSemanticTuple(expected, outputs), expected);
  return {
    normalized_tuple: reported,
    normalized_tuple_pass: reportedMatches,
    evidence_validation_pass: evidenceMatches,
    failure_class: classifyLiveFailure(expected, reported, reportedMatches, evidenceMatches, options),
  };
}
17
/**
 * Builds the tuple that the tool outputs can actually support.
 *
 * Each expected anchor is kept only if it is found in `outputs` (otherwise it
 * becomes `''`); each expected proof edge is kept only if `hasProofEdge`
 * finds it. `closure_status` is copied from `expected` unchanged — it is not
 * validated against the outputs here.
 *
 * @param {object} expected - Expected semantic tuple.
 * @param {unknown[]} outputs - Tool outputs collected during the run.
 * @returns {object} Tuple restricted to what the outputs evidence.
 */
export function deriveSemanticTuple(expected, outputs) {
  const anchorIfSeen = (anchor) => (hasExactString(outputs, anchor) ? anchor : '');
  const edgeIsEvidenced = (edge) => hasProofEdge(outputs, edge);

  const resourceAnchor = anchorIfSeen(expected.resource_anchor);
  const symbolAnchor = anchorIfSeen(expected.symbol_anchor);

  let singleEdge;
  if (expected.proof_edge && edgeIsEvidenced(expected.proof_edge)) {
    singleEdge = expected.proof_edge;
  }

  return {
    resource_anchor: resourceAnchor,
    symbol_anchor: symbolAnchor,
    proof_edge: singleEdge,
    proof_edges: expected.proof_edges?.filter((edge) => edgeIsEvidenced(edge)),
    closure_status: expected.closure_status,
  };
}
26
/**
 * Maps the free-form fields of a live final result onto the canonical tuple shape.
 *
 * Anchors are reduced to the expected value or `''`; proof edges are kept only
 * when an equivalent edge was reported. A reported `closure_status` is used
 * only when it is one of the recognized statuses; otherwise the expected
 * status is substituted.
 *
 * @param {object} expected - Expected semantic tuple.
 * @param {object} finalResult - Fields reported by the agent.
 * @returns {object} Canonical tuple derived from the report.
 */
function normalizeLiveTuple(expected, finalResult) {
  const reportedEdges = collectReportedEdges(finalResult.proof_edge, finalResult.proof_edges);
  const wasReported = (edge) => containsExpectedEdge(edge, reportedEdges);

  let singleEdge;
  if (expected.proof_edge && wasReported(expected.proof_edge)) {
    singleEdge = expected.proof_edge;
  }

  const reportedStatus = finalResult.closure_status;
  return {
    resource_anchor: normalizeResourceAnchor(expected.resource_anchor, finalResult.resource_anchor),
    symbol_anchor: normalizeSymbolAnchor(expected.symbol_anchor, finalResult.symbol_anchor),
    proof_edge: singleEdge,
    proof_edges: expected.proof_edges?.filter((edge) => wasReported(edge)),
    closure_status: isClosureStatus(reportedStatus) ? reportedStatus : expected.closure_status,
  };
}
43
/**
 * Produces a comparison-ready copy of a tuple.
 *
 * Only the five tuple fields are retained, always in the same key order, and
 * `proof_edges` is replaced by a sorted copy so ordering differences do not
 * affect serialization. The input tuple is not modified.
 *
 * @param {object} tuple - Tuple to canonicalize.
 * @returns {object} Canonical tuple.
 */
function normalizeTuple(tuple) {
  const { resource_anchor, symbol_anchor, proof_edge, proof_edges, closure_status } = tuple;

  let sortedEdges;
  if (proof_edges) {
    // Copy before sorting: Array.prototype.sort mutates in place.
    sortedEdges = [...proof_edges];
    sortedEdges.sort();
  }

  return {
    resource_anchor,
    symbol_anchor,
    proof_edge,
    proof_edges: sortedEdges,
    closure_status,
  };
}
52
/**
 * Reports whether any output contains `expected` somewhere in its string content.
 *
 * Note: matching is delegated to `valueContainsString`, which tests substring
 * containment on every nested string, not whole-string equality.
 *
 * @param {unknown[]} outputs - Tool outputs to search.
 * @param {string} expected - Text to look for.
 * @returns {boolean} `true` if at least one output contains the text.
 */
function hasExactString(outputs, expected) {
  return outputs.findIndex((candidate) => valueContainsString(candidate, expected)) !== -1;
}
55
/**
 * Reports whether the outputs contain evidence for a proof edge.
 *
 * The edge text is searched for verbatim first. If that fails, the edge is
 * parsed into a source and target method, and the outputs are searched for a
 * matching structured or textual edge.
 *
 * @param {unknown[]} outputs - Tool outputs to search.
 * @param {string} edge - Edge in `source -> target` form.
 * @returns {boolean} `true` if the edge is evidenced.
 */
function hasProofEdge(outputs, edge) {
  if (hasExactString(outputs, edge)) {
    return true;
  }
  const endpoints = parseProofEdge(edge);
  if (!endpoints) {
    return false;
  }
  const { source, targetMethod } = endpoints;
  return outputs.some((candidate) => valueContainsEdge(candidate, source, targetMethod));
}
65
/**
 * Reports whether an expected edge appears among the already-parsed reported edges.
 *
 * @param {string} expectedEdge - Edge in `source -> target` form.
 * @param {{source: string, targetMethod: string}[]} reportedEdges - Parsed edges from the final result.
 * @returns {boolean} `true` if some reported edge has the same source and target method;
 *   `false` if `expectedEdge` cannot be parsed.
 */
function containsExpectedEdge(expectedEdge, reportedEdges) {
  const wanted = parseProofEdge(expectedEdge);
  if (!wanted) {
    return false;
  }
  const { source, targetMethod } = wanted;
  for (const reported of reportedEdges) {
    if (reported.source === source && reported.targetMethod === targetMethod) {
      return true;
    }
  }
  return false;
}
72
/**
 * Reduces a reported resource anchor to the expected one, or to `''`.
 *
 * Surrounding whitespace on the reported value is tolerated, matching how
 * `normalizeSymbolAnchor` treats reported symbols; a stray space or trailing
 * newline in an agent's answer should not be scored as anchor drift.
 *
 * @param {string} expectedResource - Resource path the run is expected to report.
 * @param {unknown} candidate - Value reported in the final result.
 * @returns {string} `expectedResource` when the candidate matches it, otherwise `''`.
 */
function normalizeResourceAnchor(expectedResource, candidate) {
  if (typeof candidate !== 'string') {
    return '';
  }
  // Exact match is checked first so previously accepted values stay accepted.
  const matches = candidate === expectedResource || candidate.trim() === expectedResource;
  return matches ? expectedResource : '';
}
78
/**
 * Reduces a reported symbol anchor to the expected one, or to `''`.
 *
 * After trimming, the candidate is accepted when it equals the expected
 * symbol or is a dotted qualified name ending in `.<expectedSymbol>`.
 *
 * @param {string} expectedSymbol - Canonical symbol name.
 * @param {unknown} candidate - Value reported in the final result.
 * @returns {string} `expectedSymbol` on a match, otherwise `''`.
 */
function normalizeSymbolAnchor(expectedSymbol, candidate) {
  if (typeof candidate !== 'string') {
    return '';
  }
  const reported = candidate.trim();
  const isBareName = reported === expectedSymbol;
  const isQualifiedName = reported.endsWith(`.${expectedSymbol}`);
  return isBareName || isQualifiedName ? expectedSymbol : '';
}
88
/**
 * Gathers every edge that can be parsed out of the reported `proof_edge` and
 * `proof_edges` values, in that order.
 *
 * @param {unknown} proofEdge - Reported single edge (any shape).
 * @param {unknown} proofEdges - Reported edge collection (any shape).
 * @returns {{source: string, targetMethod: string}[]} Parsed edges.
 */
function collectReportedEdges(proofEdge, proofEdges) {
  const edges = [];
  for (const reported of [proofEdge, proofEdges]) {
    collectEdgesRecursive(reported, edges);
  }
  return edges;
}
94
/**
 * Walks an arbitrary value and appends every edge it can recognize to `collected`.
 *
 * Strings are parsed as `source -> target` text. Arrays are walked element by
 * element. Objects are checked for the endpoint pairs `src/dst`,
 * `source/target`, `caller/callee` and `from/to`, and then every property
 * value is walked as well. Falsy and other primitive values are ignored.
 *
 * @param {unknown} value - Value to inspect.
 * @param {{source: string, targetMethod: string}[]} collected - Accumulator, mutated in place.
 * @returns {void}
 */
function collectEdgesRecursive(value, collected) {
  if (!value) {
    return;
  }
  if (typeof value === 'string') {
    const edge = parseProofEdge(value);
    if (edge) {
      collected.push(edge);
    }
    return;
  }
  if (Array.isArray(value)) {
    for (const item of value) {
      collectEdgesRecursive(item, collected);
    }
    return;
  }
  if (typeof value !== 'object') {
    return;
  }
  const endpointKeys = [
    ['src', 'dst'],
    ['source', 'target'],
    ['caller', 'callee'],
    ['from', 'to'],
  ];
  // Direct endpoint pairs on this object are recorded before descending.
  for (const [fromKey, toKey] of endpointKeys) {
    const edge = parseEdgeFromPair(value[fromKey], value[toKey]);
    if (edge) {
      collected.push(edge);
    }
  }
  for (const nested of Object.values(value)) {
    collectEdgesRecursive(nested, collected);
  }
}
127
/**
 * Builds an edge from a pair of endpoint values taken from a structured record.
 *
 * @param {unknown} left - Source endpoint (string or object carrying a name).
 * @param {unknown} right - Target endpoint (string or object carrying a name).
 * @returns {{source: string, targetMethod: string}|null} The edge, or `null`
 *   when either endpoint yields no name.
 */
function parseEdgeFromPair(left, right) {
  const source = extractName(left);
  const target = extractName(right);
  if (source && target) {
    const segments = target.split('.');
    const targetMethod = segments[segments.length - 1] || target;
    return { source, targetMethod };
  }
  return null;
}
136
/**
 * Extracts a short (unqualified) name from an endpoint value.
 *
 * For a string, the text after the last `.` is returned, falling back to the
 * whole string when that tail is empty. For an object, the `name`, `id`,
 * `uid` and `symbol` properties are tried in that order and the first
 * non-empty result wins.
 *
 * @param {unknown} value - String or record describing a symbol.
 * @returns {string|null} The short name, or `null` if none can be found.
 */
function extractName(value) {
  if (typeof value === 'string') {
    const tail = value.slice(value.lastIndexOf('.') + 1);
    return tail || value;
  }
  if (!value || typeof value !== 'object') {
    return null;
  }
  for (const key of ['name', 'id', 'uid', 'symbol']) {
    const found = extractName(value[key]);
    if (found) {
      return found;
    }
  }
  return null;
}
150
/**
 * Assigns a failure class to a scored live run.
 *
 * - reported tuple matches and evidence matches: no failure (`undefined`)
 * - reported tuple matches but evidence does not: `'evidence_missing'`
 * - otherwise, more tool calls than the threshold (default 6): `'over_investigated'`
 * - otherwise, either anchor differs from expected: `'semantic_drift'`
 * - otherwise: `'expression_mismatch'`
 *
 * @param {object} expected - Expected semantic tuple.
 * @param {object} normalizedTuple - Canonical tuple derived from the report.
 * @param {boolean} normalizedTuplePass - Whether the reported tuple matched.
 * @param {boolean} evidenceValidationPass - Whether the outputs evidenced the tuple.
 * @param {{toolCalls?: number, overInvestigatedThreshold?: number}} options - Call count and threshold.
 * @returns {string|undefined} Failure class, or `undefined` on full success.
 */
function classifyLiveFailure(expected, normalizedTuple, normalizedTuplePass, evidenceValidationPass, options) {
  if (normalizedTuplePass) {
    return evidenceValidationPass ? undefined : 'evidence_missing';
  }
  const callCount = options.toolCalls ?? 0;
  const callBudget = options.overInvestigatedThreshold ?? 6;
  if (callCount > callBudget) {
    return 'over_investigated';
  }
  const anchorsMatch = normalizedTuple.resource_anchor === expected.resource_anchor
    && normalizedTuple.symbol_anchor === expected.symbol_anchor;
  return anchorsMatch ? 'expression_mismatch' : 'semantic_drift';
}
165
/**
 * Reports whether a value is one of the recognized closure status strings:
 * `'not_verified_full'`, `'verified_partial'`, `'verified_full'` or `'failed'`.
 *
 * @param {unknown} value - Value to test.
 * @returns {boolean} `true` for a recognized status.
 */
function isClosureStatus(value) {
  switch (value) {
    case 'not_verified_full':
    case 'verified_partial':
    case 'verified_full':
    case 'failed':
      return true;
    default:
      return false;
  }
}
168
/**
 * Deep-searches a value for a string that contains `expected` as a substring.
 *
 * Strings are tested directly; arrays and objects are searched through their
 * elements / property values. Every other kind of value yields `false`.
 *
 * @param {unknown} value - Value to search.
 * @param {string} expected - Text to look for.
 * @returns {boolean} `true` if any nested string contains the text.
 */
function valueContainsString(value, expected) {
  if (typeof value === 'string') {
    return value.includes(expected);
  }
  if (value === null || typeof value !== 'object') {
    return false;
  }
  const children = Array.isArray(value) ? value : Object.values(value);
  for (const child of children) {
    if (valueContainsString(child, expected)) {
      return true;
    }
  }
  return false;
}
180
/**
 * Deep-searches a value for evidence of the edge `source -> targetMethod`.
 *
 * A string counts as evidence when it contains either the table-row form
 * `| source | targetMethod |` or the arrow form `source -> targetMethod`.
 * An object counts when one of its endpoint pairs (`src/dst`,
 * `source/target`, `caller/callee`, `from/to`) matches both names; failing
 * that, its property values are searched. Arrays are searched element by element.
 *
 * @param {unknown} value - Value to search.
 * @param {string} source - Short name of the edge source.
 * @param {string} targetMethod - Short name of the edge target.
 * @returns {boolean} `true` if the edge is found.
 */
function valueContainsEdge(value, source, targetMethod) {
  if (typeof value === 'string') {
    const tableRow = `| ${source} | ${targetMethod} |`;
    const arrowForm = `${source} -> ${targetMethod}`;
    return value.includes(tableRow) || value.includes(arrowForm);
  }
  const searchChildren = (children) => children.some((child) => valueContainsEdge(child, source, targetMethod));
  if (Array.isArray(value)) {
    return searchChildren(value);
  }
  if (value === null || typeof value !== 'object') {
    return false;
  }
  const endpointKeys = [
    ['src', 'dst'],
    ['source', 'target'],
    ['caller', 'callee'],
    ['from', 'to'],
  ];
  const directHit = endpointKeys.some(
    ([fromKey, toKey]) => matchName(value[fromKey], source) && matchName(value[toKey], targetMethod),
  );
  return directHit || searchChildren(Object.values(value));
}
205
/**
 * Reports whether an endpoint value names `expected`.
 *
 * A string matches when it equals `expected` or ends with `.<expected>`.
 * An object matches when its `name`, `id` or `uid` property matches (checked
 * recursively). Anything else does not match.
 *
 * @param {unknown} value - String or record describing a symbol.
 * @param {string} expected - Short name to match.
 * @returns {boolean} `true` on a match.
 */
function matchName(value, expected) {
  if (typeof value === 'string') {
    return value === expected || value.endsWith(`.${expected}`);
  }
  if (!value || typeof value !== 'object') {
    return false;
  }
  return ['name', 'id', 'uid'].some((key) => matchName(value[key], expected));
}
217
/**
 * Parses edge text of the form `source -> target` into short endpoint names.
 *
 * Only the first two `->`-separated parts are used. Each part is trimmed and
 * reduced to the text after its last `.` (falling back to the whole part when
 * that tail is empty).
 *
 * @param {string} edge - Edge text.
 * @returns {{source: string, targetMethod: string}|null} Parsed endpoints, or
 *   `null` when either side is missing or blank.
 */
function parseProofEdge(edge) {
  const segments = edge.split('->');
  const sourcePart = segments[0]?.trim();
  const targetPart = segments[1]?.trim();
  if (!sourcePart || !targetPart) {
    return null;
  }
  const shortName = (qualified) => qualified.slice(qualified.lastIndexOf('.') + 1) || qualified;
  return {
    source: shortName(sourcePart),
    targetMethod: shortName(targetPart),
  };
}
@@ -0,0 +1,122 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import { scoreLiveTuple, semanticTuplePass } from './semantic-tuple.js';
4
// A tuple compared against itself must be reported as a pass.
test('semanticTuplePass returns true for identical tuples', () => {
    const sample = {
        resource_anchor: 'Assets/NEON/DataAssets/Powerups/1_newWeapon/0_pick/法器_Orb/1_weapon_orb_key.asset',
        symbol_anchor: 'WeaponPowerUp',
        proof_edges: [
            'HoldPickup -> WeaponPowerUp.PickItUp',
            'EquipWithEvent -> WeaponPowerUp.Equip',
        ],
        closure_status: 'not_verified_full',
    };
    const verdict = semanticTuplePass(sample, sample);
    assert.equal(verdict, true);
});
16
// Two tuples that differ only in their proof_edge must not pass.
test('semanticTuplePass returns false when any tuple field differs', () => {
    const baseline = {
        resource_anchor: 'Assets/NEON/Graphs/PlayerGun/Gungraph_use/1_weapon_orb_key.asset',
        symbol_anchor: 'ReloadBase',
        proof_edge: 'ReloadBase.GetValue -> ReloadBase.CheckReload',
        closure_status: 'not_verified_full',
    };
    const altered = {
        ...baseline,
        proof_edge: 'ReloadBase.GetValue -> ReloadBase.ReloadRoutine',
    };
    const verdict = semanticTuplePass(baseline, altered);
    assert.equal(verdict, false);
});
29
// A namespace-qualified symbol_anchor is expected to normalize to the bare
// anchor, with the supplied evidence entries validating the tuple.
test('scoreLiveTuple normalizes fully-qualified symbol identity to canonical anchor', () => {
    const expected = {
        resource_anchor: 'Assets/NEON/Graphs/PlayerGun/Gungraph_use/1_weapon_orb_key.asset',
        symbol_anchor: 'ReloadBase',
        proof_edge: 'ReloadBase.GetValue -> ReloadBase.CheckReload',
        closure_status: 'not_verified_full',
    };
    const reported = {
        resource_anchor: expected.resource_anchor,
        symbol_anchor: 'Game.Runtime.ReloadBase',
        proof_edge: 'ReloadBase.GetValue -> ReloadBase.CheckReload',
        closure_status: 'not_verified_full',
    };
    const evidence = [
        { text: expected.resource_anchor },
        { symbol: 'ReloadBase' },
        { src: 'GetValue', dst: 'CheckReload' },
    ];
    const score = scoreLiveTuple(expected, reported, evidence);
    assert.equal(score.normalized_tuple.symbol_anchor, 'ReloadBase');
    assert.equal(score.normalized_tuple_pass, true);
    assert.equal(score.evidence_validation_pass, true);
});
50
// Proof edges reported as { caller, callee } records are expected to be
// treated the same as the canonical "A -> B" strings.
test('scoreLiveTuple normalizes caller/callee objects to canonical proof edges', () => {
    const expected = {
        resource_anchor: 'Assets/NEON/DataAssets/Powerups/1_newWeapon/0_pick/法器_Orb/1_weapon_orb_key.asset',
        symbol_anchor: 'WeaponPowerUp',
        proof_edges: [
            'HoldPickup -> WeaponPowerUp.PickItUp',
            'EquipWithEvent -> WeaponPowerUp.Equip',
        ],
        closure_status: 'not_verified_full',
    };
    const reported = {
        resource_anchor: expected.resource_anchor,
        symbol_anchor: expected.symbol_anchor,
        proof_edges: [
            { caller: 'HoldPickup', callee: 'WeaponPowerUp.PickItUp' },
            { caller: 'EquipWithEvent', callee: 'WeaponPowerUp.Equip' },
        ],
        closure_status: 'not_verified_full',
    };
    const evidence = [
        { value: expected.resource_anchor },
        { symbol: 'WeaponPowerUp' },
        { src: 'HoldPickup', dst: 'PickItUp' },
        { src: 'EquipWithEvent', dst: 'Equip' },
    ];
    const score = scoreLiveTuple(expected, reported, evidence);
    assert.equal(score.normalized_tuple_pass, true);
    assert.equal(score.evidence_validation_pass, true);
});
77
// A matching tuple whose evidence entries contain none of the expected
// content should pass normalization, fail evidence validation, and be
// classified as evidence_missing.
test('scoreLiveTuple classifies evidence_missing when normalized tuple passes without telemetry evidence', () => {
    const expected = {
        resource_anchor: 'Assets/NEON/Graphs/PlayerGun/Gungraph_use/1_weapon_orb_key.asset',
        symbol_anchor: 'ReloadBase',
        proof_edge: 'ReloadBase.GetValue -> ReloadBase.CheckReload',
        closure_status: 'not_verified_full',
    };
    const reported = {
        resource_anchor: expected.resource_anchor,
        symbol_anchor: expected.symbol_anchor,
        proof_edge: expected.proof_edge,
        closure_status: 'not_verified_full',
    };
    const unrelatedEvidence = [{ output: 'no reload edge evidence here' }];
    const score = scoreLiveTuple(expected, reported, unrelatedEvidence);
    assert.equal(score.normalized_tuple_pass, true);
    assert.equal(score.evidence_validation_pass, false);
    assert.equal(score.failure_class, 'evidence_missing');
});
94
// Exercises three failure classes in turn: a wrong symbol anchor, a wrong
// callee in the proof edges, and the same wrong callee with a tool-call
// count above the over-investigation threshold.
test('scoreLiveTuple emits semantic_drift, expression_mismatch, and over_investigated failure classes', () => {
    const expected = {
        resource_anchor: 'Assets/NEON/DataAssets/Powerups/1_newWeapon/0_pick/法器_Orb/1_weapon_orb_key.asset',
        symbol_anchor: 'WeaponPowerUp',
        proof_edges: [
            'HoldPickup -> WeaponPowerUp.PickItUp',
            'EquipWithEvent -> WeaponPowerUp.Equip',
        ],
        closure_status: 'not_verified_full',
    };
    // Builds a fresh tuple per call so no object is shared between scenarios.
    const tupleWithWrongCallee = () => ({
        resource_anchor: expected.resource_anchor,
        symbol_anchor: expected.symbol_anchor,
        proof_edges: [{ caller: 'HoldPickup', callee: 'WeaponPowerUp.NotEquip' }],
    });

    const tupleWithWrongSymbol = {
        resource_anchor: expected.resource_anchor,
        symbol_anchor: 'WrongSymbol',
        proof_edges: [{ caller: 'HoldPickup', callee: 'WeaponPowerUp.PickItUp' }],
    };
    const semanticDrift = scoreLiveTuple(expected, tupleWithWrongSymbol, []);
    assert.equal(semanticDrift.failure_class, 'semantic_drift');

    const expressionMismatch = scoreLiveTuple(expected, tupleWithWrongCallee(), []);
    assert.equal(expressionMismatch.failure_class, 'expression_mismatch');

    const investigationBudget = { toolCalls: 8, overInvestigatedThreshold: 6 };
    const overInvestigated = scoreLiveTuple(expected, tupleWithWrongCallee(), [], investigationBudget);
    assert.equal(overInvestigated.failure_class, 'over_investigated');
});