outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +261 -0
  2. package/package.json +95 -0
  3. package/src/agents/README.md +139 -0
  4. package/src/agents/adapters/anthropic.adapter.ts +166 -0
  5. package/src/agents/adapters/dalle.adapter.ts +145 -0
  6. package/src/agents/adapters/gemini.adapter.ts +134 -0
  7. package/src/agents/adapters/imagen.adapter.ts +106 -0
  8. package/src/agents/adapters/nano-banana.adapter.ts +129 -0
  9. package/src/agents/adapters/openai.adapter.ts +165 -0
  10. package/src/agents/adapters/veo.adapter.ts +130 -0
  11. package/src/agents/agent.schema.property.test.ts +379 -0
  12. package/src/agents/agent.schema.test.ts +148 -0
  13. package/src/agents/agent.schema.ts +263 -0
  14. package/src/agents/index.ts +60 -0
  15. package/src/agents/registered-agent.schema.ts +356 -0
  16. package/src/agents/registry.ts +97 -0
  17. package/src/agents/tournament-configs.property.test.ts +266 -0
  18. package/src/cli/README.md +145 -0
  19. package/src/cli/commands/define.ts +79 -0
  20. package/src/cli/commands/list.ts +46 -0
  21. package/src/cli/commands/logs.ts +83 -0
  22. package/src/cli/commands/run.ts +416 -0
  23. package/src/cli/commands/verify.ts +110 -0
  24. package/src/cli/index.ts +81 -0
  25. package/src/config/README.md +128 -0
  26. package/src/config/env.ts +262 -0
  27. package/src/config/index.ts +19 -0
  28. package/src/eval/README.md +318 -0
  29. package/src/eval/ai-judge.test.ts +435 -0
  30. package/src/eval/ai-judge.ts +368 -0
  31. package/src/eval/code-validators.ts +414 -0
  32. package/src/eval/evaluateOutcome.property.test.ts +1174 -0
  33. package/src/eval/evaluateOutcome.ts +591 -0
  34. package/src/eval/immigration-validators.ts +122 -0
  35. package/src/eval/index.ts +90 -0
  36. package/src/eval/judge-cache.ts +402 -0
  37. package/src/eval/tournament-validators.property.test.ts +439 -0
  38. package/src/eval/validators.property.test.ts +1118 -0
  39. package/src/eval/validators.ts +1199 -0
  40. package/src/eval/weighted-scorer.ts +285 -0
  41. package/src/index.ts +17 -0
  42. package/src/league/README.md +188 -0
  43. package/src/league/health-check.ts +353 -0
  44. package/src/league/index.ts +93 -0
  45. package/src/league/killAgent.ts +151 -0
  46. package/src/league/league.test.ts +1151 -0
  47. package/src/league/runLeague.ts +843 -0
  48. package/src/league/scoreAgent.ts +175 -0
  49. package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
  50. package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
  51. package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
  52. package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
  53. package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
  54. package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
  55. package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
  56. package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
  57. package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
  58. package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
  59. package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
  60. package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
  61. package/src/modules/omnibridge/api/.gitkeep +1 -0
  62. package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
  63. package/src/modules/omnibridge/auth/.gitkeep +1 -0
  64. package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
  65. package/src/modules/omnibridge/auth/session-vault.ts +577 -0
  66. package/src/modules/omnibridge/core/.gitkeep +1 -0
  67. package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
  68. package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
  69. package/src/modules/omnibridge/core/types.ts +610 -0
  70. package/src/modules/omnibridge/execution/.gitkeep +1 -0
  71. package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
  72. package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
  73. package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
  74. package/src/modules/omnibridge/index.ts +212 -0
  75. package/src/modules/omnibridge/omnibridge.ts +510 -0
  76. package/src/modules/omnibridge/verification/.gitkeep +1 -0
  77. package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
  78. package/src/outcomes/README.md +75 -0
  79. package/src/outcomes/acquire-pilot-customer.ts +297 -0
  80. package/src/outcomes/code-delivery-outcomes.ts +89 -0
  81. package/src/outcomes/code-outcomes.ts +256 -0
  82. package/src/outcomes/code_review_battle.test.ts +135 -0
  83. package/src/outcomes/code_review_battle.ts +135 -0
  84. package/src/outcomes/cold_email_battle.ts +97 -0
  85. package/src/outcomes/content_creation_battle.ts +160 -0
  86. package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
  87. package/src/outcomes/index.ts +107 -0
  88. package/src/outcomes/lead_gen_battle.test.ts +113 -0
  89. package/src/outcomes/lead_gen_battle.ts +99 -0
  90. package/src/outcomes/outcome.schema.property.test.ts +229 -0
  91. package/src/outcomes/outcome.schema.ts +187 -0
  92. package/src/outcomes/qualified_sales_interest.ts +118 -0
  93. package/src/outcomes/swarm_planner.property.test.ts +370 -0
  94. package/src/outcomes/swarm_planner.ts +96 -0
  95. package/src/outcomes/web_extraction.ts +234 -0
  96. package/src/runtime/README.md +220 -0
  97. package/src/runtime/agentRunner.test.ts +341 -0
  98. package/src/runtime/agentRunner.ts +746 -0
  99. package/src/runtime/claudeAdapter.ts +232 -0
  100. package/src/runtime/costTracker.ts +123 -0
  101. package/src/runtime/index.ts +34 -0
  102. package/src/runtime/modelAdapter.property.test.ts +305 -0
  103. package/src/runtime/modelAdapter.ts +144 -0
  104. package/src/runtime/openaiAdapter.ts +235 -0
  105. package/src/utils/README.md +122 -0
  106. package/src/utils/command-runner.ts +134 -0
  107. package/src/utils/cost-guard.ts +379 -0
  108. package/src/utils/errors.test.ts +290 -0
  109. package/src/utils/errors.ts +442 -0
  110. package/src/utils/index.ts +37 -0
  111. package/src/utils/logger.test.ts +361 -0
  112. package/src/utils/logger.ts +419 -0
  113. package/src/utils/output-parsers.ts +216 -0
@@ -0,0 +1,292 @@
1
+ /**
2
+ * Property-Based Tests for Triangulation Engine
3
+ *
4
+ * These tests validate the correctness properties defined in the design document.
5
+ * Each property test runs minimum 100 iterations with randomly generated inputs.
6
+ *
7
+ * Requirements: 2.1, 2.2, 2.4, 2.5, 2.6
8
+ */
9
+
10
+ import { describe, test, expect } from 'vitest';
11
+ import * as fc from 'fast-check';
12
+ import { TriangulationEngine } from '../core/triangulation-engine.js';
13
+ import type {
14
+ IntentDocument,
15
+ IntentElement,
16
+ AnchorSet,
17
+ SpatialAnchor,
18
+ SemanticAnchor,
19
+ FunctionalAnchor,
20
+ } from '../core/types.js';
21
+
22
// Single TriangulationEngine instance used by every property test in this file.
// NOTE(review): if TriangulationEngine keeps internal state between calls, the tests may influence each other — confirm.
+ const engine = new TriangulationEngine();
23
+
24
+ // =============================================================================
25
+ // Arbitraries (Test Data Generators)
26
+ // =============================================================================
27
+
28
/**
 * Arbitrary producing a single IntentElement.
 *
 * Intent id, role, ARIA role and tag name are each picked from a fixed list;
 * the label and the (0-3) context hints are lorem text. `ariaRole` may be
 * `undefined`.
 */
const intentElementArb: fc.Arbitrary<IntentElement> = (() => {
  type ElementRole = 'action' | 'input' | 'display' | 'navigation';

  const intentId = fc.constantFrom(
    'ACTION_ID:SUBMIT',
    'ACTION_ID:LOGIN',
    'ACTION_ID:SIGNUP',
    'ACTION_ID:SEARCH',
    'ACTION_ID:CANCEL',
    'INPUT_ID:EMAIL_INPUT',
    'INPUT_ID:PASSWORD_INPUT',
    'INPUT_ID:NAME_INPUT',
    'INPUT_ID:SEARCH',
    'NAV_ID:HOME',
    'NAV_ID:PROFILE',
    'NAV_ID:SETTINGS',
    'NAV_ID:NAVIGATION',
    'DISPLAY_ID:HEADING',
    'DISPLAY_ID:ARTICLE',
    'DISPLAY_ID:SECTION'
  );
  const role = fc.constantFrom('action', 'input', 'display', 'navigation') as fc.Arbitrary<ElementRole>;
  const label = fc.lorem({ maxCount: 5 });
  const ariaRole = fc.option(
    fc.constantFrom('button', 'link', 'textbox', 'navigation', 'main', 'banner', 'contentinfo'),
    { nil: undefined }
  );
  const contextHints = fc.array(fc.lorem({ maxCount: 3 }), { minLength: 0, maxLength: 3 });
  const tagName = fc.constantFrom(
    'button',
    'a',
    'input',
    'div',
    'nav',
    'header',
    'footer',
    'main',
    'section',
    'article'
  );

  // Key order matches the original record so values are generated in the same order.
  return fc.record({ intentId, role, label, ariaRole, contextHints, tagName });
})();
56
+
57
/**
 * Generate a valid Intent Document containing 1 to 20 elements.
 *
 * `forms` is always an empty array and `navigation` always has no primary
 * links and no breadcrumbs. `createdAt` is an integer between
 * 1700000000000 and 1800000000000.
 *
 * The numeric metrics are generated with `noNaN: true`: without it,
 * `fc.double` may emit NaN even when `min`/`max` are set, which would make
 * the "valid" document contain a non-numeric build time or token reduction.
 */
const intentDocumentArb: fc.Arbitrary<IntentDocument> = fc.record({
  elements: fc.array(intentElementArb, { minLength: 1, maxLength: 20 }),
  forms: fc.constant([]),
  navigation: fc.constant({ primaryLinks: [], breadcrumbs: undefined }),
  buildTimeMs: fc.double({ min: 1, max: 100, noNaN: true }),
  tokenReduction: fc.double({ min: 80, max: 99, noNaN: true }),
  sourceUrl: fc.webUrl(),
  createdAt: fc.integer({ min: 1700000000000, max: 1800000000000 }),
});
69
+
70
/**
 * Generate a Spatial Anchor.
 *
 * The region is one of the four page regions, `nearbyLandmarks` holds 0 to 5
 * ids from a fixed list, and `relativePosition` is a pair of coordinates in
 * the closed range [0, 1].
 *
 * Coordinates use `noNaN: true`: without it, `fc.double` may emit NaN
 * despite the `min`/`max` bounds, producing a position outside [0, 1].
 */
const spatialAnchorArb: fc.Arbitrary<SpatialAnchor> = fc.record({
  region: fc.constantFrom('header', 'main', 'sidebar', 'footer') as fc.Arbitrary<'header' | 'main' | 'sidebar' | 'footer'>,
  relativePosition: fc.record({
    x: fc.double({ min: 0, max: 1, noNaN: true }),
    y: fc.double({ min: 0, max: 1, noNaN: true }),
  }),
  nearbyLandmarks: fc.array(
    fc.constantFrom('NAV_ID:HOME', 'NAV_ID:NAVIGATION', 'ACTION_ID:SEARCH', 'ACTION_ID:LOGIN'),
    { minLength: 0, maxLength: 5 }
  ),
});
86
+
87
/**
 * Arbitrary producing a SemanticAnchor: an intent id from a fixed list,
 * 1-5 lorem labels, 0-2 ARIA roles and a lorem text content.
 */
const semanticAnchorModel = {
  intentId: fc.constantFrom('ACTION_ID:SUBMIT', 'ACTION_ID:LOGIN', 'INPUT_ID:EMAIL_INPUT', 'NAV_ID:HOME'),
  labels: fc.array(fc.lorem({ maxCount: 3 }), { minLength: 1, maxLength: 5 }),
  ariaRoles: fc.array(fc.constantFrom('button', 'link', 'textbox'), { minLength: 0, maxLength: 2 }),
  textContent: fc.lorem({ maxCount: 5 }),
};
const semanticAnchorArb: fc.Arbitrary<SemanticAnchor> = fc.record(semanticAnchorModel);
101
+
102
/**
 * Arbitrary producing a FunctionalAnchor: 1-4 DOM event type names plus an
 * optional form target and an optional navigation target (each may be
 * `undefined`).
 */
const functionalAnchorModel = {
  eventTypes: fc.array(
    fc.constantFrom('click', 'input', 'change', 'focus', 'blur', 'submit'),
    { minLength: 1, maxLength: 4 }
  ),
  formTarget: fc.option(fc.constant('form'), { nil: undefined }),
  navigationTarget: fc.option(fc.constantFrom('NAV_ID:HOME', 'NAV_ID:PROFILE'), { nil: undefined }),
};
const functionalAnchorArb: fc.Arbitrary<FunctionalAnchor> = fc.record(functionalAnchorModel);
110
+
111
/**
 * Arbitrary producing a full AnchorSet by combining one spatial, one
 * semantic and one functional anchor, generated independently.
 */
const anchorSetModel = {
  spatial: spatialAnchorArb,
  semantic: semanticAnchorArb,
  functional: functionalAnchorArb,
};
const anchorSetArb: fc.Arbitrary<AnchorSet> = fc.record(anchorSetModel);
119
+
120
+ // =============================================================================
121
+ // Property Tests
122
+ // =============================================================================
123
+
124
+ describe('Triangulation Engine Property Tests', () => {
125
/**
 * **Feature: omnibridge, Property 4: Three-Anchor Storage**
 * **Validates: Requirements 2.1**
 *
 * Every AnchorSet returned by `storeAnchors` must carry a Spatial, a Semantic
 * and a Functional anchor, each with its required fields.
 */
test('Property 4: Three-Anchor Storage - all elements have all three anchor types', () => {
  const knownRegions = ['header', 'main', 'sidebar', 'footer'];

  fc.assert(
    fc.property(intentDocumentArb, (document) => {
      document.elements.forEach((element, index) => {
        const anchors = engine.storeAnchors(element, document, index);

        // All three anchor kinds must be present.
        for (const kind of ['spatial', 'semantic', 'functional']) {
          expect(anchors).toHaveProperty(kind);
        }

        // Spatial anchor: required fields, known region, coordinates within [0, 1].
        const spatial = anchors.spatial;
        for (const field of ['region', 'relativePosition', 'nearbyLandmarks']) {
          expect(spatial).toHaveProperty(field);
        }
        expect(knownRegions).toContain(spatial.region);
        for (const coordinate of [spatial.relativePosition.x, spatial.relativePosition.y]) {
          expect(coordinate).toBeGreaterThanOrEqual(0);
          expect(coordinate).toBeLessThanOrEqual(1);
        }
        expect(Array.isArray(spatial.nearbyLandmarks)).toBe(true);

        // Semantic anchor: required fields and their basic types.
        const semantic = anchors.semantic;
        for (const field of ['intentId', 'labels', 'ariaRoles', 'textContent']) {
          expect(semantic).toHaveProperty(field);
        }
        expect(typeof semantic.intentId).toBe('string');
        expect(Array.isArray(semantic.labels)).toBe(true);
        expect(Array.isArray(semantic.ariaRoles)).toBe(true);

        // Functional anchor: event types must be an array.
        const functional = anchors.functional;
        expect(functional).toHaveProperty('eventTypes');
        expect(Array.isArray(functional.eventTypes)).toBe(true);
      });

      return true;
    }),
    { numRuns: 100 }
  );
});
175
+
176
+
177
/**
 * **Feature: omnibridge, Property 5: Two-of-Three Healing**
 * **Validates: Requirements 2.2**
 *
 * For any UI change where at least 2 of 3 anchors match, the Triangulation_Engine
 * SHALL successfully locate the element and return `found: true`.
 *
 * This test exercises the identity case only: anchors are stored for one
 * element and then looked up in the same, unchanged document.
 *
 * The element index is derived from a fast-check generated natural number
 * rather than `Math.random()`, so a failing run can be replayed from its
 * seed and shrunk like any other input.
 */
test('Property 5: Two-of-Three Healing - elements found when 2+ anchors match', () => {
  fc.assert(
    fc.property(intentDocumentArb, fc.nat(), (document, pick) => {
      // Defensive guard: nothing to look up in an empty document
      // (also avoids a modulo-by-zero below).
      if (document.elements.length === 0) return true;

      // Map the generated number onto a valid element index.
      const elementIndex = pick % document.elements.length;
      const element = document.elements[elementIndex];
      const anchors = engine.storeAnchors(element, document, elementIndex);

      // Look the element up again in the very same document.
      const result = engine.locate(anchors, document);

      // The lookup must succeed with at least two anchors agreeing.
      expect(result.found).toBe(true);
      expect(result.matchedAnchors).toBeGreaterThanOrEqual(2);

      return true;
    }),
    { numRuns: 100 }
  );
});
209
+
210
/**
 * **Feature: omnibridge, Property 6: Confidence Threshold Alerting**
 * **Validates: Requirements 2.4, 2.5**
 *
 * A lookup result whose confidence is under 70% must carry
 * `flaggedForReview: true`; at 70% or above it must carry `false`.
 */
test('Property 6: Confidence Threshold Alerting - low confidence flagged for review', () => {
  const reviewThreshold = 0.7;

  // Randomly generated anchors are looked up in an unrelated document,
  // so most results are expected to be poor matches.
  const flagFollowsConfidence = (document: IntentDocument, randomAnchors: AnchorSet): boolean => {
    const { confidence, flaggedForReview } = engine.locate(randomAnchors, document);

    if (confidence < reviewThreshold) {
      // Below the threshold: must be flagged.
      expect(flaggedForReview).toBe(true);
    } else if (confidence >= reviewThreshold) {
      // At or above the threshold: must not be flagged.
      expect(flaggedForReview).toBe(false);
    }

    return true;
  };

  fc.assert(fc.property(intentDocumentArb, anchorSetArb, flagFollowsConfidence), { numRuns: 100 });
});
238
+
239
/**
 * **Feature: omnibridge, Property 7: Schema Drift Detection**
 * **Validates: Requirements 2.6**
 *
 * For any element lookup where all three anchors fail to match, the
 * Triangulation_Engine SHALL return an error with `type: 'schema_drift'`.
 *
 * The error type is asserted unconditionally once zero anchors matched.
 * Previously the assertion was nested inside a check of that same type,
 * so an error of a different type could never fail the test.
 */
test('Property 7: Schema Drift Detection - returns schema_drift when all anchors fail', () => {
  fc.assert(
    fc.property(intentDocumentArb, (document) => {
      // Anchors built from ids, labels, roles and events that the generators
      // above never produce. The region is a real one ('sidebar'), so the
      // assertions below stay conditional on what the engine actually reports.
      const unmatchableAnchors: AnchorSet = {
        spatial: {
          region: 'sidebar',
          relativePosition: { x: 0.99, y: 0.99 },
          nearbyLandmarks: ['NONEXISTENT_ID:FAKE_ELEMENT', 'NONEXISTENT_ID:ANOTHER_FAKE'],
        },
        semantic: {
          intentId: 'NONEXISTENT_ID:COMPLETELY_FAKE_ELEMENT_THAT_DOES_NOT_EXIST',
          labels: ['xyzzy_nonexistent_label_12345', 'plugh_fake_label_67890'],
          ariaRoles: ['nonexistent_role'],
          textContent: 'xyzzy_plugh_nonexistent_text_content_that_will_never_match',
        },
        functional: {
          eventTypes: ['nonexistent_event_type'],
          formTarget: 'nonexistent_form_target',
          navigationTarget: 'NONEXISTENT_ID:FAKE_NAV_TARGET',
        },
      };

      const result = engine.locate(unmatchableAnchors, document);
      const schemaDriftError = engine.detectSchemaDrift(unmatchableAnchors, document);

      // If no anchors matched, we must get a schema_drift error.
      if (result.matchedAnchors === 0) {
        expect(schemaDriftError).not.toBeNull();
        // Unconditional: a missing error or a different error type fails here.
        expect(schemaDriftError?.type).toBe('schema_drift');
        // The comparison narrows the type so `details` can be read safely.
        if (schemaDriftError?.type === 'schema_drift') {
          expect(typeof schemaDriftError.details).toBe('string');
          expect(schemaDriftError.details.length).toBeGreaterThan(0);
        }
      }

      // If the element was found (some anchors matched), no schema drift is reported.
      if (result.found) {
        expect(schemaDriftError).toBeNull();
      }

      return true;
    }),
    { numRuns: 100 }
  );
});
292
+ });