nexus-agents 2.71.0 → 2.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/dist/{adaptive-memory-MKSYEBST.js → adaptive-memory-UPE76IP6.js} +5 -5
  2. package/dist/{chunk-DWLATKBK.js → child-mcp-config-5HRJGLCR.js} +6 -4
  3. package/dist/child-mcp-config-5HRJGLCR.js.map +1 -0
  4. package/dist/{chunk-7LHQBMBM.js → chunk-2JMUX5UA.js} +25 -12
  5. package/dist/{chunk-7LHQBMBM.js.map → chunk-2JMUX5UA.js.map} +1 -1
  6. package/dist/{chunk-ZPPX2K57.js → chunk-2KB63QGE.js} +2 -2
  7. package/dist/{chunk-L2LQ3TSV.js → chunk-2MD5MWCK.js} +2 -2
  8. package/dist/{chunk-ANC3HU6F.js → chunk-345KMHWH.js} +6 -6
  9. package/dist/chunk-345KMHWH.js.map +1 -0
  10. package/dist/{chunk-NER7H3RJ.js → chunk-3FIDMWFC.js} +2 -2
  11. package/dist/{chunk-AGVLFRN7.js → chunk-3HR6UJ2E.js} +2269 -7188
  12. package/dist/chunk-3HR6UJ2E.js.map +1 -0
  13. package/dist/{chunk-POQQ7A5E.js → chunk-53K3KEKT.js} +51 -707
  14. package/dist/chunk-53K3KEKT.js.map +1 -0
  15. package/dist/chunk-5MHIWRKB.js +691 -0
  16. package/dist/chunk-5MHIWRKB.js.map +1 -0
  17. package/dist/{chunk-VGZJIR22.js → chunk-5WQ3SRSE.js} +2 -2
  18. package/dist/{chunk-TOYPY5XA.js → chunk-A35XORXU.js} +73 -10
  19. package/dist/chunk-A35XORXU.js.map +1 -0
  20. package/dist/chunk-BVETPIOQ.js +556 -0
  21. package/dist/chunk-BVETPIOQ.js.map +1 -0
  22. package/dist/{chunk-OF7CYMMA.js → chunk-DA5UDQYW.js} +2 -2
  23. package/dist/{chunk-XATH462F.js → chunk-ES6GFP35.js} +186 -34
  24. package/dist/chunk-ES6GFP35.js.map +1 -0
  25. package/dist/chunk-GOT7OAL5.js +59 -0
  26. package/dist/chunk-GOT7OAL5.js.map +1 -0
  27. package/dist/{chunk-LJT65EA7.js → chunk-I7ORMAO7.js} +2 -2
  28. package/dist/{chunk-LMRKHQG5.js → chunk-L6N2S3UB.js} +2 -2
  29. package/dist/{chunk-7OBFO4GF.js → chunk-O4KUCF5S.js} +125 -40
  30. package/dist/chunk-O4KUCF5S.js.map +1 -0
  31. package/dist/chunk-P5OFZWDW.js +303 -0
  32. package/dist/chunk-P5OFZWDW.js.map +1 -0
  33. package/dist/{chunk-MJHOSM5U.js → chunk-QECRZ3YA.js} +2 -2
  34. package/dist/{chunk-WYSHXPKK.js → chunk-QL4HCYRD.js} +4 -44
  35. package/dist/chunk-QL4HCYRD.js.map +1 -0
  36. package/dist/{chunk-E66KFRSJ.js → chunk-TF3GROMO.js} +2 -2
  37. package/dist/{chunk-U3HZQTUF.js → chunk-TQFRPFMG.js} +2 -2
  38. package/dist/{chunk-KJCSRP34.js → chunk-V7ATY4BG.js} +3 -3
  39. package/dist/{chunk-32RIOULO.js → chunk-VPC3YNFR.js} +2 -2
  40. package/dist/{chunk-3BKVYSY6.js → chunk-VTVKC4FS.js} +4 -4
  41. package/dist/{chunk-U6BK5DQU.js → chunk-XHVDKY3X.js} +315 -31
  42. package/dist/chunk-XHVDKY3X.js.map +1 -0
  43. package/dist/cli-circuit-breaker-GFF2RLBZ.js +14 -0
  44. package/dist/cli.d.ts +3 -1
  45. package/dist/cli.js +1038 -1581
  46. package/dist/cli.js.map +1 -1
  47. package/dist/{composite-router-AYVJPIOS.js → composite-router-33F3F74I.js} +4 -4
  48. package/dist/{consensus-vote-EXWACBMR.js → consensus-vote-5V4KVHBE.js} +12 -11
  49. package/dist/doctor-deep-AHDTNURD.js +13 -0
  50. package/dist/expert-bridge-DMDHHDEU.js +11 -0
  51. package/dist/factory-FVD7PZ6S.js +15 -0
  52. package/dist/{factory-KMBWFIX2.js → factory-VQS3HJ7V.js} +6 -6
  53. package/dist/index.d.ts +358 -3357
  54. package/dist/index.js +70 -807
  55. package/dist/index.js.map +1 -1
  56. package/dist/init-opencode-EIOIPVWL.js +158 -0
  57. package/dist/init-opencode-EIOIPVWL.js.map +1 -0
  58. package/dist/issue-triage-HJUJWGAD.js +16 -0
  59. package/dist/{learning-persistence-FILWP3IR.js → learning-persistence-N6ILD2HX.js} +3 -3
  60. package/dist/{mobimem-77W5ED4Z.js → mobimem-BOJFXQ7B.js} +4 -4
  61. package/dist/{nexus-data-dir-M6DYKIHJ.js → nexus-data-dir-77UO7N6J.js} +2 -2
  62. package/dist/{registry-command-BBLIXULQ.js → registry-command-NCWUJKAF.js} +4 -4
  63. package/dist/{repo-security-plan-7SNM7JQN.js → repo-security-plan-3J45VAD6.js} +5 -5
  64. package/dist/research-helpers-synthesize-UGQHZZJN.js +12 -0
  65. package/dist/{routing-memory-DCIZEEVC.js → routing-memory-NO7QEH7T.js} +4 -4
  66. package/dist/{session-memory-5TSAASQW.js → session-memory-DOXLEWEU.js} +5 -5
  67. package/dist/{setup-command-5VGIQETA.js → setup-command-DVEBFKR2.js} +10 -10
  68. package/dist/setup-config-E3JZYSLR.js +11 -0
  69. package/dist/{setup-custom-api-IQX3GD2D.js → setup-custom-api-DHJ5DRH2.js} +6 -6
  70. package/dist/{weather-report-NETGWTJX.js → weather-report-FNN4OX3N.js} +4 -4
  71. package/package.json +1 -1
  72. package/dist/chunk-7OBFO4GF.js.map +0 -1
  73. package/dist/chunk-AGVLFRN7.js.map +0 -1
  74. package/dist/chunk-ANC3HU6F.js.map +0 -1
  75. package/dist/chunk-DWLATKBK.js.map +0 -1
  76. package/dist/chunk-FDNWRZNJ.js +0 -22
  77. package/dist/chunk-FDNWRZNJ.js.map +0 -1
  78. package/dist/chunk-POQQ7A5E.js.map +0 -1
  79. package/dist/chunk-TOYPY5XA.js.map +0 -1
  80. package/dist/chunk-U6BK5DQU.js.map +0 -1
  81. package/dist/chunk-WYSHXPKK.js.map +0 -1
  82. package/dist/chunk-XATH462F.js.map +0 -1
  83. package/dist/cli-circuit-breaker-2CJ6NV52.js +0 -14
  84. package/dist/doctor-deep-BJFDBGPO.js +0 -13
  85. package/dist/expert-bridge-75WNNWI4.js +0 -11
  86. package/dist/factory-H5BYL4V5.js +0 -15
  87. package/dist/issue-triage-4SEP4WID.js +0 -16
  88. package/dist/mcp-config-OCWIXE2Y.js +0 -13
  89. package/dist/research-helpers-synthesize-7CI2FJE5.js +0 -12
  90. package/dist/setup-config-EA5RDIO2.js +0 -11
  91. package/dist/weather-report-NETGWTJX.js.map +0 -1
  92. /package/dist/{adaptive-memory-MKSYEBST.js.map → adaptive-memory-UPE76IP6.js.map} +0 -0
  93. /package/dist/{chunk-ZPPX2K57.js.map → chunk-2KB63QGE.js.map} +0 -0
  94. /package/dist/{chunk-L2LQ3TSV.js.map → chunk-2MD5MWCK.js.map} +0 -0
  95. /package/dist/{chunk-NER7H3RJ.js.map → chunk-3FIDMWFC.js.map} +0 -0
  96. /package/dist/{chunk-VGZJIR22.js.map → chunk-5WQ3SRSE.js.map} +0 -0
  97. /package/dist/{chunk-OF7CYMMA.js.map → chunk-DA5UDQYW.js.map} +0 -0
  98. /package/dist/{chunk-LJT65EA7.js.map → chunk-I7ORMAO7.js.map} +0 -0
  99. /package/dist/{chunk-LMRKHQG5.js.map → chunk-L6N2S3UB.js.map} +0 -0
  100. /package/dist/{chunk-MJHOSM5U.js.map → chunk-QECRZ3YA.js.map} +0 -0
  101. /package/dist/{chunk-E66KFRSJ.js.map → chunk-TF3GROMO.js.map} +0 -0
  102. /package/dist/{chunk-U3HZQTUF.js.map → chunk-TQFRPFMG.js.map} +0 -0
  103. /package/dist/{chunk-KJCSRP34.js.map → chunk-V7ATY4BG.js.map} +0 -0
  104. /package/dist/{chunk-32RIOULO.js.map → chunk-VPC3YNFR.js.map} +0 -0
  105. /package/dist/{chunk-3BKVYSY6.js.map → chunk-VTVKC4FS.js.map} +0 -0
  106. /package/dist/{cli-circuit-breaker-2CJ6NV52.js.map → cli-circuit-breaker-GFF2RLBZ.js.map} +0 -0
  107. /package/dist/{composite-router-AYVJPIOS.js.map → composite-router-33F3F74I.js.map} +0 -0
  108. /package/dist/{consensus-vote-EXWACBMR.js.map → consensus-vote-5V4KVHBE.js.map} +0 -0
  109. /package/dist/{doctor-deep-BJFDBGPO.js.map → doctor-deep-AHDTNURD.js.map} +0 -0
  110. /package/dist/{expert-bridge-75WNNWI4.js.map → expert-bridge-DMDHHDEU.js.map} +0 -0
  111. /package/dist/{factory-H5BYL4V5.js.map → factory-FVD7PZ6S.js.map} +0 -0
  112. /package/dist/{factory-KMBWFIX2.js.map → factory-VQS3HJ7V.js.map} +0 -0
  113. /package/dist/{issue-triage-4SEP4WID.js.map → issue-triage-HJUJWGAD.js.map} +0 -0
  114. /package/dist/{learning-persistence-FILWP3IR.js.map → learning-persistence-N6ILD2HX.js.map} +0 -0
  115. /package/dist/{mcp-config-OCWIXE2Y.js.map → mobimem-BOJFXQ7B.js.map} +0 -0
  116. /package/dist/{mobimem-77W5ED4Z.js.map → nexus-data-dir-77UO7N6J.js.map} +0 -0
  117. /package/dist/{registry-command-BBLIXULQ.js.map → registry-command-NCWUJKAF.js.map} +0 -0
  118. /package/dist/{nexus-data-dir-M6DYKIHJ.js.map → repo-security-plan-3J45VAD6.js.map} +0 -0
  119. /package/dist/{repo-security-plan-7SNM7JQN.js.map → research-helpers-synthesize-UGQHZZJN.js.map} +0 -0
  120. /package/dist/{research-helpers-synthesize-7CI2FJE5.js.map → routing-memory-NO7QEH7T.js.map} +0 -0
  121. /package/dist/{routing-memory-DCIZEEVC.js.map → session-memory-DOXLEWEU.js.map} +0 -0
  122. /package/dist/{session-memory-5TSAASQW.js.map → setup-command-DVEBFKR2.js.map} +0 -0
  123. /package/dist/{setup-command-5VGIQETA.js.map → setup-config-E3JZYSLR.js.map} +0 -0
  124. /package/dist/{setup-custom-api-IQX3GD2D.js.map → setup-custom-api-DHJ5DRH2.js.map} +0 -0
  125. /package/dist/{setup-config-EA5RDIO2.js.map → weather-report-FNN4OX3N.js.map} +0 -0
package/dist/index.d.ts CHANGED
@@ -2,7 +2,6 @@ import { ZodError, z, ZodType, ZodSafeParseResult } from 'zod';
2
2
  import { C as CliNameLiteral, M as ModelId } from './model-capabilities-types-B57GZryc.js';
3
3
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
4
4
  import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js';
5
- import { ChildProcess } from 'node:child_process';
6
5
 
7
6
  /**
8
7
  * nexus-agents - Version constant
@@ -487,7 +486,7 @@ interface AgentResponse {
487
486
  /**
488
487
  * Context provided during agent initialization.
489
488
  */
490
- interface AgentContext$1 {
489
+ interface AgentContext {
491
490
  /** Agent configuration */
492
491
  config: AgentConfig;
493
492
  /** Available tools */
@@ -537,7 +536,7 @@ interface IAgent {
537
536
  * @param ctx - Agent context
538
537
  * @returns Result with void or AgentError
539
538
  */
540
- initialize(ctx: AgentContext$1): Promise<Result<void, AgentError>>;
539
+ initialize(ctx: AgentContext): Promise<Result<void, AgentError>>;
541
540
  /**
542
541
  * Cleanup agent resources.
543
542
  */
@@ -3201,6 +3200,46 @@ interface IModelAdapter {
3201
3200
  * @returns Ok if valid, ConfigError if invalid
3202
3201
  */
3203
3202
  validateConfig(): Result<void, ConfigError>;
3203
+ /**
3204
+ * (Optional, #2529) List models served by this adapter's endpoint.
3205
+ *
3206
+ * Implemented by adapters facing OpenAI-compatible endpoints (the
3207
+ * upstream OpenAI API, OpenRouter, vLLM, custom gateways, etc.) —
3208
+ * usually wraps `GET /v1/models`. Result is the harness-side identity
3209
+ * resolver's most-trusted signal for "what model is actually being
3210
+ * served behind this adapter."
3211
+ *
3212
+ * Subprocess-CLI adapters (claude / codex / gemini / opencode) leave
3213
+ * this undefined; identity for those falls back to `modelId` parse.
3214
+ *
3215
+ * Implementations should cache the result for ~5 minutes — operators
3216
+ * shouldn't pay round-trip latency on every resolve. Failures
3217
+ * (network error, endpoint unsupported, auth missing) should throw
3218
+ * so the caller can fall back; do NOT silently return an empty list.
3219
+ */
3220
+ listModels?(): Promise<readonly ModelMetadata[]>;
3221
+ }
3222
+ /**
3223
+ * Metadata for one model served by an OpenAI-compatible endpoint
3224
+ * (#2529). Mirrors the shape of `GET /v1/models`. Most fields are
3225
+ * optional because gateways differ in what they expose.
3226
+ */
3227
+ interface ModelMetadata {
3228
+ /** Stable model id — matches what callers pass as `modelId` to `complete`. */
3229
+ readonly id: string;
3230
+ /** Free-form vendor / org tag. Upstream OpenAI: `openai`/`system`. OpenRouter: `anthropic`/`google`/etc. */
3231
+ readonly ownedBy?: string;
3232
+ /** Unix epoch seconds when the model was created (when the gateway reports it). */
3233
+ readonly createdAt?: number;
3234
+ /** Free-form capability strings the gateway exposes — passthrough, no normalisation. */
3235
+ readonly capabilities?: readonly string[];
3236
+ /** Maximum context window in tokens — populated by gateways that report it (OpenRouter does). */
3237
+ readonly contextLength?: number;
3238
+ /** Pricing — passthrough only. Gateway-defined units. */
3239
+ readonly pricing?: {
3240
+ readonly input?: number;
3241
+ readonly output?: number;
3242
+ };
3204
3243
  }
3205
3244
 
3206
3245
  /**
@@ -5815,6 +5854,22 @@ declare class OpenAIAdapter extends BaseAdapter {
5815
5854
  * Creates an empty response when no choices are returned.
5816
5855
  */
5817
5856
  private createEmptyResponse;
5857
+ /**
5858
+ * (#2529) List models served by this OpenAI-compatible endpoint.
5859
+ *
5860
+ * Wraps `GET /v1/models`. Result is cached for `LIST_MODELS_TTL_MS`
5861
+ * so identity resolution doesn't round-trip on every adapter.
5862
+ * Concurrent callers share the in-flight promise.
5863
+ *
5864
+ * Throws on non-2xx so the harness-side identity resolver knows to
5865
+ * fall back to modelId parsing — silent empty-list returns would be
5866
+ * indistinguishable from "this gateway has no models", which a
5867
+ * misconfigured endpoint shouldn't be allowed to claim.
5868
+ */
5869
+ listModels(): Promise<readonly ModelMetadata[]>;
5870
+ private modelsCache;
5871
+ private modelsInFlight;
5872
+ private fetchModels;
5818
5873
  }
5819
5874
  /**
5820
5875
  * Creates an OpenAIAdapter with the specified configuration.
@@ -6190,7 +6245,7 @@ type CircuitState = 'closed' | 'open' | 'half-open';
6190
6245
  /**
6191
6246
  * Categories of failures for circuit breaker decisions.
6192
6247
  */
6193
- type FailureCategory$1 = 'timeout' | 'crash' | 'authentication' | 'rate_limit' | 'connection' | 'unknown';
6248
+ type FailureCategory = 'timeout' | 'crash' | 'authentication' | 'rate_limit' | 'connection' | 'unknown';
6194
6249
  /**
6195
6250
  * Configuration options for circuit breaker.
6196
6251
  */
@@ -6273,7 +6328,7 @@ interface ICircuitBreaker {
6273
6328
  /**
6274
6329
  * Records a failure manually (for external failure detection).
6275
6330
  */
6276
- recordFailure(category: FailureCategory$1): void;
6331
+ recordFailure(category: FailureCategory): void;
6277
6332
  /**
6278
6333
  * Records a success manually (for external success detection).
6279
6334
  */
@@ -6295,12 +6350,12 @@ declare class CircuitError extends NexusError {
6295
6350
  readonly circuitErrorCode: CircuitErrorCode;
6296
6351
  readonly cliName: CliName;
6297
6352
  readonly circuitState: CircuitState;
6298
- readonly failureCategory?: FailureCategory$1;
6353
+ readonly failureCategory?: FailureCategory;
6299
6354
  constructor(message: string, options: {
6300
6355
  circuitErrorCode: CircuitErrorCode;
6301
6356
  cliName: CliName;
6302
6357
  circuitState: CircuitState;
6303
- failureCategory?: FailureCategory$1;
6358
+ failureCategory?: FailureCategory;
6304
6359
  cause?: Error;
6305
6360
  });
6306
6361
  }
@@ -8564,7 +8619,7 @@ declare abstract class BaseAgent implements IAgent {
8564
8619
  get state(): AgentState$2;
8565
8620
  /** Builds the context state object for helper functions. */
8566
8621
  private get contextState();
8567
- initialize(ctx: AgentContext$1): Promise<Result<void, AgentError>>;
8622
+ initialize(ctx: AgentContext): Promise<Result<void, AgentError>>;
8568
8623
  execute(task: Task$1): Promise<Result<TaskResult, AgentError>>;
8569
8624
  handleMessage(msg: AgentMessage): Promise<Result<AgentResponse, AgentError>>;
8570
8625
  cleanup(): Promise<void>;
@@ -13434,9 +13489,9 @@ type CrossTreeStrategy = 'none' | 'conclusions' | 'insights' | 'full';
13434
13489
  */
13435
13490
  declare const CrossTreeStrategySchema: z.ZodEnum<{
13436
13491
  none: "none";
13492
+ insights: "insights";
13437
13493
  full: "full";
13438
13494
  conclusions: "conclusions";
13439
- insights: "insights";
13440
13495
  }>;
13441
13496
  /**
13442
13497
  * Strategy for pruning low-quality branches.
@@ -13521,9 +13576,9 @@ declare const ForestConfigSchema: z.ZodObject<{
13521
13576
  explorationConstant: z.ZodDefault<z.ZodNumber>;
13522
13577
  crossTreeStrategy: z.ZodDefault<z.ZodEnum<{
13523
13578
  none: "none";
13579
+ insights: "insights";
13524
13580
  full: "full";
13525
13581
  conclusions: "conclusions";
13526
- insights: "insights";
13527
13582
  }>>;
13528
13583
  pruningStrategy: z.ZodDefault<z.ZodEnum<{
13529
13584
  score: "score";
@@ -13996,7 +14051,7 @@ declare const SharedInsightSchema: z.ZodObject<{
13996
14051
  /**
13997
14052
  * A pattern that has been identified as ineffective.
13998
14053
  */
13999
- interface FailurePattern$1 {
14054
+ interface FailurePattern {
14000
14055
  /** Pattern description */
14001
14056
  readonly pattern: string;
14002
14057
  /** Number of times this pattern failed */
@@ -14021,7 +14076,7 @@ interface CrossTreeInfo {
14021
14076
  /** Useful intermediate results from other trees */
14022
14077
  readonly sharedInsights: readonly SharedInsight[];
14023
14078
  /** Patterns that have been proven ineffective */
14024
- readonly failurePatterns: readonly FailurePattern$1[];
14079
+ readonly failurePatterns: readonly FailurePattern[];
14025
14080
  }
14026
14081
  /**
14027
14082
  * Schema for CrossTreeInfo validation.
@@ -20004,6 +20059,39 @@ declare function registerDelegateToModelTool(server: McpServer, deps: DelegateDe
20004
20059
  * (Source: Issue #833 — Orchestrator checkpointing)
20005
20060
  */
20006
20061
 
20062
+ /**
20063
+ * Captures a paused-execution context when a node returns an Interrupt.
20064
+ * Persisted alongside the checkpoint so the resume() caller can read the
20065
+ * value the node surfaced and supply a matching `{[id]: resumeValue}` map.
20066
+ */
20067
+ interface CheckpointInterrupt {
20068
+ /** Node that returned the interrupt — re-runnable as the first step on resume. */
20069
+ readonly nodeId: string;
20070
+ /** Stable interrupt id from the Interrupt envelope. */
20071
+ readonly interruptId: string;
20072
+ /** Value the node surfaced for the human. */
20073
+ readonly value: unknown;
20074
+ /** ISO timestamp when the interrupt fired. */
20075
+ readonly createdAt: string;
20076
+ /**
20077
+ * ISO timestamp when this interrupt was consumed by a successful
20078
+ * resumeFromCheckpoint() call. A second resume against the same checkpoint
20079
+ * is rejected — see #2425 idempotency requirement.
20080
+ */
20081
+ readonly consumedAt?: string;
20082
+ /**
20083
+ * Additional interrupts dropped because they fired in the same super-step
20084
+ * as the primary one (#2425 multi-interrupt observability). Phase 1
20085
+ * silently dropped these; Phase 2 surfaces them so operators can detect
20086
+ * lost human-input requests in the wild. The executor still only honors
20087
+ * the primary interrupt; downstream tooling can fan out from this list.
20088
+ */
20089
+ readonly additionalInterrupts?: readonly {
20090
+ readonly nodeId: string;
20091
+ readonly interruptId: string;
20092
+ readonly value: unknown;
20093
+ }[];
20094
+ }
20007
20095
  /** Schema version for forward compatibility. */
20008
20096
  declare const CHECKPOINT_SCHEMA_VERSION = 1;
20009
20097
  /**
@@ -20029,6 +20117,12 @@ interface Checkpoint {
20029
20117
  readonly createdAt: string;
20030
20118
  /** Optional metadata for debugging. */
20031
20119
  readonly metadata?: Record<string, unknown> | undefined;
20120
+ /**
20121
+ * If present, the checkpoint was created because a node returned an
20122
+ * Interrupt. The resume API uses this to know which node to re-run and
20123
+ * which interrupt id to match resume values against. (#1895)
20124
+ */
20125
+ readonly interrupt?: CheckpointInterrupt | undefined;
20032
20126
  }
20033
20127
  /**
20034
20128
  * Summary of a checkpoint (for listing without full state).
@@ -20135,10 +20229,61 @@ type StateSchema = Record<string, StateFieldSchema>;
20135
20229
  */
20136
20230
  type GraphState = Record<string, unknown>;
20137
20231
  /**
20138
- * Handler function for a graph node. Receives current state,
20139
- * returns partial state updates.
20232
+ * Marks a deliberate pause in graph execution. Returned (or thrown) by a node
20233
+ * to halt the super-step loop and surface `value` to a human. Resumption is
20234
+ * keyed by `id`: the caller provides `{[id]: resumeValue}` to
20235
+ * `resumeFromCheckpoint(...)`, and the value is delivered to the same node via
20236
+ * its NodeContext on the next run.
20237
+ *
20238
+ * Modeled on langchain-ai/langgraph's Interrupt primitive (#1895).
20239
+ */
20240
+ interface Interrupt {
20241
+ readonly type: 'interrupt';
20242
+ /** Context shown to the human / written to the checkpoint metadata. */
20243
+ readonly value: unknown;
20244
+ /** Stable identifier — matched by the resume() call to inject the value. */
20245
+ readonly id: string;
20246
+ }
20247
+ /**
20248
+ * Re-entry primitive returned by a NodeHandler. Combines state mutation
20249
+ * (`update`) with optional dynamic redirection (`goto`, Phase 2 — not yet
20250
+ * wired) into a single typed envelope.
20251
+ *
20252
+ * In Phase 1 of #1895, only `update` is honored by the executor. `goto` is
20253
+ * accepted in the type to avoid a breaking change when it lands.
20254
+ */
20255
+ interface Command {
20256
+ readonly type: 'command';
20257
+ /** State mutations to merge via the standard reducer pipeline. */
20258
+ readonly update?: Partial<GraphState>;
20259
+ /** Phase 2 — node ID to redirect to. Currently ignored by the executor. */
20260
+ readonly goto?: string;
20261
+ }
20262
+ /**
20263
+ * Per-execution context passed to NodeHandler. Currently just delivers values
20264
+ * provided to `resumeFromCheckpoint(...)` — the node sees `{interrupt_id:
20265
+ * resumed_value}` on the run that follows the resume call.
20266
+ */
20267
+ interface NodeContext {
20268
+ /**
20269
+ * Values supplied to the most recent resume() call, keyed by interrupt id.
20270
+ * Empty object when not resuming. Frozen.
20271
+ */
20272
+ readonly resumeValues: Readonly<Record<string, unknown>>;
20273
+ }
20274
+ /** Allowed return shapes for a NodeHandler. */
20275
+ type NodeReturn = Partial<GraphState> | Interrupt | Command;
20276
+ /**
20277
+ * Handler function for a graph node. Receives current state and an optional
20278
+ * per-run context, returns either:
20279
+ * - `Partial<GraphState>` (legacy, common case) — merged via reducers
20280
+ * - `Command` — `update` portion is merged via reducers
20281
+ * - `Interrupt` — pauses the graph; emits checkpoint with interrupt metadata
20282
+ *
20283
+ * The `ctx` parameter is optional — pre-#1895 handlers that take only `state`
20284
+ * remain valid (additive widening).
20140
20285
  */
20141
- type NodeHandler$1 = (state: Readonly<GraphState>) => Promise<Partial<GraphState>>;
20286
+ type NodeHandler$1 = (state: Readonly<GraphState>, ctx?: NodeContext) => Promise<NodeReturn>;
20142
20287
  /**
20143
20288
  * A node in the workflow graph.
20144
20289
  */
@@ -20188,8 +20333,17 @@ interface NodeResult {
20188
20333
  readonly nodeId: string;
20189
20334
  readonly stateUpdates: Partial<GraphState>;
20190
20335
  readonly durationMs: number;
20191
- readonly status: 'success' | 'failed' | 'skipped';
20336
+ readonly status: 'success' | 'failed' | 'skipped' | 'interrupted';
20192
20337
  readonly error?: string;
20338
+ /** Set when the node returned an Interrupt envelope (#1895). */
20339
+ readonly interrupt?: Interrupt;
20340
+ /**
20341
+ * Set when the node returned a Command with `goto`. The executor uses this
20342
+ * to redirect the next runnable set instead of resolving outgoing edges.
20343
+ * Validated against the compiled graph; unknown targets are logged + ignored.
20344
+ * (#2425)
20345
+ */
20346
+ readonly gotoTarget?: string;
20193
20347
  }
20194
20348
  /**
20195
20349
  * Result of a full graph execution.
@@ -20199,6 +20353,17 @@ interface GraphExecutionResult {
20199
20353
  readonly nodeResults: readonly NodeResult[];
20200
20354
  readonly totalDurationMs: number;
20201
20355
  readonly stepsExecuted: number;
20356
+ /**
20357
+ * Set when execution paused on an Interrupt return. The checkpoint
20358
+ * referenced here can be passed to `resumeFromCheckpoint(...)` along with a
20359
+ * matching `{[interruptId]: resumeValue}` map. (#1895)
20360
+ */
20361
+ readonly halted?: {
20362
+ readonly checkpointId: string;
20363
+ readonly nodeId: string;
20364
+ readonly interruptId: string;
20365
+ readonly value: unknown;
20366
+ };
20202
20367
  }
20203
20368
  /**
20204
20369
  * Options for graph execution.
@@ -20214,6 +20379,12 @@ interface GraphExecuteOptions {
20214
20379
  readonly executionId?: string;
20215
20380
  /** Event listener for streaming observation (Issue #838). */
20216
20381
  readonly onEvent?: (event: GraphEvent) => void;
20382
+ /**
20383
+ * Values supplied for HITL resume. Keyed by Interrupt id; passed to each
20384
+ * NodeHandler via its NodeContext on this run only. Empty when not
20385
+ * resuming. (#1895)
20386
+ */
20387
+ readonly resumeValues?: Readonly<Record<string, unknown>>;
20217
20388
  }
20218
20389
  /** Discriminated union of graph lifecycle events for streaming observation. */
20219
20390
  type GraphEvent = {
@@ -20535,6 +20706,8 @@ declare function createCheckpoint(opts: {
20535
20706
  pendingNodeIds: readonly string[];
20536
20707
  completedResults: readonly NodeResult[];
20537
20708
  metadata?: Record<string, unknown>;
20709
+ /** Set when persisting an interrupt-flavored checkpoint (#1895). */
20710
+ interrupt?: CheckpointInterrupt;
20538
20711
  }): Checkpoint;
20539
20712
  /**
20540
20713
  * Creates a new InMemoryCheckpointStore.
@@ -22382,102 +22555,6 @@ type ExecuteSpecDeps = BaseMcpToolDeps;
22382
22555
  /** Registers the execute_spec tool with an MCP server. @category MCP */
22383
22556
  declare function registerExecuteSpecTool(server: McpServer, deps: ExecuteSpecDeps): void;
22384
22557
 
22385
- /**
22386
- * Session Memory Types and Schemas
22387
- *
22388
- * Type definitions for cross-session episodic memory persistence.
22389
- *
22390
- * @module context/session-memory-types
22391
- * (Source: Issue #130, arXiv:2303.11366 - Reflexion)
22392
- */
22393
-
22394
- /**
22395
- * A learning captured during a session.
22396
- */
22397
- interface SessionLearning {
22398
- /** The pattern or technique learned */
22399
- readonly pattern: string;
22400
- /** Context where this learning applies */
22401
- readonly context: string;
22402
- /** Confidence in this learning (0-1) */
22403
- readonly confidence: number;
22404
- /** Optional source (e.g., task, error, user feedback) */
22405
- readonly source?: string;
22406
- }
22407
- /**
22408
- * A task completed during a session.
22409
- */
22410
- interface CompletedTask {
22411
- /** Issue or task identifier */
22412
- readonly issue?: string | number;
22413
- /** Approach used to complete the task */
22414
- readonly approach: string;
22415
- /** Challenges encountered */
22416
- readonly challenges: readonly string[];
22417
- /** Duration in milliseconds */
22418
- readonly durationMs?: number;
22419
- }
22420
- /**
22421
- * An error resolved during a session.
22422
- */
22423
- interface ResolvedError {
22424
- /** Error message or type */
22425
- readonly error: string;
22426
- /** Solution applied */
22427
- readonly solution: string;
22428
- /** File pattern where this applies */
22429
- readonly filePattern?: string;
22430
- }
22431
- /**
22432
- * Complete session episode data.
22433
- */
22434
- interface SessionEpisode {
22435
- /** Unique session identifier */
22436
- readonly sessionId: string;
22437
- /** Session date (ISO format) */
22438
- readonly date: string;
22439
- /** Session duration in milliseconds */
22440
- readonly durationMs: number;
22441
- /** Brief summary of the session */
22442
- readonly summary: string;
22443
- /** Learnings captured */
22444
- readonly learnings: readonly SessionLearning[];
22445
- /** Tasks completed */
22446
- readonly tasksCompleted: readonly CompletedTask[];
22447
- /** Errors resolved */
22448
- readonly errorsResolved: readonly ResolvedError[];
22449
- }
22450
- /**
22451
- * Error for session memory operations.
22452
- */
22453
- declare class SessionMemoryError extends Error {
22454
- readonly context?: Record<string, unknown> | undefined;
22455
- constructor(message: string, context?: Record<string, unknown> | undefined);
22456
- }
22457
- /**
22458
- * Configuration for SessionMemory.
22459
- */
22460
- interface SessionMemoryConfig {
22461
- /** Base directory for memory storage */
22462
- readonly memoryDir: string;
22463
- /** Maximum episodes to load on session start */
22464
- readonly maxEpisodesToLoad?: number;
22465
- /** Maximum learnings to include in context */
22466
- readonly maxLearningsInContext?: number;
22467
- /** Minimum confidence threshold for learnings */
22468
- readonly minConfidenceThreshold?: number;
22469
- /** Maximum learnings per session (FIFO eviction). */
22470
- readonly maxLearningsPerSession?: number;
22471
- /** Maximum tasks per session (FIFO eviction). */
22472
- readonly maxTasksPerSession?: number;
22473
- /** Maximum errors per session (FIFO eviction). */
22474
- readonly maxErrorsPerSession?: number;
22475
- /** Maximum episode files to retain on disk. Oldest are deleted. */
22476
- readonly maxEpisodeFiles?: number;
22477
- /** Logger instance */
22478
- readonly logger?: ILogger;
22479
- }
22480
-
22481
22558
  /**
22482
22559
  * nexus-agents/mcp - Memory Query Tool
22483
22560
  *
@@ -23900,7 +23977,7 @@ declare function isRetryableError(code: CliErrorCode): boolean;
23900
23977
  * Categorizes a CLI error for circuit breaker tracking.
23901
23978
  * Returns a FailureCategory compatible with the circuit breaker.
23902
23979
  */
23903
- declare function categorizeError(error: CliError): FailureCategory$1;
23980
+ declare function categorizeError(error: CliError): FailureCategory;
23904
23981
  /**
23905
23982
  * Executes a CLI operation with retry logic and optional circuit breaker.
23906
23983
  *
@@ -23974,12 +24051,37 @@ declare abstract class SubprocessCliAdapter extends BaseCliAdapter {
23974
24051
  * Sets up child process event handlers for output collection and error handling.
23975
24052
  */
23976
24053
  private setupChildProcessHandlers;
24054
+ /** Attach stdout/stderr data handlers + capture first-byte time (#2472). */
24055
+ private attachStdoutHandlers;
24056
+ /**
24057
+ * Log spawn-latency vs streaming breakdown at info level (#2472). Emits
24058
+ * one structured event per subprocess invocation, queryable via the
24059
+ * existing trace JSONL infrastructure. The breakdown lets operators
24060
+ * identify whether a slow run was caused by:
24061
+ * - High spawn-latency: model gateway took its time before producing
24062
+ * the first token (cold-start, queueing, network jitter).
24063
+ * - High streaming-time: response body was large or generation slow.
24064
+ * - Total approaches the timeout cap with no first-byte: hung process.
24065
+ *
24066
+ * Structured fields chosen so existing query_trace tooling can group by
24067
+ * cli + provider + model and surface tail-latency outliers.
24068
+ */
24069
+ private logTimingBreakdown;
23977
24070
  /** Classify a subprocess close event into a Result. */
23978
24071
  private classifyCloseResult;
23979
24072
  /**
23980
24073
  * Handles successful subprocess output.
23981
24074
  */
23982
24075
  private handleSubprocessOutput;
24076
+ /**
24077
+ * Handles the parse-failure branch: when the CLI's structured response
24078
+ * parser returned null. Order of recovery attempts (most-specific first):
24079
+ * 1. Rate-limit text in raw stdout (#1320)
24080
+ * 2. Structured CLI error envelope (#2440)
24081
+ * 3. Plaintext fallback for natural-language output (#1401)
24082
+ * 4. Generic PARSE_ERROR with truncated snippet
24083
+ */
24084
+ private handleUnparseableOutput;
23983
24085
  /**
23984
24086
  * Handles subprocess execution errors.
23985
24087
  */
@@ -24538,7 +24640,7 @@ declare function createAllAdapters(logger?: ILogger, codexTransport?: CliTranspo
24538
24640
  * @param cache - Optional cache to use
24539
24641
  * @returns True if CLI is healthy
24540
24642
  */
24541
- declare function isCliAvailable$1(cli: CliName, cache?: ICliDetectionCache): Promise<boolean>;
24643
+ declare function isCliAvailable(cli: CliName, cache?: ICliDetectionCache): Promise<boolean>;
24542
24644
  /**
24543
24645
  * Gets all available CLIs by running health checks.
24544
24646
  * Uses cache if provided to avoid repeated subprocess calls.
@@ -24831,53 +24933,6 @@ declare class TokenCounter implements ITokenCounter {
24831
24933
  */
24832
24934
  declare function createTokenCounter(config?: TokenCounterConfig): TokenCounter;
24833
24935
 
24834
- /** Manages cross-session episodic memory with per-session bounds and disk retention. */
24835
- declare class SessionMemory {
24836
- private readonly memoryDir;
24837
- private readonly maxEpisodesToLoad;
24838
- private readonly maxLearningsInContext;
24839
- private readonly minConfidenceThreshold;
24840
- private readonly maxLearningsPerSession;
24841
- private readonly maxTasksPerSession;
24842
- private readonly maxErrorsPerSession;
24843
- private readonly maxEpisodeFiles;
24844
- private readonly log;
24845
- private currentSession;
24846
- private sessionStartTime;
24847
- constructor(config: SessionMemoryConfig);
24848
- /** Start a new session and load relevant memories. */
24849
- startSession(sessionId: string): Result<readonly SessionLearning[], SessionMemoryError>;
24850
- /** End the current session and persist episode. */
24851
- endSession(summary: string): Result<SessionEpisode, SessionMemoryError>;
24852
- /** Check if a session is currently active. */
24853
- isSessionActive(): boolean;
24854
- /** Get the current session ID. */
24855
- getCurrentSessionId(): string | null;
24856
- /** Get learnings accumulated in the current (unpersisted) session. */
24857
- getCurrentSessionLearnings(): readonly SessionLearning[];
24858
- /** Record a learning during the current session. */
24859
- recordLearning(learning: SessionLearning): Result<void, SessionMemoryError>;
24860
- /** Record a completed task during the current session. */
24861
- recordTask(task: CompletedTask): Result<void, SessionMemoryError>;
24862
- /** Record a resolved error during the current session. */
24863
- recordError(error: ResolvedError): Result<void, SessionMemoryError>;
24864
- /** Load all episodes from the memory directory. */
24865
- loadEpisodes(limit?: number): readonly SessionEpisode[];
24866
- /** Load learnings relevant to the current context. */
24867
- loadRelevantLearnings(): readonly SessionLearning[];
24868
- /** Search for learnings matching a query (includes current session).
24869
- * Uses keyword-based matching: all query words must appear in the text. */
24870
- searchLearnings(query: string): readonly SessionLearning[];
24871
- /** Get recent errors and their solutions. */
24872
- getRecentErrorSolutions(limit?: number): readonly ResolvedError[];
24873
- private ensureMemoryDir;
24874
- private getEpisodeFiles;
24875
- private loadEpisodeFile;
24876
- private persistEpisode;
24877
- /** Delete oldest episode files when count exceeds maxEpisodeFiles. */
24878
- private enforceEpisodeRetention;
24879
- }
24880
-
24881
24936
  /**
24882
24937
  * nexus-agents/learning - SQLite Outcome Storage
24883
24938
  *
@@ -25470,8 +25525,18 @@ declare function logRateLimitAudit(opts: LogRateLimitAuditOpts): void;
25470
25525
  */
25471
25526
  /**
25472
25527
  * Sandbox execution mode.
25528
+ *
25529
+ * - `none`: no isolation; for development only.
25530
+ * - `policy`: rule-based enforcement with no process isolation. Catches
25531
+ * policy violations but a misbehaving process can still touch the host.
25532
+ * - `container`: Docker-based OS-level isolation. Strongest, but requires
25533
+ * Docker on the host.
25534
+ * - `deno`: process-level permission gating via Deno's `--allow-*` flags
25535
+ * (#1898). Weaker than container — same OS, just process permissions —
25536
+ * but works without Docker (Mac without Docker Desktop, locked-down CI
25537
+ * runners). No CPU/memory limits.
25473
25538
  */
25474
- type SandboxMode = 'none' | 'policy' | 'container';
25539
+ type SandboxMode = 'none' | 'policy' | 'container' | 'deno';
25475
25540
  /**
25476
25541
  * Security capability that can be restricted.
25477
25542
  */
@@ -25537,6 +25602,13 @@ interface PolicyEvaluation {
25537
25602
  readonly policyId: string;
25538
25603
  /** Violations found. */
25539
25604
  readonly violations: readonly PolicyViolation$1[];
25605
+ /**
25606
+ * Configuration mismatches the executor surfaces to operators — capabilities
25607
+ * declared in the policy but unenforceable because the corresponding
25608
+ * allowlist is empty (e.g. `process_spawn` set but `allowedCommands: []`).
25609
+ * Source: #2428 ask 1. Not security violations; informational only.
25610
+ */
25611
+ readonly configurationWarnings?: readonly string[];
25540
25612
  }
25541
25613
  /**
25542
25614
  * A specific policy violation.
@@ -25666,6 +25738,14 @@ declare function getPolicy(id: string): SandboxPolicy | undefined;
25666
25738
 
25667
25739
  /**
25668
25740
  * Create a sandbox executor with optional config.
25741
+ *
25742
+ * @deprecated [#2499] Unused in production. The sandbox layer's
25743
+ * supported surface is the validation primitives (`validateCommand`,
25744
+ * `validateArgs`, `SandboxPolicy` types) consumed by
25745
+ * `cli/sandbox-exec.ts`. Slated for removal one minor release after
25746
+ * the deprecation lands; see
25747
+ * [#2499](https://github.com/williamzujkowski/nexus-agents/issues/2499)
25748
+ * for context.
25669
25749
  */
25670
25750
  declare function createSandboxExecutor(config?: Partial<SandboxConfig>): ISandboxExecutor;
25671
25751
 
@@ -27778,7 +27858,7 @@ interface ImprovementSuggestion {
27778
27858
  /**
27779
27859
  * Complete failure analysis result.
27780
27860
  */
27781
- interface FailureAnalysis$1 {
27861
+ interface FailureAnalysis {
27782
27862
  /** Overall pass/fail */
27783
27863
  readonly passed: boolean;
27784
27864
  /** Satisfaction score from validation (0-1) */
@@ -27808,7 +27888,7 @@ interface AnalysisError {
27808
27888
  /**
27809
27889
  * Analyzes execution results for failure patterns.
27810
27890
  */
27811
- declare function analyzeFailures(executionResult: SpecExecutionResult): Result<FailureAnalysis$1, never>;
27891
+ declare function analyzeFailures(executionResult: SpecExecutionResult): Result<FailureAnalysis, never>;
27812
27892
 
27813
27893
  /**
27814
27894
  * Scenario Validator — checks execution results against acceptance criteria.
@@ -28642,3224 +28722,145 @@ interface ITaskTracker {
28642
28722
  }
28643
28723
 
28644
28724
  /**
28645
- * nexus-agents/swe-bench - Type Definitions
28725
+ * nexus-agents/benchmarks - Type Definitions
28646
28726
  *
28647
- * Types for SWE-bench benchmark integration.
28727
+ * Types for performance benchmarking and metrics collection.
28648
28728
  *
28649
- * @module swe-bench/types
28650
- * (Source: Issue #257 - SWE-Bench Evaluation)
28651
- */
28652
-
28653
- /**
28654
- * SWE-bench dataset variants.
28655
- */
28656
- type SWEBenchVariant = 'lite' | 'verified' | 'full';
28657
- /**
28658
- * A single SWE-bench instance representing a GitHub issue.
28729
+ * @module benchmarks/benchmark-types
28730
+ * (Source: Issue #156, Mem0 metrics validation)
28659
28731
  */
28660
- interface SWEBenchInstance {
28661
- /** Unique identifier (e.g., "django__django-11099"). */
28662
- readonly instance_id: string;
28663
- /** Repository name (e.g., "django/django"). */
28664
- readonly repo: string;
28665
- /** Base commit SHA to checkout. */
28666
- readonly base_commit: string;
28667
- /** The problem statement (issue description). */
28668
- readonly problem_statement: string;
28669
- /** Hints for solving the issue (optional). */
28670
- readonly hints_text?: string;
28671
- /** Created at timestamp. */
28672
- readonly created_at: string;
28673
- /** Test patch for evaluation. */
28674
- readonly test_patch?: string;
28675
- /** Version of the repository. */
28676
- readonly version?: string;
28677
- /** Environment setup script. */
28678
- readonly environment_setup_commit?: string;
28679
- /** Tests that should pass after the fix (currently failing). */
28680
- readonly FAIL_TO_PASS?: readonly string[];
28681
- /** Tests that should continue passing after the fix. */
28682
- readonly PASS_TO_PASS?: readonly string[];
28683
- }
28684
28732
  /**
28685
- * A prediction/solution for a SWE-bench instance.
28733
+ * Latency percentile metrics.
28686
28734
  */
28687
- interface SWEBenchPrediction {
28688
- /** Instance ID this prediction is for. */
28689
- readonly instance_id: string;
28690
- /** Model or agent name. */
28691
- readonly model_name_or_path: string;
28692
- /** The generated patch (git diff format). */
28693
- readonly model_patch: string;
28735
+ interface LatencyMetrics {
28736
+ /** Minimum latency in milliseconds. */
28737
+ readonly min: number;
28738
+ /** Maximum latency in milliseconds. */
28739
+ readonly max: number;
28740
+ /** Mean latency in milliseconds. */
28741
+ readonly mean: number;
28742
+ /** 50th percentile (median) in milliseconds. */
28743
+ readonly p50: number;
28744
+ /** 75th percentile in milliseconds. */
28745
+ readonly p75: number;
28746
+ /** 90th percentile in milliseconds. */
28747
+ readonly p90: number;
28748
+ /** 95th percentile in milliseconds. */
28749
+ readonly p95: number;
28750
+ /** 99th percentile in milliseconds. */
28751
+ readonly p99: number;
28752
+ /** Standard deviation in milliseconds. */
28753
+ readonly stdDev: number;
28754
+ /** Total number of samples. */
28755
+ readonly sampleCount: number;
28694
28756
  }
28695
28757
  /**
28696
- * Result of running agent on a single instance.
28758
+ * Throughput metrics.
28697
28759
  */
28698
- interface SWEBenchRunResult {
28699
- /** Instance ID. */
28700
- readonly instance_id: string;
28701
- /** Whether the agent completed without error. */
28702
- readonly completed: boolean;
28703
- /** The generated prediction (if completed). */
28704
- readonly prediction?: SWEBenchPrediction;
28705
- /** Error message if failed. */
28706
- readonly error?: string;
28707
- /** Duration in milliseconds. */
28708
- readonly duration_ms: number;
28709
- /** Token usage. */
28710
- readonly tokens_used?: number;
28711
- /** Number of agent iterations/turns. */
28712
- readonly iterations?: number;
28713
- }
28714
- /**
28715
- * Evaluation result for a single prediction.
28716
- */
28717
- interface SWEBenchEvalResult {
28718
- /** Instance ID. */
28719
- readonly instance_id: string;
28720
- /** Whether the prediction resolved the issue. */
28721
- readonly resolved: boolean;
28722
- /** Test results. */
28723
- readonly tests_status: 'passed' | 'failed' | 'error';
28724
- /** Number of tests that passed. */
28725
- readonly tests_passed?: number;
28726
- /** Number of tests that failed. */
28727
- readonly tests_failed?: number;
28728
- /** Error message if evaluation failed. */
28729
- readonly error?: string;
28730
- }
28731
- /**
28732
- * Summary of a benchmark run.
28733
- */
28734
- interface SWEBenchSummary {
28735
- /** Dataset variant used. */
28736
- readonly variant: SWEBenchVariant;
28737
- /** Total instances in dataset. */
28738
- readonly total_instances: number;
28739
- /** Instances attempted. */
28740
- readonly attempted: number;
28741
- /** Instances completed (no agent error). */
28742
- readonly completed: number;
28743
- /** Instances resolved (passed evaluation). */
28744
- readonly resolved: number;
28745
- /** Resolution rate (resolved / attempted). */
28746
- readonly resolution_rate: number;
28747
- /** Total tokens used. */
28748
- readonly total_tokens: number;
28749
- /** Average tokens per instance. */
28750
- readonly avg_tokens_per_instance: number;
28760
+ interface ThroughputMetrics {
28761
+ /** Operations per second. */
28762
+ readonly opsPerSecond: number;
28763
+ /** Total operations completed. */
28764
+ readonly totalOps: number;
28751
28765
  /** Total duration in milliseconds. */
28752
- readonly total_duration_ms: number;
28753
- /** Average duration per instance. */
28754
- readonly avg_duration_ms: number;
28755
- /** Model name. */
28756
- readonly model: string;
28757
- /** Run timestamp. */
28758
- readonly timestamp: string;
28759
- }
28760
- /**
28761
- * Configuration for running SWE-bench.
28762
- */
28763
- interface SWEBenchConfig {
28764
- /** Dataset variant. */
28765
- readonly variant: SWEBenchVariant;
28766
- /** Model to use. */
28767
- readonly model: CliNameLiteral | 'auto';
28768
- /** Maximum instances to run (for testing). */
28769
- readonly limit?: number;
28770
- /** Output path for predictions. */
28771
- readonly output_path: string;
28772
- /** Whether to resume from checkpoint. */
28773
- readonly resume: boolean;
28774
- /** Timeout per instance in milliseconds. */
28775
- readonly timeout_ms: number;
28776
- /** Maximum agent iterations per instance. */
28777
- readonly max_iterations: number;
28778
- /** Working directory for repo clones. */
28779
- readonly work_dir: string;
28780
- /** Number of concurrent workers (1 = sequential). */
28781
- readonly concurrency: number;
28782
- /** Directory for cross-run memory persistence. Empty string disables. */
28783
- readonly memory_dir: string;
28784
- /** Enable MCP tools in child CLI sessions (memory, research). Default: false. */
28785
- readonly mcp_enabled: boolean;
28766
+ readonly durationMs: number;
28786
28767
  }
28787
28768
  /**
28788
- * Default configuration.
28789
- */
28790
- declare const DEFAULT_SWE_BENCH_CONFIG: SWEBenchConfig;
28791
- /**
28792
- * Checkpoint for resuming a run.
28769
+ * Token usage metrics.
28793
28770
  */
28794
- interface SWEBenchCheckpoint {
28795
- /** Config used for the run. */
28796
- readonly config: SWEBenchConfig;
28797
- /** Instance IDs already processed. */
28798
- readonly completed_instances: readonly string[];
28799
- /** Last processed timestamp. */
28800
- readonly last_updated: string;
28771
+ interface TokenMetrics {
28772
+ /** Total input tokens. */
28773
+ readonly inputTokens: number;
28774
+ /** Total output tokens. */
28775
+ readonly outputTokens: number;
28776
+ /** Total tokens (input + output). */
28777
+ readonly totalTokens: number;
28778
+ /** Average tokens per operation. */
28779
+ readonly avgTokensPerOp: number;
28801
28780
  }
28802
28781
  /**
28803
- * Dataset metadata.
28782
+ * Quality metrics for retrieval operations.
28804
28783
  */
28805
- interface SWEBenchDatasetInfo {
28806
- /** Dataset variant. */
28807
- readonly variant: SWEBenchVariant;
28808
- /** Number of instances. */
28809
- readonly num_instances: number;
28810
- /** Repositories included. */
28811
- readonly repositories: readonly string[];
28812
- /** HuggingFace dataset ID. */
28813
- readonly hf_dataset_id: string;
28784
+ interface QualityMetrics {
28785
+ /** Precision: relevant retrieved / total retrieved. */
28786
+ readonly precision: number;
28787
+ /** Recall: relevant retrieved / total relevant. */
28788
+ readonly recall: number;
28789
+ /** F1 score: harmonic mean of precision and recall. */
28790
+ readonly f1Score: number;
28791
+ /** Mean reciprocal rank. */
28792
+ readonly mrr: number;
28793
+ /** Normalized discounted cumulative gain at k. */
28794
+ readonly ndcgAtK: number;
28814
28795
  }
28815
28796
  /**
28816
- * Dataset variant metadata.
28797
+ * Resource usage metrics.
28817
28798
  */
28818
- declare const SWE_BENCH_DATASETS: Record<SWEBenchVariant, SWEBenchDatasetInfo>;
28819
- /** Relevance of a file to the current issue. */
28820
- type FileRelevance = 'high' | 'medium' | 'low';
28821
- /** A file explored during an iteration. */
28822
- interface ExploredFile {
28823
- readonly path: string;
28824
- readonly relevance: FileRelevance;
28825
- }
28826
- /** Outcome of an approach attempt. */
28827
- type ApproachOutcome = 'patch_invalid' | 'patch_rejected' | 'no_patch' | 'success';
28828
- /** Record of an attempted approach. */
28829
- interface ApproachRecord {
28830
- readonly iteration: number;
28831
- readonly approach: string;
28832
- readonly outcome: ApproachOutcome;
28833
- readonly errorSummary?: string;
28834
- }
28835
- /** Cross-iteration context accumulated during agent execution. */
28836
- interface IterationContext {
28837
- /** Files explored and their relevance. */
28838
- readonly filesExplored: readonly ExploredFile[];
28839
- /** Current root cause hypothesis. */
28840
- readonly rootCauseHypothesis: string | null;
28841
- /** History of approaches attempted. */
28842
- readonly approachHistory: readonly ApproachRecord[];
28799
+ interface ResourceMetrics {
28800
+ /** Peak memory usage in bytes. */
28801
+ readonly peakMemoryBytes: number;
28802
+ /** Average memory usage in bytes. */
28803
+ readonly avgMemoryBytes: number;
28804
+ /** CPU time in milliseconds. */
28805
+ readonly cpuTimeMs: number;
28806
+ /** Database file size in bytes (if applicable). */
28807
+ readonly dbSizeBytes?: number;
28843
28808
  }
28844
-
28845
- /**
28846
- * nexus-agents/swe-bench - Dataset Loader
28847
- *
28848
- * Load SWE-bench datasets from HuggingFace.
28849
- *
28850
- * @module swe-bench/dataset-loader
28851
- * (Source: Issue #257 - SWE-Bench Evaluation)
28852
- */
28853
-
28854
28809
  /**
28855
- * Error types for dataset loading.
28810
+ * Benchmark result for a single operation type.
28856
28811
  */
28857
- declare class DatasetLoadError extends Error {
28858
- readonly cause?: unknown;
28859
- constructor(message: string, cause?: unknown);
28812
+ interface OperationBenchmark {
28813
+ /** Operation name. */
28814
+ readonly operation: string;
28815
+ /** Dataset size used. */
28816
+ readonly datasetSize: number;
28817
+ /** Latency metrics. */
28818
+ readonly latency: LatencyMetrics;
28819
+ /** Throughput metrics. */
28820
+ readonly throughput: ThroughputMetrics;
28821
+ /** Resource metrics. */
28822
+ readonly resources: ResourceMetrics;
28823
+ /** Quality metrics (for retrieval operations). */
28824
+ readonly quality?: QualityMetrics;
28825
+ /** Timestamp when benchmark was run. */
28826
+ readonly timestamp: string;
28860
28827
  }
28861
28828
  /**
28862
- * Options for loading dataset.
28829
+ * Complete benchmark suite result.
28863
28830
  */
28864
- interface DatasetLoadOptions {
28865
- /** Maximum instances to load (for testing). */
28866
- readonly limit?: number;
28867
- /** Skip instances that don't match filter. */
28868
- readonly filter?: (instance: SWEBenchInstance) => boolean;
28869
- /** Specific instance IDs to load. */
28870
- readonly instanceIds?: readonly string[];
28831
+ interface BenchmarkSuiteResult {
28832
+ /** Suite name. */
28833
+ readonly name: string;
28834
+ /** Component being benchmarked. */
28835
+ readonly component: string;
28836
+ /** Version of the component. */
28837
+ readonly version: string;
28838
+ /** Individual operation benchmarks. */
28839
+ readonly operations: readonly OperationBenchmark[];
28840
+ /** Environment information. */
28841
+ readonly environment: BenchmarkEnvironment;
28842
+ /** Overall summary. */
28843
+ readonly summary: BenchmarkSummary;
28871
28844
  }
28872
28845
  /**
28873
- * Result of loading dataset.
28846
+ * Benchmark environment information.
28874
28847
  */
28875
- interface DatasetLoadResult {
28876
- /** Loaded instances. */
28877
- readonly instances: readonly SWEBenchInstance[];
28878
- /** Dataset info. */
28879
- readonly info: SWEBenchDatasetInfo;
28880
- /** Number of instances loaded. */
28881
- readonly count: number;
28882
- /** Number of instances filtered out. */
28883
- readonly filtered: number;
28884
- /** Load duration in ms. */
28885
- readonly durationMs: number;
28848
+ interface BenchmarkEnvironment {
28849
+ /** Node.js version. */
28850
+ readonly nodeVersion: string;
28851
+ /** Platform. */
28852
+ readonly platform: string;
28853
+ /** Architecture. */
28854
+ readonly arch: string;
28855
+ /** CPU model. */
28856
+ readonly cpuModel: string;
28857
+ /** CPU cores. */
28858
+ readonly cpuCores: number;
28859
+ /** Total memory in bytes. */
28860
+ readonly totalMemory: number;
28886
28861
  }
28887
28862
  /**
28888
- * Loads SWE-bench dataset from HuggingFace.
28889
- */
28890
- declare function loadDataset(variant: SWEBenchVariant, options?: DatasetLoadOptions): Promise<Result<DatasetLoadResult, DatasetLoadError>>;
28891
- /**
28892
- * Gets a single instance by ID.
28893
- */
28894
- declare function getInstance(variant: SWEBenchVariant, instanceId: string): Promise<Result<SWEBenchInstance, DatasetLoadError>>;
28895
- /**
28896
- * Lists available instances (IDs only) for a variant.
28897
- */
28898
- declare function listInstances(variant: SWEBenchVariant, options?: DatasetLoadOptions): Promise<Result<readonly string[], DatasetLoadError>>;
28899
- /**
28900
- * Gets dataset info without loading instances.
28901
- */
28902
- declare function getDatasetInfo(variant: SWEBenchVariant): SWEBenchDatasetInfo;
28903
- /**
28904
- * Filters instances by repository.
28905
- */
28906
- declare function filterByRepo(repo: string): (instance: SWEBenchInstance) => boolean;
28907
- /**
28908
- * Filters instances by version.
28909
- */
28910
- declare function filterByVersion(version: string): (instance: SWEBenchInstance) => boolean;
28911
-
28912
- /**
28913
- * nexus-agents/swe-bench - Prediction Writer
28914
- *
28915
- * Write predictions in SWE-bench JSONL format.
28916
- *
28917
- * @module swe-bench/prediction-writer
28918
- * (Source: Issue #257 - SWE-Bench Evaluation)
28919
- */
28920
-
28921
- /**
28922
- * Error types for prediction writing.
28923
- */
28924
- declare class PredictionWriteError extends Error {
28925
- readonly cause?: unknown;
28926
- constructor(message: string, cause?: unknown);
28927
- }
28928
- /**
28929
- * Options for the prediction writer.
28930
- */
28931
- interface PredictionWriterOptions {
28932
- /** Output file path. */
28933
- readonly outputPath: string;
28934
- /** Model name to use in predictions. */
28935
- readonly modelName: string;
28936
- /** Whether to append to existing file. */
28937
- readonly append: boolean;
28938
- }
28939
- /**
28940
- * Creates a prediction from a run result.
28941
- */
28942
- declare function createPrediction(result: SWEBenchRunResult, modelName: string): SWEBenchPrediction | null;
28943
- /**
28944
- * Prediction writer for streaming output.
28945
- */
28946
- declare class PredictionWriter {
28947
- private fileHandle;
28948
- private predictionCount;
28949
- private readonly options;
28950
- constructor(options: PredictionWriterOptions);
28951
- /**
28952
- * Opens the output file for writing.
28953
- */
28954
- open(): Promise<Result<void, PredictionWriteError>>;
28955
- /**
28956
- * Writes a prediction to the output file.
28957
- */
28958
- write(prediction: SWEBenchPrediction): Promise<Result<void, PredictionWriteError>>;
28959
- /**
28960
- * Writes a run result as a prediction (if completed).
28961
- */
28962
- writeResult(result: SWEBenchRunResult): Promise<Result<boolean, PredictionWriteError>>;
28963
- /**
28964
- * Closes the output file.
28965
- */
28966
- close(): Promise<Result<void, PredictionWriteError>>;
28967
- /**
28968
- * Gets the number of predictions written.
28969
- */
28970
- getPredictionCount(): number;
28971
- /**
28972
- * Gets the output path.
28973
- */
28974
- getOutputPath(): string;
28975
- }
28976
- /**
28977
- * Writes multiple predictions to a file at once.
28978
- */
28979
- declare function writePredictions(predictions: readonly SWEBenchPrediction[], outputPath: string, options?: {
28980
- append?: boolean;
28981
- }): Promise<Result<number, PredictionWriteError>>;
28982
- /**
28983
- * Reads predictions from a JSONL file.
28984
- */
28985
- declare function readPredictions(inputPath: string): Promise<Result<readonly SWEBenchPrediction[], PredictionWriteError>>;
28986
- /**
28987
- * Gets instance IDs from a predictions file.
28988
- */
28989
- declare function getCompletedInstanceIds(inputPath: string): Promise<Result<Set<string>, PredictionWriteError>>;
28990
- /**
28991
- * Validates a prediction has required fields.
28992
- */
28993
- declare function validatePrediction(prediction: unknown): prediction is SWEBenchPrediction;
28994
-
28995
- /**
28996
- * nexus-agents/swe-bench - Prompt Templates
28997
- *
28998
- * Prompts for running agents on SWE-bench instances.
28999
- *
29000
- * @module swe-bench/prompt-template
29001
- * (Source: Issue #257 - SWE-Bench Evaluation)
29002
- */
29003
-
29004
- /**
29005
- * System prompt for SWE-bench agent.
29006
- */
29007
- declare const SWE_BENCH_SYSTEM_PROMPT = "You are an expert software engineer solving GitHub issues.\n\nYour task is to find the root cause and fix it with a minimal patch. Change as few lines as possible. Do not refactor surrounding code.\n\nGuidelines:\n1. Read the problem statement carefully.\n2. Read the FAIL_TO_PASS test names to understand expected behavior, but do NOT edit test files.\n3. Start with files mentioned in the error/traceback, then search for the function/class name.\n4. Identify the root cause of the issue.\n5. Edit only the source files needed for a minimal fix. Maintain backward compatibility.\n6. VERIFY your fix by running the failing test(s) BEFORE outputting the patch.\n7. If the test still fails after your fix, analyze the failure output and iterate.\n8. Run `git diff` to confirm your final changes.\n\nCONTEXT BUDGET: You have limited context. Be efficient:\n- Don't read entire files \u2014 use grep/search to find relevant sections\n- Don't explore the whole codebase \u2014 go directly to files mentioned in the error\n- Keep your analysis concise \u2014 focus on the root cause, not comprehensive review\n- If you're running low on context, output your best patch immediately\n\nIMPORTANT: After making your fix, output the patch using this exact format:\n\n```diff\n[paste your \"git diff\" output here]\n```";
29008
- /**
29009
- * Creates a user prompt for a specific SWE-bench instance.
29010
- */
29011
- declare function createInstancePrompt(instance: SWEBenchInstance): string;
29012
- /**
29013
- * Creates a retry prompt when the initial attempt failed.
29014
- */
29015
- declare function createRetryPrompt(error: string, previousPatch?: string, contextSummary?: string): string;
29016
- /**
29017
- * Extracts a git diff patch from agent response.
29018
- */
29019
- declare function extractPatch(response: string): string | null;
29020
- /**
29021
- * Validates that a patch looks like a valid git diff.
29022
- */
29023
- declare function validatePatchFormat(patch: string): {
29024
- valid: boolean;
29025
- error?: string;
29026
- };
29027
- /**
29028
- * Creates a summary prompt for generating final output.
29029
- */
29030
- declare function createSummaryPrompt(instance: SWEBenchInstance, patch: string, iterations: number): string;
29031
- /**
29032
- * Creates initial exploration prompt for understanding the codebase.
29033
- */
29034
- declare function createExplorationPrompt(instance: SWEBenchInstance): string;
29035
-
29036
- /**
29037
- * nexus-agents/swe-bench - Agent Runner Helpers
29038
- *
29039
- * Git operations and result builders extracted from agent-runner.ts.
29040
- *
29041
- * @module swe-bench/agent-runner-helpers
29042
- * (Source: Issue #257 - SWE-Bench Evaluation)
29043
- */
29044
-
29045
- /**
29046
- * Error for agent runner.
29047
- */
29048
- declare class AgentRunnerError extends Error {
29049
- readonly cause?: unknown;
29050
- constructor(message: string, cause?: unknown);
29051
- }
29052
-
29053
- /**
29054
- * nexus-agents/swe-bench - Agent Runner
29055
- *
29056
- * Runs an agent on a SWE-bench instance to generate patches.
29057
- *
29058
- * @module swe-bench/agent-runner
29059
- * (Source: Issue #257 - SWE-Bench Evaluation)
29060
- */
29061
-
29062
- /**
29063
- * Agent execution context.
29064
- */
29065
- interface AgentContext {
29066
- /** Instance being solved. */
29067
- readonly instance: SWEBenchInstance;
29068
- /** Working directory (cloned repo). */
29069
- readonly workDir: string;
29070
- /** Configuration. */
29071
- readonly config: SWEBenchConfig;
29072
- /** Callback for agent messages. */
29073
- readonly onMessage?: (message: string) => void;
29074
- }
29075
- /**
29076
- * Agent executor interface.
29077
- */
29078
- interface IAgentExecutor {
29079
- execute(systemPrompt: string, userPrompt: string, context: AgentContext): Promise<Result<AgentExecutionResult, AgentRunnerError>>;
29080
- }
29081
- /**
29082
- * Result from agent execution.
29083
- */
29084
- interface AgentExecutionResult {
29085
- readonly response: string;
29086
- readonly tokensUsed: number;
29087
- readonly durationMs: number;
29088
- }
29089
- /**
29090
- * Result of a post-patch verification attempt (#2032 integration).
29091
- */
29092
- interface VerifyResult {
29093
- readonly passed: boolean;
29094
- readonly stderr: string;
29095
- readonly stdout: string;
29096
- }
29097
- /**
29098
- * Adapter that runs the instance's test suite against a freshly-applied
29099
- * patch (#2032 integration). Verification is opt-in — when no adapter
29100
- * is provided, the runner behaves exactly as before.
29101
- */
29102
- interface IVerifyAdapter {
29103
- verify(instance: SWEBenchInstance, patch: string, workDir: string): Promise<VerifyResult>;
29104
- }
29105
- /**
29106
- * Options for running an agent on an instance.
29107
- */
29108
- interface RunOptions {
29109
- readonly executor: IAgentExecutor;
29110
- readonly config: SWEBenchConfig;
29111
- readonly onMessage?: (message: string) => void;
29112
- readonly signal?: AbortSignal;
29113
- /** Override system prompt (e.g., with memory-enriched version). */
29114
- readonly systemPrompt?: string;
29115
- /**
29116
- * Optional post-patch verify adapter (#2032). When provided, successful
29117
- * patches are verified by running the instance's test suite; failures
29118
- * trigger a bounded retry loop using the classification + retry-hint
29119
- * logic from `verify-loop.ts`. Default cap: 2 retries.
29120
- */
29121
- readonly verifyAdapter?: IVerifyAdapter;
29122
- /** Override max verify retries (default 2). */
29123
- readonly maxVerifyRetries?: number;
29124
- }
29125
- /**
29126
- * Runs an agent on a SWE-bench instance.
29127
- */
29128
- declare function runAgentOnInstance(instance: SWEBenchInstance, options: RunOptions): Promise<Result<SWEBenchRunResult, AgentRunnerError>>;
29129
- /**
29130
- * Creates a mock executor for testing.
29131
- */
29132
- declare function createMockExecutor(responses: string[]): IAgentExecutor;
29133
-
29134
- /**
29135
- * nexus-agents/swe-bench - Evaluation Configuration Types
29136
- *
29137
- * Configuration types for SWE-bench evaluation harness.
29138
- *
29139
- * @module swe-bench/evaluation-config-types
29140
- * @see https://www.swebench.com/SWE-bench/guides/evaluation/
29141
- * (Source: Issue #257 - SWE-Bench Evaluation)
29142
- */
29143
-
29144
- /**
29145
- * Cache level for Docker image management.
29146
- * Controls how aggressively to cache intermediate build layers.
29147
- */
29148
- type EvaluationCacheLevel = 'none' | 'base' | 'env' | 'instance';
29149
- /**
29150
- * Evaluation execution mode.
29151
- */
29152
- type EvaluationMode = 'local' | 'docker' | 'modal';
29153
- /**
29154
- * Configuration for running SWE-bench evaluation harness.
29155
- */
29156
- interface EvaluationHarnessConfig {
29157
- /** Dataset variant to evaluate against. */
29158
- readonly datasetName: SWEBenchVariant;
29159
- /** Path to predictions JSONL file. */
29160
- readonly predictionsPath: string;
29161
- /** Number of parallel workers (recommended: 8-12). */
29162
- readonly maxWorkers: number;
29163
- /** Unique identifier for this evaluation run. */
29164
- readonly runId: string;
29165
- /** Docker image cache level. */
29166
- readonly cacheLevel: EvaluationCacheLevel;
29167
- /** Execution mode. */
29168
- readonly mode: EvaluationMode;
29169
- /** Optional: specific instance IDs to evaluate. */
29170
- readonly instanceIds?: readonly string[];
29171
- /** Timeout per instance in seconds. */
29172
- readonly timeoutSeconds: number;
29173
- /** Directory for logs and results. */
29174
- readonly outputDir: string;
29175
- /** Namespace for Docker images (empty for local build). */
29176
- readonly dockerNamespace?: string;
29177
- /** Whether to use Modal cloud execution. */
29178
- readonly useModal: boolean;
29179
- }
29180
- /**
29181
- * Default evaluation configuration.
29182
- */
29183
- declare const DEFAULT_EVALUATION_CONFIG: EvaluationHarnessConfig;
29184
-
29185
- /**
29186
- * nexus-agents/swe-bench - Evaluation Result Types
29187
- *
29188
- * Per-instance and aggregate result types for SWE-bench evaluation.
29189
- *
29190
- * @module swe-bench/evaluation-result-types
29191
- * @see https://www.swebench.com/SWE-bench/guides/evaluation/
29192
- * (Source: Issue #257 - SWE-Bench Evaluation)
29193
- */
29194
-
29195
- /**
29196
- * Test execution status for a single test case.
29197
- */
29198
- type TestStatus = 'passed' | 'failed' | 'error' | 'skipped' | 'timeout';
29199
- /**
29200
- * Result of a single test case execution.
29201
- */
29202
- interface TestCaseResult {
29203
- /** Test name/identifier. */
29204
- readonly testName: string;
29205
- /** Test status. */
29206
- readonly status: TestStatus;
29207
- /** Duration in milliseconds. */
29208
- readonly durationMs: number;
29209
- /** Error message if failed/error. */
29210
- readonly errorMessage?: string;
29211
- /** Stack trace if available. */
29212
- readonly stackTrace?: string;
29213
- }
29214
- /**
29215
- * Resolution status for an instance.
29216
- */
29217
- type ResolutionStatus = 'resolved' | 'unresolved' | 'error' | 'timeout';
29218
- /**
29219
- * Detailed evaluation result for a single instance.
29220
- */
29221
- interface InstanceEvaluationResult {
29222
- /** Instance ID being evaluated. */
29223
- readonly instanceId: string;
29224
- /** Model that generated the prediction. */
29225
- readonly modelNameOrPath: string;
29226
- /** Whether the issue was resolved. */
29227
- readonly resolved: boolean;
29228
- /** Resolution status category. */
29229
- readonly status: ResolutionStatus;
29230
- /** Individual test results. */
29231
- readonly testResults: readonly TestCaseResult[];
29232
- /** Number of tests that passed. */
29233
- readonly testsPassed: number;
29234
- /** Number of tests that failed. */
29235
- readonly testsFailed: number;
29236
- /** Total number of tests. */
29237
- readonly testsTotal: number;
29238
- /** Whether the patch applied cleanly. */
29239
- readonly patchApplied: boolean;
29240
- /** Patch application error if any. */
29241
- readonly patchError?: string;
29242
- /** Total evaluation duration in milliseconds. */
29243
- readonly durationMs: number;
29244
- /** Docker container ID used. */
29245
- readonly containerId?: string;
29246
- /** Log file path for this instance. */
29247
- readonly logPath?: string;
29248
- }
29249
- /**
29250
- * Aggregate metrics for an evaluation run.
29251
- */
29252
- interface EvaluationMetrics {
29253
- /** Total instances in dataset. */
29254
- readonly totalInstances: number;
29255
- /** Instances with predictions. */
29256
- readonly predictedInstances: number;
29257
- /** Instances successfully resolved. */
29258
- readonly resolvedInstances: number;
29259
- /** Resolution rate (resolved / predicted). */
29260
- readonly resolutionRate: number;
29261
- /** Instances where patch applied cleanly. */
29262
- readonly patchesApplied: number;
29263
- /** Patch application rate. */
29264
- readonly patchApplicationRate: number;
29265
- /** Instances that timed out. */
29266
- readonly timeouts: number;
29267
- /** Instances with evaluation errors. */
29268
- readonly errors: number;
29269
- /** Average evaluation time per instance (ms). */
29270
- readonly avgDurationMs: number;
29271
- /** Total evaluation time (ms). */
29272
- readonly totalDurationMs: number;
29273
- }
29274
- /**
29275
- * Per-repository breakdown of results.
29276
- */
29277
- interface RepositoryMetrics {
29278
- /** Repository name (e.g., "django/django"). */
29279
- readonly repository: string;
29280
- /** Total instances from this repo. */
29281
- readonly totalInstances: number;
29282
- /** Resolved instances. */
29283
- readonly resolvedInstances: number;
29284
- /** Resolution rate for this repo. */
29285
- readonly resolutionRate: number;
29286
- }
29287
- /**
29288
- * Complete evaluation run result.
29289
- */
29290
- interface EvaluationRunResult {
29291
- /** Run identifier. */
29292
- readonly runId: string;
29293
- /** Dataset variant evaluated. */
29294
- readonly datasetName: SWEBenchVariant;
29295
- /** Model being evaluated. */
29296
- readonly modelNameOrPath: string;
29297
- /** Evaluation start timestamp (ISO 8601). */
29298
- readonly startedAt: string;
29299
- /** Evaluation completion timestamp (ISO 8601). */
29300
- readonly completedAt: string;
29301
- /** Aggregate metrics. */
29302
- readonly metrics: EvaluationMetrics;
29303
- /** Per-repository breakdown. */
29304
- readonly repositoryMetrics: readonly RepositoryMetrics[];
29305
- /** Per-instance results. */
29306
- readonly instanceResults: readonly InstanceEvaluationResult[];
29307
- /** Configuration used. */
29308
- readonly config: EvaluationHarnessConfig;
29309
- /** Harness version used. */
29310
- readonly harnessVersion?: string;
29311
- }
29312
-
29313
- /**
29314
- * nexus-agents/swe-bench - Evaluation Interface Types
29315
- *
29316
- * Interface and progress types for evaluation harness.
29317
- *
29318
- * @module swe-bench/evaluation-interface-types
29319
- * @see https://www.swebench.com/SWE-bench/guides/evaluation/
29320
- * (Source: Issue #257 - SWE-Bench Evaluation)
29321
- */
29322
-
29323
- /**
29324
- * Progress callback for evaluation.
29325
- */
29326
- type EvaluationProgressCallback = (progress: EvaluationProgress) => void;
29327
- /**
29328
- * Phases of evaluation.
29329
- */
29330
- type EvaluationPhase = 'initializing' | 'loading_predictions' | 'building_containers' | 'evaluating' | 'aggregating' | 'complete';
29331
- /**
29332
- * Progress information during evaluation.
29333
- */
29334
- interface EvaluationProgress {
29335
- /** Current instance being evaluated. */
29336
- readonly currentInstanceId: string;
29337
- /** Index of current instance (0-based). */
29338
- readonly currentIndex: number;
29339
- /** Total instances to evaluate. */
29340
- readonly totalInstances: number;
29341
- /** Instances completed so far. */
29342
- readonly completedInstances: number;
29343
- /** Instances resolved so far. */
29344
- readonly resolvedSoFar: number;
29345
- /** Current resolution rate. */
29346
- readonly currentResolutionRate: number;
29347
- /** Estimated time remaining in ms. */
29348
- readonly estimatedRemainingMs: number;
29349
- /** Current phase. */
29350
- readonly phase: EvaluationPhase;
29351
- }
29352
- /**
29353
- * Error codes for evaluation failures.
29354
- */
29355
- type EvaluationErrorCode = 'DOCKER_NOT_AVAILABLE' | 'PREDICTIONS_NOT_FOUND' | 'INVALID_PREDICTIONS_FORMAT' | 'HARNESS_NOT_INSTALLED' | 'INSTANCE_TIMEOUT' | 'CONTAINER_FAILED' | 'NETWORK_ERROR' | 'INSUFFICIENT_RESOURCES' | 'UNKNOWN';
29356
- /**
29357
- * Evaluation harness error types.
29358
- */
29359
- declare class EvaluationHarnessError extends Error {
29360
- readonly cause?: unknown;
29361
- readonly code: EvaluationErrorCode;
29362
- constructor(message: string, code: EvaluationErrorCode, cause?: unknown);
29363
- }
29364
- /**
29365
- * Result of harness validation.
29366
- */
29367
- interface EvaluationValidationResult {
29368
- /** Whether the harness is ready. */
29369
- readonly ready: boolean;
29370
- /** Docker availability. */
29371
- readonly dockerAvailable: boolean;
29372
- /** Docker version if available. */
29373
- readonly dockerVersion?: string;
29374
- /** Python/swebench availability. */
29375
- readonly harnessInstalled: boolean;
29376
- /** Harness version if installed. */
29377
- readonly harnessVersion?: string;
29378
- /** Available disk space in bytes. */
29379
- readonly availableDiskSpace: number;
29380
- /** Available memory in bytes. */
29381
- readonly availableMemory: number;
29382
- /** CPU cores available. */
29383
- readonly cpuCores: number;
29384
- /** Validation errors if not ready. */
29385
- readonly errors: readonly string[];
29386
- /** Warnings that don't prevent execution. */
29387
- readonly warnings: readonly string[];
29388
- }
29389
- /**
29390
- * Interface for evaluation harness implementations.
29391
- */
29392
- interface IEvaluationHarness {
29393
- /**
29394
- * Validates that the harness is ready to run.
29395
- */
29396
- validate(): Promise<EvaluationValidationResult>;
29397
- /**
29398
- * Runs evaluation on predictions.
29399
- */
29400
- evaluate(predictions: readonly SWEBenchPrediction[], config: EvaluationHarnessConfig, onProgress?: EvaluationProgressCallback): Promise<EvaluationRunResult>;
29401
- /**
29402
- * Evaluates a single instance (for testing/debugging).
29403
- */
29404
- evaluateInstance(prediction: SWEBenchPrediction, config: EvaluationHarnessConfig): Promise<InstanceEvaluationResult>;
29405
- /**
29406
- * Cancels an in-progress evaluation.
29407
- */
29408
- cancel(): Promise<void>;
29409
- /**
29410
- * Gets the version of the harness.
29411
- */
29412
- getVersion(): Promise<string>;
29413
- }
29414
-
29415
- /**
29416
- * Harness-backed verify adapter (#2054).
29417
- *
29418
- * Concrete implementation of `IVerifyAdapter` that delegates to the
29419
- * existing `IEvaluationHarness` to actually run the instance's test
29420
- * suite. Translates the harness's `InstanceEvaluationResult` into the
29421
- * `VerifyResult` shape the agent-runner expects.
29422
- *
29423
- * Wire this into the agent-runner via `RunOptions.verifyAdapter`:
29424
- *
29425
- * ```typescript
29426
- * const harness = await createValidatedHarness(...);
29427
- * const verifyAdapter = new HarnessVerifyAdapter(harness, modelName, evalConfig);
29428
- * runAgentOnInstance(instance, { executor, config, verifyAdapter });
29429
- * ```
29430
- *
29431
- * @module swe-bench/harness-verify-adapter
29432
- */
29433
-
29434
- /**
29435
- * Builds a `VerifyResult` from an `InstanceEvaluationResult`.
29436
- *
29437
- * Mapping:
29438
- * - `passed` = `resolved` (all FAIL_TO_PASS now pass, all PASS_TO_PASS still pass)
29439
- * - `stderr` = patch application error, if any; else pytest-style summary of failed tests
29440
- * - `stdout` = run summary (counts + status)
29441
- *
29442
- * Exported for direct use by tests.
29443
- */
29444
- declare function translateEvaluationResult(result: InstanceEvaluationResult): VerifyResult;
29445
- /**
29446
- * Concrete `IVerifyAdapter` that calls `harness.evaluateInstance` per
29447
- * verify request. Requires a validated harness — call
29448
- * `createValidatedHarness()` first, then pass the result here.
29449
- */
29450
- declare class HarnessVerifyAdapter implements IVerifyAdapter {
29451
- private readonly harness;
29452
- private readonly modelNameOrPath;
29453
- private readonly evalConfig;
29454
- constructor(harness: IEvaluationHarness, modelNameOrPath: string, evalConfig: EvaluationHarnessConfig);
29455
- verify(instance: SWEBenchInstance, patch: string, _workDir: string): Promise<VerifyResult>;
29456
- }
29457
-
29458
- /**
29459
- * nexus-agents/swe-bench - Nexus Agent Executor
29460
- *
29461
- * Real implementation of IAgentExecutor using nexus-agents ClaudeAdapter.
29462
- *
29463
- * @module swe-bench/nexus-agent-executor
29464
- * (Source: Issue #257 - SWE-Bench Evaluation)
29465
- */
29466
-
29467
- /**
29468
- * Configuration for the Nexus agent executor.
29469
- */
29470
- interface NexusAgentExecutorConfig {
29471
- /** Anthropic API key. */
29472
- readonly apiKey: string;
29473
- /** Model ID to use (default: derived from canonical registry). */
29474
- readonly modelId?: string | undefined;
29475
- /** Maximum tokens for response (default: 16384). */
29476
- readonly maxTokens?: number | undefined;
29477
- /** Temperature for generation (default: 0.2). */
29478
- readonly temperature?: number | undefined;
29479
- /** Callback for message logging. */
29480
- readonly onMessage?: ((message: string) => void) | undefined;
29481
- }
29482
- /**
29483
- * Real agent executor using nexus-agents ClaudeAdapter.
29484
- *
29485
- * This implements the IAgentExecutor interface to run actual
29486
- * model inference for SWE-bench tasks. Uses ClaudeAdapter directly
29487
- * (not UnifiedAdapterRegistry) because SWE-bench evaluation is
29488
- * Claude-specific and requires direct API key configuration.
29489
- */
29490
- declare class NexusAgentExecutor implements IAgentExecutor {
29491
- private readonly adapter;
29492
- private readonly maxTokens;
29493
- private readonly temperature;
29494
- private readonly messageCallback;
29495
- constructor(config: NexusAgentExecutorConfig);
29496
- /**
29497
- * Execute a prompt using the Claude model.
29498
- */
29499
- execute(systemPrompt: string, userPrompt: string, context: AgentContext): Promise<Result<AgentExecutionResult, AgentRunnerError>>;
29500
- /**
29501
- * Extract text content from response content blocks.
29502
- */
29503
- private extractTextFromResponse;
29504
- /**
29505
- * Get the model ID being used.
29506
- */
29507
- getModelId(): string;
29508
- }
29509
- /**
29510
- * Creates a NexusAgentExecutor from environment.
29511
- *
29512
- * Looks for ANTHROPIC_API_KEY environment variable.
29513
- */
29514
- declare function createNexusExecutorFromEnv(overrides?: Partial<Omit<NexusAgentExecutorConfig, 'apiKey'>>): Result<NexusAgentExecutor, AgentRunnerError>;
29515
-
29516
- /**
29517
- * nexus-agents/swe-bench - CLI Agent Executor
29518
- *
29519
- * Implementation of IAgentExecutor using Claude CLI (subprocess).
29520
- * Does not require an API key - uses OAuth credentials from claude CLI.
29521
- *
29522
- * @module swe-bench/cli-agent-executor
29523
- * (Source: Issue #257 - SWE-Bench Evaluation)
29524
- */
29525
-
29526
- /**
29527
- * Configuration for the CLI agent executor.
29528
- */
29529
- interface CliAgentExecutorConfig {
29530
- /** Model ID to use (default: claude-sonnet-4). */
29531
- readonly modelId?: string | undefined;
29532
- /** Timeout per execution in milliseconds (default: 300000 = 5 minutes). */
29533
- readonly timeoutMs?: number | undefined;
29534
- /** Callback for message logging. */
29535
- readonly onMessage?: ((message: string) => void) | undefined;
29536
- /** Path to MCP config file for child CLI sessions. */
29537
- readonly mcpConfigPath?: string | undefined;
29538
- }
29539
- /**
29540
- * Agent executor using Claude CLI (subprocess transport).
29541
- *
29542
- * This implements the IAgentExecutor interface using the Claude CLI,
29543
- * which authenticates via OAuth and doesn't require an API key.
29544
- */
29545
- declare class CliAgentExecutor implements IAgentExecutor {
29546
- private readonly adapter;
29547
- private readonly modelId;
29548
- private readonly timeoutMs;
29549
- private readonly messageCallback;
29550
- private readonly mcpConfigPath;
29551
- constructor(config?: CliAgentExecutorConfig);
29552
- /** Build task options including optional MCP config. */
29553
- private buildTaskOptions;
29554
- /**
29555
- * Execute a prompt using the Claude CLI.
29556
- */
29557
- execute(systemPrompt: string, userPrompt: string, context: AgentContext): Promise<Result<AgentExecutionResult, AgentRunnerError>>;
29558
- /**
29559
- * Estimate tokens when usage not available.
29560
- * Uses ~4 chars per token heuristic.
29561
- */
29562
- private estimateTokens;
29563
- /**
29564
- * Get the model ID being used.
29565
- */
29566
- getModelId(): string;
29567
- }
29568
- /**
29569
- * Checks if Claude CLI is available and authenticated.
29570
- */
29571
- declare function isCliAvailable(): Promise<boolean>;
29572
- /**
29573
- * Creates a CliAgentExecutor if CLI is available.
29574
- *
29575
- * @returns Executor if CLI available, error otherwise
29576
- */
29577
- declare function createCliExecutor(config?: CliAgentExecutorConfig): Promise<Result<CliAgentExecutor, AgentRunnerError>>;
29578
-
29579
- /**
29580
- * Result from running benchmark.
29581
- */
29582
- interface BenchmarkRunResult {
29583
- readonly success: boolean;
29584
- readonly message: string;
29585
- readonly total: number;
29586
- readonly completed: number;
29587
- readonly failed: number;
29588
- readonly tokensUsed: number;
29589
- readonly outputPath: string;
29590
- }
29591
- /**
29592
- * Options for running benchmark.
29593
- */
29594
- interface BenchmarkRunOptions {
29595
- readonly instances: readonly SWEBenchInstance[];
29596
- readonly config: SWEBenchConfig;
29597
- readonly outputPath: string;
29598
- readonly append: boolean;
29599
- readonly verbose: boolean;
29600
- }
29601
- /**
29602
- * Executor with model ID for reporting.
29603
- */
29604
- interface ExecutorWithModel extends IAgentExecutor {
29605
- getModelId(): string;
29606
- }
29607
- /** Options for creating an executor. */
29608
- interface CreateExecutorOptions {
29609
- readonly verbose: boolean;
29610
- readonly mcpEnabled?: boolean;
29611
- }
29612
- /**
29613
- * Create executor, preferring CLI over API.
29614
- * When mcpEnabled, generates MCP config for child CLI sessions.
29615
- */
29616
- declare function createExecutor(verboseOrOptions: boolean | CreateExecutorOptions): Promise<Result<ExecutorWithModel & {
29617
- mcpCleanup?: () => Promise<void>;
29618
- }, AgentRunnerError>>;
29619
- /**
29620
- * Minimal writer interface for runSingleInstance.
29621
- * Both PredictionWriter and LockedWriter satisfy this.
29622
- */
29623
- interface IBenchmarkWriter {
29624
- writeResult(result: Parameters<PredictionWriter['writeResult']>[0]): Promise<Result<boolean, PredictionWriteError>>;
29625
- }
29626
- /** Options for running a single benchmark instance. */
29627
- interface SingleInstanceOptions {
29628
- readonly instance: SWEBenchInstance;
29629
- readonly executor: ExecutorWithModel;
29630
- readonly config: SWEBenchConfig;
29631
- readonly writer: IBenchmarkWriter;
29632
- readonly verbose: boolean;
29633
- readonly systemPrompt?: string;
29634
- /**
29635
- * Optional post-patch verify adapter (#2032). When present, runs the
29636
- * instance's test suite after each successful patch; on failure,
29637
- * feeds a retry hint back to the agent for up to `maxVerifyRetries`
29638
- * iterations. Construct via `createHarnessVerifyAdapter`.
29639
- */
29640
- readonly verifyAdapter?: IVerifyAdapter;
29641
- /** Override max verify retries (default 2). */
29642
- readonly maxVerifyRetries?: number;
29643
- }
29644
- /** Run single instance and handle result. */
29645
- declare function runSingleInstance(opts: SingleInstanceOptions): Promise<{
29646
- completed: boolean;
29647
- tokens: number;
29648
- }>;
29649
- /**
29650
- * Run all instances and write predictions.
29651
- */
29652
- declare function runBenchmarkInstances(executor: ExecutorWithModel, options: BenchmarkRunOptions): Promise<BenchmarkRunResult>;
29653
-
29654
- /**
29655
- * nexus-agents/swe-bench - Parallel Benchmark Runner
29656
- *
29657
- * Runs SWE-bench instances concurrently with thread-safe prediction writes
29658
- * and per-slot isolated work directories.
29659
- *
29660
- * @module swe-bench/parallel-runner
29661
- * (Source: Issue #257 - SWE-Bench Evaluation)
29662
- */
29663
-
29664
- /**
29665
- * Thread-safe wrapper around PredictionWriter.
29666
- * Uses a promise chain as a mutex to serialize writes.
29667
- * Implements IBenchmarkWriter for use with runSingleInstance.
29668
- */
29669
- declare class LockedWriter implements IBenchmarkWriter {
29670
- private readonly writer;
29671
- private chain;
29672
- constructor(writer: PredictionWriter);
29673
- /** Serialized writeResult — concurrent calls queue behind previous ones. */
29674
- writeResult(result: Parameters<PredictionWriter['writeResult']>[0]): ReturnType<PredictionWriter['writeResult']>;
29675
- getPredictionCount(): number;
29676
- }
29677
- /** Stats accumulated across all workers. */
29678
- interface ParallelStats {
29679
- completed: number;
29680
- failed: number;
29681
- tokensUsed: number;
29682
- }
29683
- /** Memory context for enriching parallel worker prompts. */
29684
- interface ParallelMemoryContext {
29685
- readonly memory: SessionMemory;
29686
- /** Initial learnings snapshot — use refreshLearnings() for live updates. */
29687
- readonly learnings: readonly SessionLearning[];
29688
- }
29689
- /** Options for parallel benchmark execution. */
29690
- interface ParallelRunOptions {
29691
- readonly executor: ExecutorWithModel;
29692
- readonly instances: readonly SWEBenchInstance[];
29693
- readonly config: SWEBenchConfig;
29694
- readonly outputPath: string;
29695
- readonly append: boolean;
29696
- readonly verbose: boolean;
29697
- readonly concurrency: number;
29698
- readonly memCtx?: ParallelMemoryContext | null;
29699
- }
29700
- /**
29701
- * Runs benchmark instances in parallel with N concurrent workers.
29702
- *
29703
- * Each worker gets an isolated work directory (`slot-0`, `slot-1`, etc.)
29704
- * to prevent repository clone collisions. Prediction writes are serialized
29705
- * via LockedWriter to prevent JSONL interleaving.
29706
- */
29707
- declare function runBenchmarkParallel(opts: ParallelRunOptions): Promise<ParallelStats>;
29708
-
29709
- /**
29710
- * nexus-agents/swe-bench - Instance Priority Sorter
29711
- *
29712
- * Sorts SWE-bench instances by estimated difficulty to maximize early
29713
- * throughput. Easier instances run first, producing results faster and
29714
- * enabling early stopping strategies.
29715
- *
29716
- * Scoring factors:
29717
- * 1. Repository complexity (Flask=1, Django=7, SymPy=9)
29718
- * 2. Problem statement length (proxy for issue complexity)
29719
- * 3. Past success rates from memory (when available)
29720
- *
29721
- * @module swe-bench/instance-sorter
29722
- * (Source: Issue #1407 - SWE-bench parallel execution)
29723
- */
29724
-
29725
- /**
29726
- * Relative complexity scores for SWE-bench Lite repositories.
29727
- * Based on codebase size, framework complexity, and historical solve rates.
29728
- * Lower = easier. Scale: 1-10.
29729
- */
29730
- declare const REPO_COMPLEXITY: Record<string, number>;
29731
- /** Options for priority sorting. */
29732
- interface SortOptions {
29733
- /** Map of instance_id -> success rate (0.0-1.0) from past runs. */
29734
- readonly pastSuccessRates?: ReadonlyMap<string, number>;
29735
- }
29736
- /**
29737
- * Estimate difficulty score for an instance (lower = easier).
29738
- * Range: roughly 0-15 without memory, 0-20 with memory penalties.
29739
- */
29740
- declare function estimateDifficulty(instance: SWEBenchInstance, options?: SortOptions): number;
29741
- /**
29742
- * Sort instances by estimated difficulty (easiest first).
29743
- * Returns a new array; does not modify the input.
29744
- */
29745
- declare function sortByPriority(instances: readonly SWEBenchInstance[], options?: SortOptions): SWEBenchInstance[];
29746
-
29747
- /**
29748
- * nexus-agents/swe-bench - Memory Enrichment
29749
- *
29750
- * Integrates nexus-agents' session memory into SWE-bench agent prompts.
29751
- * Records per-instance outcomes and injects relevant learnings from
29752
- * prior runs into system prompts for future attempts.
29753
- *
29754
- * @module swe-bench/memory-enrichment
29755
- * (Source: Issue #257 - SWE-Bench Evaluation)
29756
- */
29757
-
29758
- /**
29759
- * Create a session memory instance for SWE-bench runs.
29760
- */
29761
- declare function createBenchmarkMemory(memoryDir?: string): SessionMemory;
29762
- /**
29763
- * Extract repo name from instance ID (e.g., "django__django-12345" -> "django/django").
29764
- */
29765
- declare function extractRepoName(instanceId: string): string;
29766
- /**
29767
- * Build an enriched system prompt by injecting relevant learnings
29768
- * from past SWE-bench runs.
29769
- */
29770
- declare function buildEnrichedPrompt(learnings: readonly SessionLearning[], instance: SWEBenchInstance): string;
29771
- /**
29772
- * Record the outcome of a SWE-bench instance for future learning.
29773
- */
29774
- declare function recordOutcome(memory: SessionMemory, instance: SWEBenchInstance, result: SWEBenchRunResult): void;
29775
- /**
29776
- * Extract past success rates from memory learnings.
29777
- * Returns a Map of instance_id -> success rate (1.0 = solved, 0.0 = failed).
29778
- * Used by instance-sorter to prioritize easier instances.
29779
- */
29780
- declare function extractPastSuccessRates(learnings: readonly SessionLearning[]): Map<string, number>;
29781
-
29782
- /**
29783
- * nexus-agents/swe-bench - Evaluation Comparison Types
29784
- *
29785
- * Comparison and leaderboard types for competitor analysis.
29786
- *
29787
- * @module swe-bench/evaluation-comparison-types
29788
- * @see https://www.swebench.com/SWE-bench/guides/evaluation/
29789
- * (Source: Issue #257 - SWE-Bench Evaluation)
29790
- */
29791
-
29792
- /**
29793
- * Known competitor systems for comparison.
29794
- */
29795
- type CompetitorSystem = 'devin' | 'aider' | 'claude-code' | 'cursor' | 'codex' | 'gpt-engineer' | 'auto-gpt' | 'other';
29796
- /**
29797
- * Comparison data point for a competitor.
29798
- */
29799
- interface CompetitorResult {
29800
- /** Competitor system name. */
29801
- readonly system: CompetitorSystem;
29802
- /** Display name. */
29803
- readonly displayName: string;
29804
- /** SWE-bench variant evaluated. */
29805
- readonly variant: SWEBenchVariant;
29806
- /** Resolution rate achieved. */
29807
- readonly resolutionRate: number;
29808
- /** Number of instances resolved. */
29809
- readonly resolvedInstances: number;
29810
- /** Total instances evaluated. */
29811
- readonly totalInstances: number;
29812
- /** Average tokens per instance (if available). */
29813
- readonly avgTokensPerInstance?: number;
29814
- /** Average cost per instance (if available). */
29815
- readonly avgCostPerInstance?: number;
29816
- /** Data source URL. */
29817
- readonly sourceUrl?: string;
29818
- /** Date of the result. */
29819
- readonly resultDate: string;
29820
- }
29821
- /**
29822
- * Comparison report between nexus-agents and competitors.
29823
- */
29824
- interface ComparisonReport {
29825
- /** nexus-agents result. */
29826
- readonly nexusResult: EvaluationRunResult;
29827
- /** Competitor results for comparison. */
29828
- readonly competitors: readonly CompetitorResult[];
29829
- /** Ranking among competitors. */
29830
- readonly ranking: number;
29831
- /** Total systems compared. */
29832
- readonly totalSystems: number;
29833
- /** Report generation timestamp. */
29834
- readonly generatedAt: string;
29835
- }
29836
- /**
29837
- * Leaderboard entry for a model/system.
29838
- */
29839
- interface LeaderboardEntry {
29840
- /** Rank on leaderboard. */
29841
- readonly rank: number;
29842
- /** System/model name. */
29843
- readonly modelName: string;
29844
- /** Organization/team. */
29845
- readonly organization?: string;
29846
- /** Resolution rate on SWE-bench Lite. */
29847
- readonly liteResolutionRate?: number;
29848
- /** Resolution rate on SWE-bench Verified. */
29849
- readonly verifiedResolutionRate?: number;
29850
- /** Resolution rate on full SWE-bench. */
29851
- readonly fullResolutionRate?: number;
29852
- /** Submission date. */
29853
- readonly submissionDate: string;
29854
- /** Whether this is an agent system vs. single-turn model. */
29855
- readonly isAgentSystem: boolean;
29856
- /** Source/paper URL. */
29857
- readonly sourceUrl?: string;
29858
- }
29859
- /**
29860
- * Snapshot of the SWE-bench leaderboard.
29861
- */
29862
- interface LeaderboardSnapshot {
29863
- /** When this snapshot was taken. */
29864
- readonly snapshotDate: string;
29865
- /** Entries sorted by rank. */
29866
- readonly entries: readonly LeaderboardEntry[];
29867
- /** Source URL for the leaderboard. */
29868
- readonly sourceUrl: string;
29869
- }
29870
-
29871
- /**
29872
- * nexus-agents/swe-bench - Evaluation Statistics Types
29873
- *
29874
- * Statistical summary and metrics types for evaluation reports.
29875
- *
29876
- * @module swe-bench/evaluation-statistics-types
29877
- * (Source: Issue #257 - SWE-Bench Evaluation)
29878
- */
29879
- /**
29880
- * Statistical summary with distribution info.
29881
- */
29882
- interface StatisticalSummary {
29883
- /** Minimum value. */
29884
- readonly min: number;
29885
- /** Maximum value. */
29886
- readonly max: number;
29887
- /** Mean (average). */
29888
- readonly mean: number;
29889
- /** Median (50th percentile). */
29890
- readonly median: number;
29891
- /** Standard deviation. */
29892
- readonly stdDev: number;
29893
- /** 25th percentile. */
29894
- readonly p25: number;
29895
- /** 75th percentile. */
29896
- readonly p75: number;
29897
- /** 90th percentile. */
29898
- readonly p90: number;
29899
- /** 95th percentile. */
29900
- readonly p95: number;
29901
- /** Sample count. */
29902
- readonly count: number;
29903
- }
29904
- /**
29905
- * Timing statistics for evaluation.
29906
- */
29907
- interface TimingStatistics {
29908
- /** Per-instance duration stats (ms). */
29909
- readonly instanceDuration: StatisticalSummary;
29910
- /** Total wall-clock time (ms). */
29911
- readonly totalWallTime: number;
29912
- /** Total CPU time (ms). */
29913
- readonly totalCpuTime?: number;
29914
- /** Time spent applying patches (ms). */
29915
- readonly patchApplicationTime: number;
29916
- /** Time spent running tests (ms). */
29917
- readonly testExecutionTime: number;
29918
- }
29919
- /**
29920
- * Resource usage statistics.
29921
- */
29922
- interface ResourceStatistics {
29923
- /** Peak memory usage (bytes). */
29924
- readonly peakMemory: number;
29925
- /** Average memory usage (bytes). */
29926
- readonly avgMemory: number;
29927
- /** Total disk space used (bytes). */
29928
- readonly diskSpaceUsed: number;
29929
- /** Number of Docker containers created. */
29930
- readonly containersCreated: number;
29931
- }
29932
-
29933
- /**
29934
- * nexus-agents/swe-bench - Evaluation Failure Types
29935
- *
29936
- * Failure analysis types for evaluation reports.
29937
- *
29938
- * @module swe-bench/evaluation-failure-types
29939
- * (Source: Issue #257 - SWE-Bench Evaluation)
29940
- */
29941
- /**
29942
- * Categories of failures for analysis.
29943
- */
29944
- type FailureCategory = 'patch_not_applicable' | 'test_failure' | 'syntax_error' | 'runtime_error' | 'timeout' | 'missing_dependency' | 'wrong_file_modified' | 'incomplete_fix' | 'regression_introduced' | 'unknown';
29945
- /**
29946
- * Failure analysis for an instance.
29947
- */
29948
- interface FailureAnalysis {
29949
- /** Instance ID. */
29950
- readonly instanceId: string;
29951
- /** Primary failure category. */
29952
- readonly category: FailureCategory;
29953
- /** Detailed error message. */
29954
- readonly errorMessage: string;
29955
- /** Affected file(s). */
29956
- readonly affectedFiles: readonly string[];
29957
- /** Suggested fix approach (if determinable). */
29958
- readonly suggestedApproach?: string;
29959
- /** Similarity to other failures (for clustering). */
29960
- readonly similarFailures?: readonly string[];
29961
- }
29962
- /**
29963
- * A pattern of recurring failures.
29964
- */
29965
- interface FailurePattern {
29966
- /** Pattern description. */
29967
- readonly description: string;
29968
- /** Number of occurrences. */
29969
- readonly occurrences: number;
29970
- /** Example instance IDs. */
29971
- readonly examples: readonly string[];
29972
- /** Potential root cause. */
29973
- readonly potentialCause?: string;
29974
- }
29975
- /**
29976
- * Aggregate failure statistics.
29977
- */
29978
- interface FailureStatistics {
29979
- /** Breakdown by failure category. */
29980
- readonly byCategory: Record<FailureCategory, number>;
29981
- /** Most common failure patterns. */
29982
- readonly commonPatterns: readonly FailurePattern[];
29983
- /** Failures by repository. */
29984
- readonly byRepository: Record<string, number>;
29985
- }
29986
-
29987
- /**
29988
- * nexus-agents/swe-bench - Evaluation Cost Types
29989
- *
29990
- * Token usage and cost estimation types for evaluation reports.
29991
- *
29992
- * @module swe-bench/evaluation-cost-types
29993
- * (Source: Issue #257 - SWE-Bench Evaluation)
29994
- */
29995
-
29996
- /**
29997
- * Token usage by evaluation phase.
29998
- */
29999
- interface TokensByPhase {
30000
- /** Exploration/reading phase. */
30001
- readonly exploration: number;
30002
- /** Planning phase. */
30003
- readonly planning: number;
30004
- /** Implementation phase. */
30005
- readonly implementation: number;
30006
- /** Retry/iteration phase. */
30007
- readonly retry: number;
30008
- }
30009
- /**
30010
- * Token usage breakdown.
30011
- */
30012
- interface TokenUsageBreakdown {
30013
- /** Total input tokens. */
30014
- readonly totalInputTokens: number;
30015
- /** Total output tokens. */
30016
- readonly totalOutputTokens: number;
30017
- /** Total tokens. */
30018
- readonly totalTokens: number;
30019
- /** Per-instance token stats. */
30020
- readonly perInstance: StatisticalSummary;
30021
- /** Tokens by phase. */
30022
- readonly byPhase: TokensByPhase;
30023
- }
30024
- /**
30025
- * Model pricing information.
30026
- */
30027
- interface ModelPricing {
30028
- /** Model name. */
30029
- readonly modelName: string;
30030
- /** Price per 1M input tokens (USD). */
30031
- readonly inputPricePerMillion: number;
30032
- /** Price per 1M output tokens (USD). */
30033
- readonly outputPricePerMillion: number;
30034
- /** Price effective date. */
30035
- readonly priceDate: string;
30036
- }
30037
- /**
30038
- * Cost estimation for the evaluation.
30039
- */
30040
- interface CostEstimate$1 {
30041
- /** Total estimated cost (USD). */
30042
- readonly totalCostUsd: number;
30043
- /** Cost per instance (USD). */
30044
- readonly perInstanceCostUsd: number;
30045
- /** Cost per resolved instance (USD). */
30046
- readonly perResolvedInstanceCostUsd: number;
30047
- /** Model pricing used for estimate. */
30048
- readonly pricingModel: ModelPricing;
30049
- }
30050
-
30051
- /**
30052
- * nexus-agents/swe-bench - Evaluation Report Core Types
30053
- *
30054
- * Core report structure types for evaluation reports.
30055
- *
30056
- * @module swe-bench/evaluation-report-core-types
30057
- * (Source: Issue #257 - SWE-Bench Evaluation)
30058
- */
30059
-
30060
- /**
30061
- * Output format for evaluation reports.
30062
- */
30063
- type ReportFormat = 'json' | 'markdown' | 'html' | 'csv';
30064
- /**
30065
- * Detail level for reports.
30066
- */
30067
- type ReportDetailLevel = 'summary' | 'standard' | 'detailed' | 'verbose';
30068
- /**
30069
- * Configuration for report generation.
30070
- */
30071
- interface ReportConfig {
30072
- /** Output format. */
30073
- readonly format: ReportFormat;
30074
- /** Detail level. */
30075
- readonly detailLevel: ReportDetailLevel;
30076
- /** Include per-instance breakdown. */
30077
- readonly includeInstanceDetails: boolean;
30078
- /** Include competitor comparison. */
30079
- readonly includeComparison: boolean;
30080
- /** Include charts/visualizations (for HTML). */
30081
- readonly includeCharts: boolean;
30082
- /** Output file path. */
30083
- readonly outputPath: string;
30084
- /** Report title. */
30085
- readonly title?: string;
30086
- }
30087
- /**
30088
- * Default report configuration.
30089
- */
30090
- declare const DEFAULT_REPORT_CONFIG: ReportConfig;
30091
- /**
30092
- * Summary section of the report.
30093
- */
30094
- interface ReportSummary {
30095
- /** Headline metric: resolution rate. */
30096
- readonly resolutionRate: number;
30097
- /** Instances resolved. */
30098
- readonly resolvedCount: number;
30099
- /** Total instances evaluated. */
30100
- readonly totalCount: number;
30101
- /** Ranking vs competitors (if compared). */
30102
- readonly ranking?: number;
30103
- /** Key highlights. */
30104
- readonly highlights: readonly string[];
30105
- /** Areas needing improvement. */
30106
- readonly improvementAreas: readonly string[];
30107
- }
30108
- /**
30109
- * Detailed metrics section.
30110
- */
30111
- interface ReportMetrics {
30112
- /** Core evaluation metrics. */
30113
- readonly evaluation: EvaluationMetrics;
30114
- /** Timing statistics. */
30115
- readonly timing: TimingStatistics;
30116
- /** Resource usage. */
30117
- readonly resources: ResourceStatistics;
30118
- /** Token usage (if tracked). */
30119
- readonly tokens?: TokenUsageBreakdown;
30120
- /** Cost estimate (if calculable). */
30121
- readonly cost?: CostEstimate$1;
30122
- }
30123
- /**
30124
- * Repository breakdown section.
30125
- */
30126
- interface ReportRepositoryBreakdown {
30127
- /** Per-repository metrics. */
30128
- readonly repositories: readonly RepositoryMetrics[];
30129
- /** Best performing repository. */
30130
- readonly bestRepository: RepositoryMetrics;
30131
- /** Worst performing repository. */
30132
- readonly worstRepository: RepositoryMetrics;
30133
- /** Variance in performance across repos. */
30134
- readonly performanceVariance: number;
30135
- }
30136
- /**
30137
- * Competitor comparison section.
30138
- */
30139
- interface ReportComparison {
30140
- /** Competitor results. */
30141
- readonly competitors: readonly CompetitorResult[];
30142
- /** nexus-agents ranking. */
30143
- readonly nexusRanking: number;
30144
- /** Resolution rate difference from top system. */
30145
- readonly gapFromTop: number;
30146
- /** Resolution rate difference from average. */
30147
- readonly differenceFromAverage: number;
30148
- /** Areas where nexus-agents excels. */
30149
- readonly strengths: readonly string[];
30150
- /** Areas where nexus-agents lags. */
30151
- readonly weaknesses: readonly string[];
30152
- }
30153
- /**
30154
- * Instance-level details section.
30155
- */
30156
- interface ReportInstanceDetails {
30157
- /** Resolved instances. */
30158
- readonly resolved: readonly InstanceEvaluationResult[];
30159
- /** Unresolved instances with failure analysis. */
30160
- readonly unresolved: readonly FailureAnalysis[];
30161
- /** Grouped by failure category. */
30162
- readonly byFailureCategory: Record<FailureCategory, readonly FailureAnalysis[]>;
30163
- }
30164
- /**
30165
- * Report metadata.
30166
- */
30167
- interface ReportMetadata {
30168
- /** Report title. */
30169
- readonly title: string;
30170
- /** Report generation timestamp. */
30171
- readonly generatedAt: string;
30172
- /** Dataset variant. */
30173
- readonly variant: SWEBenchVariant;
30174
- /** Model evaluated. */
30175
- readonly modelName: string;
30176
- /** nexus-agents version. */
30177
- readonly nexusVersion: string;
30178
- /** Report version/format. */
30179
- readonly reportVersion: string;
30180
- }
30181
- /**
30182
- * Complete evaluation report.
30183
- */
30184
- interface EvaluationReport {
30185
- /** Report metadata. */
30186
- readonly metadata: ReportMetadata;
30187
- /** Executive summary. */
30188
- readonly summary: ReportSummary;
30189
- /** Detailed metrics. */
30190
- readonly metrics: ReportMetrics;
30191
- /** Repository breakdown. */
30192
- readonly repositoryBreakdown: ReportRepositoryBreakdown;
30193
- /** Failure analysis. */
30194
- readonly failureAnalysis: FailureStatistics;
30195
- /** Competitor comparison (if included). */
30196
- readonly comparison?: ReportComparison;
30197
- /** Instance details (if included). */
30198
- readonly instanceDetails?: ReportInstanceDetails;
30199
- /** Raw evaluation result. */
30200
- readonly rawResult: EvaluationRunResult;
30201
- }
30202
- /**
30203
- * Interface for report generators.
30204
- */
30205
- interface IReportGenerator {
30206
- /**
30207
- * Generates a report from evaluation results.
30208
- */
30209
- generate(result: EvaluationRunResult, config: ReportConfig, competitors?: readonly CompetitorResult[]): Promise<EvaluationReport>;
30210
- /**
30211
- * Renders report to the specified format.
30212
- */
30213
- render(report: EvaluationReport, format: ReportFormat): Promise<string>;
30214
- /**
30215
- * Saves report to file.
30216
- */
30217
- save(report: EvaluationReport, config: ReportConfig): Promise<void>;
30218
- }
30219
- /**
30220
- * Error for report generation failures.
30221
- */
30222
- declare class ReportGenerationError extends Error {
30223
- readonly cause?: unknown;
30224
- constructor(message: string, cause?: unknown);
30225
- }
30226
-
30227
- /**
30228
- * nexus-agents/swe-bench - SWE-Bench Runner Types
30229
- *
30230
- * Type definitions for the SWE-bench runner module.
30231
- * Includes error types, progress tracking, and configuration interfaces.
30232
- *
30233
- * @module swe-bench/swe-bench-runner-types
30234
- * (Source: Issue #257 - SWE-Bench Evaluation)
30235
- */
30236
-
30237
- /**
30238
- * Error codes for runner failures.
30239
- */
30240
- type RunnerErrorCode = 'DATASET_LOAD_FAILED' | 'EXECUTOR_NOT_SET' | 'RUN_ABORTED' | 'CHECKPOINT_ERROR' | 'IO_ERROR' | 'UNKNOWN';
30241
- /**
30242
- * Error for runner operations.
30243
- */
30244
- declare class SWEBenchRunnerError extends Error {
30245
- readonly cause?: unknown;
30246
- readonly code: RunnerErrorCode;
30247
- constructor(message: string, code: RunnerErrorCode, cause?: unknown);
30248
- }
30249
- /**
30250
- * Progress information during a run.
30251
- */
30252
- interface RunProgress {
30253
- /** Current instance index (0-based). */
30254
- readonly currentIndex: number;
30255
- /** Total instances to process. */
30256
- readonly totalInstances: number;
30257
- /** Current instance ID. */
30258
- readonly currentInstanceId: string;
30259
- /** Number of completed instances. */
30260
- readonly completed: number;
30261
- /** Number of failed instances. */
30262
- readonly failed: number;
30263
- /** Total tokens used so far. */
30264
- readonly tokensUsed: number;
30265
- /** Elapsed time in milliseconds. */
30266
- readonly elapsedMs: number;
30267
- /** Estimated remaining time in milliseconds. */
30268
- readonly estimatedRemainingMs: number;
30269
- /** Current resolution rate. */
30270
- readonly resolutionRate: number;
30271
- }
30272
- /**
30273
- * Progress callback type.
30274
- */
30275
- type ProgressCallback = (progress: RunProgress) => void;
30276
- /**
30277
- * Configuration for the runner.
30278
- */
30279
- interface RunnerConfig {
30280
- /** SWE-bench configuration. */
30281
- readonly benchConfig: SWEBenchConfig;
30282
- /** Dataset load options. */
30283
- readonly loadOptions?: DatasetLoadOptions;
30284
- /** Model name for predictions. */
30285
- readonly modelName: string;
30286
- /** Whether to resume from checkpoint. */
30287
- readonly resume: boolean;
30288
- /** Checkpoint file path (if resuming). */
30289
- readonly checkpointPath?: string;
30290
- /** Progress callback. */
30291
- readonly onProgress?: ProgressCallback;
30292
- /** Message callback. */
30293
- readonly onMessage?: (message: string) => void;
30294
- /** Abort signal. */
30295
- readonly signal?: AbortSignal;
30296
- }
30297
-
30298
- /**
30299
- * nexus-agents/swe-bench - SWE-Bench Runner
30300
- *
30301
- * Main runner class for executing SWE-bench evaluations.
30302
- * Coordinates dataset loading, agent execution, and result collection.
30303
- *
30304
- * @module swe-bench/swe-bench-runner
30305
- * (Source: Issue #257 - SWE-Bench Evaluation)
30306
- */
30307
-
30308
- /**
30309
- * Main runner for SWE-bench evaluations.
30310
- */
30311
- declare class SWEBenchRunner {
30312
- private executor;
30313
- private readonly config;
30314
- constructor(config?: Partial<RunnerConfig>);
30315
- /**
30316
- * Sets the agent executor to use.
30317
- */
30318
- setExecutor(executor: IAgentExecutor): void;
30319
- /**
30320
- * Gets the current configuration.
30321
- */
30322
- getConfig(): RunnerConfig;
30323
- /**
30324
- * Loads instances from the dataset.
30325
- */
30326
- loadInstances(variant?: SWEBenchVariant): Promise<Result<readonly SWEBenchInstance[], SWEBenchRunnerError>>;
30327
- /**
30328
- * Loads checkpoint if resuming.
30329
- */
30330
- loadCheckpoint(): Promise<Result<SWEBenchCheckpoint | null, SWEBenchRunnerError>>;
30331
- /**
30332
- * Saves checkpoint.
30333
- */
30334
- saveCheckpoint(completedIds: readonly string[]): Promise<Result<void, SWEBenchRunnerError>>;
30335
- /**
30336
- * Runs on a single instance.
30337
- */
30338
- private runInstance;
30339
- /**
30340
- * Resolves instances to process - loads from dataset if not provided.
30341
- */
30342
- private resolveInstances;
30343
- /**
30344
- * Prepares run state with checkpoint data.
30345
- */
30346
- private prepareRunState;
30347
- /**
30348
- * Processes a single instance in the run loop.
30349
- */
30350
- private processInstance;
30351
- /**
30352
- * Executes the benchmark run.
30353
- */
30354
- run(instances?: readonly SWEBenchInstance[]): Promise<Result<SWEBenchRunResult[], SWEBenchRunnerError>>;
30355
- /**
30356
- * Runs and writes predictions to a file.
30357
- */
30358
- runAndWrite(instances?: readonly SWEBenchInstance[]): Promise<Result<SWEBenchSummary, SWEBenchRunnerError>>;
30359
- /**
30360
- * Calculates summary statistics from results.
30361
- */
30362
- calculateSummary(results: readonly SWEBenchRunResult[]): SWEBenchSummary;
30363
- }
30364
- /**
30365
- * Creates a runner with the given configuration.
30366
- */
30367
- declare function createRunner(config?: Partial<RunnerConfig>): SWEBenchRunner;
30368
- /**
30369
- * Creates a runner for a specific variant.
30370
- */
30371
- declare function createVariantRunner(variant: SWEBenchVariant, options?: Partial<Omit<RunnerConfig, 'benchConfig'>>): SWEBenchRunner;
30372
- /**
30373
- * Quick run for testing with limited instances.
30374
- */
30375
- declare function quickRun(executor: IAgentExecutor, variant?: SWEBenchVariant, limit?: number): Promise<Result<SWEBenchSummary, SWEBenchRunnerError>>;
30376
-
30377
- /**
30378
- * nexus-agents/swe-bench - Environment Validator Types
30379
- *
30380
- * Type definitions and constants for SWE-bench environment validation.
30381
- *
30382
- * @module swe-bench/environment-validator-types
30383
- * (Source: Issue #257 - SWE-Bench Evaluation)
30384
- */
30385
- /**
30386
- * Python environment validation result.
30387
- */
30388
- interface PythonValidation {
30389
- /** Whether a compatible Python is available. */
30390
- readonly available: boolean;
30391
- /** Python version string (e.g., "3.10.12"). */
30392
- readonly version?: string;
30393
- /** Path to the Python executable. */
30394
- readonly path?: string;
30395
- }
30396
- /**
30397
- * SWE-bench package validation result.
30398
- */
30399
- interface SwebenchValidation {
30400
- /** Whether swebench package is installed. */
30401
- readonly installed: boolean;
30402
- /** swebench version string. */
30403
- readonly version?: string;
30404
- }
30405
- /**
30406
- * Docker environment validation result.
30407
- */
30408
- interface DockerValidation {
30409
- /** Whether Docker daemon is running. */
30410
- readonly running: boolean;
30411
- /** Docker version string. */
30412
- readonly version?: string;
30413
- }
30414
- /**
30415
- * Disk space validation result.
30416
- */
30417
- interface DiskSpaceValidation {
30418
- /** Available disk space in bytes. */
30419
- readonly available: number;
30420
- /** Whether disk space is sufficient (>= 120GB). */
30421
- readonly sufficient: boolean;
30422
- }
30423
- /**
30424
- * Complete environment validation result.
30425
- */
30426
- interface EnvironmentValidationResult {
30427
- /** Whether the environment is valid for SWE-bench evaluation. */
30428
- readonly valid: boolean;
30429
- /** Python environment validation. */
30430
- readonly python: PythonValidation;
30431
- /** SWE-bench package validation. */
30432
- readonly swebench: SwebenchValidation;
30433
- /** Docker environment validation. */
30434
- readonly docker: DockerValidation;
30435
- /** Disk space validation. */
30436
- readonly diskSpace: DiskSpaceValidation;
30437
- /** Critical errors that prevent evaluation. */
30438
- readonly errors: readonly string[];
30439
- /** Non-critical warnings. */
30440
- readonly warnings: readonly string[];
30441
- }
30442
-
30443
- /**
30444
- * nexus-agents/swe-bench - Environment Validator Checks
30445
- *
30446
- * Individual validation functions for SWE-bench environment requirements:
30447
- * - Python 3.10 or 3.11 (not 3.12+ due to swebench compatibility)
30448
- * - swebench package installed
30449
- * - Docker daemon running
30450
- * - Sufficient disk space (120GB recommended)
30451
- *
30452
- * @module swe-bench/environment-validator-checks
30453
- * (Source: Issue #257 - SWE-Bench Evaluation)
30454
- */
30455
-
30456
- /**
30457
- * Validates Python environment.
30458
- *
30459
- * Checks for Python 3.10 or 3.11 (swebench doesn't support 3.12+).
30460
- * Tries python3, python3.11, python3.10 in order.
30461
- */
30462
- declare function validatePython(logger?: ILogger): Promise<PythonValidation>;
30463
- /**
30464
- * Validates swebench package installation.
30465
- */
30466
- declare function validateSwebench(logger?: ILogger): Promise<SwebenchValidation>;
30467
- /**
30468
- * Validates Docker environment.
30469
- */
30470
- declare function validateDocker(logger?: ILogger): Promise<DockerValidation>;
30471
- /**
30472
- * Validates available disk space.
30473
- */
30474
- declare function validateDiskSpace(logger?: ILogger): Promise<DiskSpaceValidation>;
30475
-
30476
- /**
30477
- * nexus-agents/swe-bench - Environment Validator
30478
- *
30479
- * Validates that the local environment meets SWE-bench evaluation requirements:
30480
- * - Python 3.10 or 3.11 (not 3.12+ due to swebench compatibility)
30481
- * - swebench package installed
30482
- * - Docker daemon running
30483
- * - Sufficient disk space (120GB recommended)
30484
- *
30485
- * @module swe-bench/environment-validator
30486
- * (Source: Issue #257 - SWE-Bench Evaluation)
30487
- */
30488
-
30489
- /**
30490
- * Validates the complete environment for SWE-bench evaluation.
30491
- */
30492
- declare function validateEnvironment(logger?: ILogger): Promise<EnvironmentValidationResult>;
30493
- /**
30494
- * Formats validation result for display.
30495
- */
30496
- declare function formatValidationResult(result: EnvironmentValidationResult): string;
30497
-
30498
- /**
30499
- * nexus-agents/swe-bench - Harness Executor Types
30500
- *
30501
- * Type definitions and constants for SWE-bench harness execution.
30502
- *
30503
- * @module swe-bench/harness-executor-types
30504
- * @see https://www.swebench.com/SWE-bench/guides/evaluation/
30505
- * (Source: Issue #257 - SWE-Bench Evaluation)
30506
- */
30507
-
30508
- /**
30509
- * Configuration for a single harness execution.
30510
- */
30511
- interface HarnessExecutionConfig {
30512
- /** Path to predictions JSONL file. */
30513
- readonly predictionsPath: string;
30514
- /** SWE-bench dataset name/variant. */
30515
- readonly datasetName: SWEBenchVariant;
30516
- /** Maximum number of parallel workers. */
30517
- readonly maxWorkers: number;
30518
- /** Unique run identifier. */
30519
- readonly runId: string;
30520
- /** Timeout per instance in seconds. */
30521
- readonly timeoutSeconds: number;
30522
- /** Output directory for logs and results. */
30523
- readonly outputDir: string;
30524
- /** Optional specific instance IDs to evaluate. */
30525
- readonly instanceIds?: readonly string[];
30526
- /** Whether to use Docker-based execution. */
30527
- readonly useDocker: boolean;
30528
- /** Docker cache level. */
30529
- readonly cacheLevel: 'none' | 'base' | 'env' | 'instance';
30530
- }
30531
- /**
30532
- * Default harness execution configuration.
30533
- */
30534
- declare const DEFAULT_HARNESS_EXECUTION_CONFIG: HarnessExecutionConfig;
30535
- /**
30536
- * Raw test result from harness output.
30537
- */
30538
- interface RawTestResult {
30539
- readonly test_name: string;
30540
- readonly status: 'PASSED' | 'FAILED' | 'ERROR' | 'SKIPPED' | 'TIMEOUT';
30541
- readonly duration_ms?: number;
30542
- readonly error_message?: string;
30543
- readonly stack_trace?: string;
30544
- }
30545
- /**
30546
- * Raw instance result from harness output.
30547
- */
30548
- interface RawInstanceResult {
30549
- readonly instance_id: string;
30550
- readonly model_name_or_path: string;
30551
- readonly resolved: boolean;
30552
- readonly patch_applied: boolean;
30553
- readonly patch_error?: string;
30554
- readonly tests_passed: number;
30555
- readonly tests_failed: number;
30556
- readonly tests_total: number;
30557
- readonly test_results?: readonly RawTestResult[];
30558
- readonly duration_ms: number;
30559
- readonly log_path?: string;
30560
- readonly container_id?: string;
30561
- }
30562
- /**
30563
- * Raw harness execution output.
30564
- */
30565
- interface RawHarnessOutput {
30566
- readonly run_id: string;
30567
- readonly dataset_name: string;
30568
- readonly model_name_or_path: string;
30569
- readonly started_at: string;
30570
- readonly completed_at: string;
30571
- readonly total_instances: number;
30572
- readonly predicted_instances: number;
30573
- readonly resolved_instances: number;
30574
- readonly instance_results: readonly RawInstanceResult[];
30575
- readonly harness_version?: string;
30576
- readonly errors?: readonly string[];
30577
- }
30578
- /**
30579
- * Harness execution state.
30580
- */
30581
- type HarnessExecutionState = 'idle' | 'starting' | 'running' | 'parsing' | 'completed' | 'failed' | 'cancelled';
30582
- /**
30583
- * Progress information during harness execution.
30584
- */
30585
- interface HarnessExecutionProgress {
30586
- /** Current execution state. */
30587
- readonly state: HarnessExecutionState;
30588
- /** Current instance being evaluated (if known). */
30589
- readonly currentInstanceId?: string;
30590
- /** Number of instances completed. */
30591
- readonly completedCount: number;
30592
- /** Total instances to evaluate. */
30593
- readonly totalCount: number;
30594
- /** Number resolved so far. */
30595
- readonly resolvedCount: number;
30596
- /** Elapsed time in milliseconds. */
30597
- readonly elapsedMs: number;
30598
- /** Estimated remaining time in milliseconds. */
30599
- readonly estimatedRemainingMs?: number;
30600
- /** Latest log line from harness. */
30601
- readonly latestLog?: string;
30602
- }
30603
- /**
30604
- * Callback for progress updates during execution.
30605
- */
30606
- type HarnessProgressCallback = (progress: HarnessExecutionProgress) => void;
30607
- /**
30608
- * Error codes for harness execution failures.
30609
- */
30610
- type HarnessErrorCode = 'HARNESS_NOT_FOUND' | 'PREDICTIONS_NOT_FOUND' | 'INVALID_PREDICTIONS' | 'EXECUTION_TIMEOUT' | 'EXECUTION_FAILED' | 'PARSE_ERROR' | 'DOCKER_ERROR' | 'CANCELLED' | 'UNKNOWN';
30611
- /**
30612
- * Error thrown during harness execution.
30613
- */
30614
- declare class HarnessExecutorError extends Error {
30615
- readonly cause?: unknown;
30616
- readonly code: HarnessErrorCode;
30617
- readonly details?: Record<string, unknown>;
30618
- constructor(message: string, code: HarnessErrorCode, cause?: unknown);
30619
- }
30620
- /**
30621
- * Interface for harness executor implementations.
30622
- */
30623
- interface IHarnessExecutor {
30624
- /**
30625
- * Validates that the harness is ready to execute.
30626
- */
30627
- validate(): Promise<HarnessValidationResult>;
30628
- /**
30629
- * Executes the SWE-bench harness on predictions.
30630
- */
30631
- execute(config: HarnessExecutionConfig, onProgress?: HarnessProgressCallback): Promise<HarnessExecutionResult>;
30632
- /**
30633
- * Executes evaluation for a single instance (for testing/debugging).
30634
- */
30635
- executeInstance(instanceId: string, config: HarnessExecutionConfig): Promise<InstanceEvaluationResult>;
30636
- /**
30637
- * Cancels an in-progress execution.
30638
- */
30639
- cancel(): Promise<void>;
30640
- /**
30641
- * Gets the harness version.
30642
- */
30643
- getVersion(): Promise<string>;
30644
- }
30645
- /**
30646
- * Result of harness validation.
30647
- */
30648
- interface HarnessValidationResult {
30649
- /** Whether the harness is ready. */
30650
- readonly ready: boolean;
30651
- /** Python available. */
30652
- readonly pythonAvailable: boolean;
30653
- /** Python version. */
30654
- readonly pythonVersion?: string;
30655
- /** swebench package installed. */
30656
- readonly swebenchInstalled: boolean;
30657
- /** swebench version. */
30658
- readonly swebenchVersion?: string;
30659
- /** Docker available (if required). */
30660
- readonly dockerAvailable: boolean;
30661
- /** Docker version. */
30662
- readonly dockerVersion?: string;
30663
- /** Validation errors. */
30664
- readonly errors: readonly string[];
30665
- }
30666
- /**
30667
- * Result of harness execution.
30668
- */
30669
- interface HarnessExecutionResult {
30670
- /** Whether execution completed successfully. */
30671
- readonly success: boolean;
30672
- /** Run identifier. */
30673
- readonly runId: string;
30674
- /** Dataset variant evaluated. */
30675
- readonly datasetName: SWEBenchVariant;
30676
- /** Model name. */
30677
- readonly modelNameOrPath: string;
30678
- /** Execution start time (ISO 8601). */
30679
- readonly startedAt: string;
30680
- /** Execution end time (ISO 8601). */
30681
- readonly completedAt: string;
30682
- /** Total instances in predictions. */
30683
- readonly totalInstances: number;
30684
- /** Instances successfully resolved. */
30685
- readonly resolvedInstances: number;
30686
- /** Resolution rate. */
30687
- readonly resolutionRate: number;
30688
- /** Per-instance results. */
30689
- readonly instanceResults: readonly InstanceEvaluationResult[];
30690
- /** Harness version used. */
30691
- readonly harnessVersion?: string;
30692
- /** Error message if failed. */
30693
- readonly error?: string;
30694
- /** Path to output logs. */
30695
- readonly logPath?: string;
30696
- }
30697
- /**
30698
- * Maps raw test status to typed TestStatus.
30699
- */
30700
- declare function mapTestStatus(raw: string): TestStatus;
30701
- /**
30702
- * Determines resolution status from raw result.
30703
- */
30704
- declare function mapResolutionStatus(raw: RawInstanceResult): ResolutionStatus;
30705
-
30706
- /**
30707
- * nexus-agents/swe-bench - Harness Executor Implementation
30708
- *
30709
- * Core implementation of the SWE-bench evaluation harness executor.
30710
- * Executes the official SWE-bench evaluation harness and parses results.
30711
- *
30712
- * @module swe-bench/harness-executor-impl
30713
- * @see https://www.swebench.com/SWE-bench/guides/evaluation/
30714
- * (Source: Issue #257 - SWE-Bench Evaluation)
30715
- */
30716
-
30717
- /**
30718
- * SWE-bench harness executor.
30719
- *
30720
- * Executes the official SWE-bench evaluation harness and parses results.
30721
- * Uses Docker containers to run test evaluations in isolated environments.
30722
- */
30723
- declare class HarnessExecutor implements IHarnessExecutor {
30724
- private readonly logger;
30725
- private currentProcess;
30726
- private isCancelled;
30727
- constructor(logger?: ILogger);
30728
- /**
30729
- * Validates that the environment is ready for harness execution.
30730
- */
30731
- validate(): Promise<HarnessValidationResult>;
30732
- /**
30733
- * Executes the SWE-bench harness on predictions.
30734
- */
30735
- execute(config: HarnessExecutionConfig, onProgress?: HarnessProgressCallback): Promise<HarnessExecutionResult>;
30736
- /**
30737
- * Prepares execution by validating environment and predictions.
30738
- */
30739
- private prepareExecution;
30740
- /**
30741
- * Runs the harness process.
30742
- */
30743
- private runHarness;
30744
- /**
30745
- * Builds the final execution result.
30746
- */
30747
- private buildExecutionResult;
30748
- /**
30749
- * Executes evaluation for a single instance.
30750
- */
30751
- executeInstance(instanceId: string, config: HarnessExecutionConfig): Promise<InstanceEvaluationResult>;
30752
- /**
30753
- * Cancels an in-progress execution.
30754
- */
30755
- cancel(): Promise<void>;
30756
- /**
30757
- * Gets the harness version.
30758
- */
30759
- getVersion(): Promise<string>;
30760
- }
30761
-
30762
- /**
30763
- * nexus-agents/swe-bench - Harness Executor Factory
30764
- *
30765
- * Factory functions and quick helpers for creating harness executors.
30766
- *
30767
- * @module swe-bench/harness-executor-factory
30768
- * @see https://www.swebench.com/SWE-bench/guides/evaluation/
30769
- * (Source: Issue #257 - SWE-Bench Evaluation)
30770
- */
30771
-
30772
- /**
30773
- * Creates a new harness executor instance.
30774
- */
30775
- declare function createHarnessExecutor(logger?: ILogger): HarnessExecutor;
30776
- /**
30777
- * Validates the environment and returns a configured executor if ready.
30778
- */
30779
- declare function createValidatedExecutor(logger?: ILogger): Promise<{
30780
- executor: HarnessExecutor;
30781
- validation: HarnessValidationResult;
30782
- }>;
30783
- /**
30784
- * Quick execution helper for simple use cases.
30785
- */
30786
- declare function executeHarness(predictionsPath: string, options?: Partial<HarnessExecutionConfig>, onProgress?: HarnessProgressCallback): Promise<HarnessExecutionResult>;
30787
-
30788
- /**
30789
- * nexus-agents/swe-bench - Harness Version Detection
30790
- *
30791
- * Version detection utilities for SWE-bench harness.
30792
- *
30793
- * @module swe-bench/harness-version-detection
30794
- * (Source: Issue #257 - SWE-Bench Evaluation)
30795
- */
30796
-
30797
- /**
30798
- * Gets the swebench package version.
30799
- */
30800
- declare function getSwebenchVersion(logger?: ILogger): Promise<string | null>;
30801
- /**
30802
- * Gets the Python version.
30803
- */
30804
- declare function getPythonVersion(logger?: ILogger): Promise<string | null>;
30805
- /**
30806
- * Gets the Docker version.
30807
- */
30808
- declare function getDockerVersion(logger?: ILogger): Promise<string | null>;
30809
-
30810
- /**
30811
- * nexus-agents/swe-bench - Harness Output Parsing
30812
- *
30813
- * Output parsing utilities for SWE-bench harness results.
30814
- *
30815
- * @module swe-bench/harness-output-parsing
30816
- * (Source: Issue #257 - SWE-Bench Evaluation)
30817
- */
30818
-
30819
- /**
30820
- * Extracts progress information from harness stdout line.
30821
- */
30822
- declare function parseProgressLine(line: string, _currentProgress: HarnessExecutionProgress): Partial<HarnessExecutionProgress> | null;
30823
- /**
30824
- * Transforms a raw test result to typed TestCaseResult.
30825
- */
30826
- declare function transformTestResult(raw: RawTestResult): TestCaseResult;
30827
- /**
30828
- * Transforms a raw instance result to typed InstanceEvaluationResult.
30829
- */
30830
- declare function transformInstanceResult(raw: RawInstanceResult): InstanceEvaluationResult;
30831
- /**
30832
- * Transforms raw harness output to typed results.
30833
- */
30834
- declare function transformHarnessOutput(raw: RawHarnessOutput): {
30835
- instanceResults: InstanceEvaluationResult[];
30836
- resolvedCount: number;
30837
- totalCount: number;
30838
- };
30839
-
30840
- /**
30841
- * nexus-agents/swe-bench - Harness File Operations
30842
- *
30843
- * File validation and process management for SWE-bench harness.
30844
- *
30845
- * @module swe-bench/harness-file-operations
30846
- * (Source: Issue #257 - SWE-Bench Evaluation)
30847
- */
30848
-
30849
- /**
30850
- * Builds command line arguments for swebench harness.
30851
- */
30852
- declare function buildHarnessArgs(config: HarnessExecutionConfig): string[];
30853
- /**
30854
- * Builds the full command string for harness execution.
30855
- */
30856
- declare function buildHarnessCommand(config: HarnessExecutionConfig): string;
30857
- /**
30858
- * Validates that the predictions file exists and is readable.
30859
- */
30860
- declare function validatePredictionsFile(predictionsPath: string, logger?: ILogger): Promise<{
30861
- valid: boolean;
30862
- lineCount: number;
30863
- error?: string;
30864
- }>;
30865
- /**
30866
- * Calculates estimated remaining time based on progress.
30867
- */
30868
- declare function calculateEstimatedRemaining(completedCount: number, totalCount: number, elapsedMs: number): number | undefined;
30869
- /**
30870
- * Creates initial progress state.
30871
- */
30872
- declare function createInitialProgress(totalCount: number): HarnessExecutionProgress;
30873
- /**
30874
- * Gets the expected results file path.
30875
- */
30876
- declare function getResultsFilePath(config: HarnessExecutionConfig): string;
30877
-
30878
- /**
30879
- * nexus-agents/swe-bench - Evaluation Harness
30880
- *
30881
- * Main evaluation harness implementation that orchestrates:
30882
- * - Patch application in isolated environments
30883
- * - Test execution and result collection
30884
- * - Scoring and metrics calculation
30885
- *
30886
- * @module swe-bench/evaluation-harness
30887
- * @see https://www.swebench.com/SWE-bench/guides/evaluation/
30888
- * (Source: Issue #257 - SWE-Bench Evaluation)
30889
- */
30890
-
30891
- /**
30892
- * Main SWE-bench evaluation harness.
30893
- *
30894
- * Coordinates the evaluation pipeline:
30895
- * 1. Validate environment prerequisites
30896
- * 2. Load and validate predictions
30897
- * 3. Execute harness in Docker containers
30898
- * 4. Aggregate and report results
30899
- */
30900
- declare class EvaluationHarness implements IEvaluationHarness {
30901
- private readonly logger;
30902
- private readonly executor;
30903
- private isCancelled;
30904
- constructor(logger?: ILogger);
30905
- /**
30906
- * Validates that the evaluation environment is ready.
30907
- */
30908
- validate(): Promise<EvaluationValidationResult>;
30909
- /**
30910
- * Runs evaluation on a set of predictions.
30911
- */
30912
- evaluate(predictions: readonly SWEBenchPrediction[], config: EvaluationHarnessConfig, onProgress?: EvaluationProgressCallback): Promise<EvaluationRunResult>;
30913
- /**
30914
- * Evaluates a single instance for testing/debugging.
30915
- */
30916
- evaluateInstance(prediction: SWEBenchPrediction, config: EvaluationHarnessConfig): Promise<InstanceEvaluationResult>;
30917
- /**
30918
- * Cancels an in-progress evaluation.
30919
- */
30920
- cancel(): Promise<void>;
30921
- /**
30922
- * Gets the harness version.
30923
- */
30924
- getVersion(): Promise<string>;
30925
- /**
30926
- * Writes predictions to a temporary JSONL file.
30927
- */
30928
- private writePredictionsFile;
30929
- /**
30930
- * Executes the harness and transforms progress updates.
30931
- */
30932
- private executeHarness;
30933
- }
30934
- /**
30935
- * Creates a new evaluation harness instance.
30936
- */
30937
- declare function createEvaluationHarness(logger?: ILogger): EvaluationHarness;
30938
- /**
30939
- * Validates environment and returns harness if ready.
30940
- */
30941
- declare function createValidatedHarness(logger?: ILogger): Promise<Result<EvaluationHarness, EvaluationHarnessError>>;
30942
- /**
30943
- * Quick evaluation helper for simple use cases.
30944
- */
30945
- declare function evaluatePredictions(predictions: readonly SWEBenchPrediction[], options?: Partial<EvaluationHarnessConfig>, onProgress?: EvaluationProgressCallback): Promise<EvaluationRunResult>;
30946
-
30947
- /**
30948
- * nexus-agents/swe-bench - Evaluation Harness Helpers
30949
- *
30950
- * Helper functions for metrics calculation, progress transformation,
30951
- * and system information gathering used by the evaluation harness.
30952
- *
30953
- * @module swe-bench/evaluation-harness-helpers
30954
- * @see https://www.swebench.com/SWE-bench/guides/evaluation/
30955
- * (Source: Issue #257 - SWE-Bench Evaluation)
30956
- */
30957
-
30958
- /**
30959
- * Calculates aggregate metrics from instance results.
30960
- */
30961
- declare function calculateMetrics(results: readonly InstanceEvaluationResult[]): EvaluationMetrics;
30962
- /**
30963
- * Calculates per-repository metrics.
30964
- */
30965
- declare function calculateRepositoryMetrics(results: readonly InstanceEvaluationResult[]): readonly RepositoryMetrics[];
30966
- /**
30967
- * Extracts repository name from instance ID.
30968
- * Instance IDs follow format: "owner__repo-issue_number"
30969
- * Handles hyphenated names like "scikit-learn__scikit-learn-9876"
30970
- */
30971
- declare function extractRepoFromInstanceId(instanceId: string): string;
30972
- /**
30973
- * Extracts model name from predictions.
30974
- */
30975
- declare function extractModelName(predictions: readonly SWEBenchPrediction[]): string;
30976
- /**
30977
- * Raw harness progress data structure.
30978
- */
30979
- interface RawHarnessProgress {
30980
- readonly currentInstanceId?: string;
30981
- readonly completedCount: number;
30982
- readonly totalCount: number;
30983
- readonly resolvedCount: number;
30984
- readonly elapsedMs: number;
30985
- readonly estimatedRemainingMs?: number;
30986
- readonly state: string;
30987
- }
30988
- /**
30989
- * Maps harness state to evaluation phase.
30990
- */
30991
- declare function mapStateToPhase(state: string): EvaluationPhase;
30992
- /**
30993
- * Transforms raw harness progress to evaluation progress.
30994
- */
30995
- declare function transformHarnessProgress(harnessProgress: RawHarnessProgress, totalPredictions: number): EvaluationProgress;
30996
- /**
30997
- * Creates a progress adapter from harness progress to evaluation progress.
30998
- */
30999
- declare function createProgressAdapter(totalPredictions: number, onProgress?: (progress: EvaluationProgress) => void): ((progress: unknown) => void) | undefined;
31000
- /**
31001
- * Memory information result.
31002
- */
31003
- interface MemoryInfo {
31004
- readonly total: number;
31005
- readonly free: number;
31006
- }
31007
- /**
31008
- * Gets memory information from the operating system.
31009
- */
31010
- declare function getMemoryInfo(): MemoryInfo;
31011
- /**
31012
- * Gets CPU core count from the operating system.
31013
- */
31014
- declare function getCpuCores(): number;
31015
-
31016
- /**
31017
- * nexus-agents/swe-bench - Patch Applicator Types
31018
- *
31019
- * Type definitions for patch application and validation.
31020
- *
31021
- * @module swe-bench/patch-applicator-types
31022
- * (Source: Issue #257 - SWE-Bench Evaluation)
31023
- */
31024
- /**
31025
- * Result of patch validation.
31026
- */
31027
- interface PatchValidationResult {
31028
- /** Whether the patch is valid. */
31029
- readonly valid: boolean;
31030
- /** Format detected (unified, context, git). */
31031
- readonly format: PatchFormat;
31032
- /** Number of hunks in the patch. */
31033
- readonly hunkCount: number;
31034
- /** Files affected by the patch. */
31035
- readonly affectedFiles: readonly string[];
31036
- /** Validation errors if invalid. */
31037
- readonly errors: readonly string[];
31038
- /** Warnings that don't prevent application. */
31039
- readonly warnings: readonly string[];
31040
- }
31041
- /**
31042
- * Supported patch formats.
31043
- */
31044
- type PatchFormat = 'unified' | 'context' | 'git' | 'unknown';
31045
- /**
31046
- * Result of applying a patch.
31047
- */
31048
- interface PatchApplicationResult {
31049
- /** Whether the patch was applied successfully. */
31050
- readonly success: boolean;
31051
- /** Files that were modified. */
31052
- readonly modifiedFiles: readonly string[];
31053
- /** Files that failed to patch. */
31054
- readonly failedFiles: readonly string[];
31055
- /** Whether the patch applied cleanly (no fuzz/offset). */
31056
- readonly appliedCleanly: boolean;
31057
- /** Fuzz factor used if needed. */
31058
- readonly fuzzFactor?: number;
31059
- /** Error message if failed. */
31060
- readonly error?: string;
31061
- /** Detailed output from patch command. */
31062
- readonly output: string;
31063
- /** Whether a backup was created. */
31064
- readonly backupCreated: boolean;
31065
- }
31066
- /**
31067
- * Options for patch application.
31068
- */
31069
- interface PatchApplicationOptions {
31070
- /** Working directory (repository root). */
31071
- readonly workDir: string;
31072
- /** Whether to allow fuzz matching (default: true). */
31073
- readonly allowFuzz?: boolean;
31074
- /** Maximum fuzz factor (default: 2). */
31075
- readonly maxFuzz?: number;
31076
- /** Whether to create backups (default: true). */
31077
- readonly createBackup?: boolean;
31078
- /** Whether to do a dry run (default: false). */
31079
- readonly dryRun?: boolean;
31080
- /** Strip path prefix level (default: 1 for git diffs). */
31081
- readonly stripLevel?: number;
31082
- /** Timeout in milliseconds (default: 30000). */
31083
- readonly timeoutMs?: number;
31084
- }
31085
- /**
31086
- * Default patch application options.
31087
- */
31088
- declare const DEFAULT_PATCH_OPTIONS: Required<Omit<PatchApplicationOptions, 'workDir'>>;
31089
- /**
31090
- * Error codes for patch operations.
31091
- */
31092
- type PatchErrorCode = 'INVALID_PATCH' | 'PATCH_CONFLICT' | 'FILE_NOT_FOUND' | 'PERMISSION_DENIED' | 'TIMEOUT' | 'EXECUTION_FAILED' | 'UNKNOWN';
31093
- /**
31094
- * Patch applicator error.
31095
- */
31096
- declare class PatchApplicatorError extends Error {
31097
- readonly cause?: unknown;
31098
- readonly code: PatchErrorCode;
31099
- readonly details?: Record<string, unknown>;
31100
- constructor(message: string, code: PatchErrorCode, cause?: unknown);
31101
- }
31102
- /**
31103
- * Interface for patch applicator implementations.
31104
- */
31105
- interface IPatchApplicator {
31106
- /**
31107
- * Validates a patch without applying it.
31108
- */
31109
- validate(patch: string): PatchValidationResult;
31110
- /**
31111
- * Applies a patch to the working directory.
31112
- */
31113
- apply(patch: string, options: PatchApplicationOptions): Promise<PatchApplicationResult>;
31114
- /**
31115
- * Reverts a previously applied patch.
31116
- */
31117
- revert(patch: string, options: PatchApplicationOptions): Promise<PatchApplicationResult>;
31118
- /**
31119
- * Checks if a patch can be applied cleanly.
31120
- */
31121
- canApply(patch: string, options: PatchApplicationOptions): Promise<boolean>;
31122
- }
31123
-
31124
- /**
31125
- * nexus-agents/swe-bench - Patch Applicator
31126
- *
31127
- * Applies and validates patches for SWE-bench evaluation.
31128
- * Handles git-style unified diffs with fuzz matching and rollback support.
31129
- *
31130
- * @module swe-bench/patch-applicator
31131
- * (Source: Issue #257 - SWE-Bench Evaluation)
31132
- */
31133
-
31134
- /**
31135
- * Applies patches using the system `patch` command.
31136
- *
31137
- * Supports:
31138
- * - Git-style unified diffs
31139
- * - Fuzz matching for imperfect patches
31140
- * - Dry-run validation
31141
- * - Rollback via reverse application
31142
- */
31143
- declare class PatchApplicator implements IPatchApplicator {
31144
- private readonly logger;
31145
- constructor(logger?: ILogger);
31146
- /**
31147
- * Validates a patch without applying it.
31148
- */
31149
- validate(patch: string): PatchValidationResult;
31150
- /**
31151
- * Applies a patch to the working directory.
31152
- */
31153
- apply(patch: string, options: PatchApplicationOptions): Promise<PatchApplicationResult>;
31154
- /**
31155
- * Reverts a previously applied patch.
31156
- */
31157
- revert(patch: string, options: PatchApplicationOptions): Promise<PatchApplicationResult>;
31158
- /**
31159
- * Checks if a patch can be applied cleanly.
31160
- */
31161
- canApply(patch: string, options: PatchApplicationOptions): Promise<boolean>;
31162
- /**
31163
- * Resolves partial options with defaults.
31164
- */
31165
- private resolveOptions;
31166
- }
31167
- /**
31168
- * Creates a new patch applicator instance.
31169
- */
31170
- declare function createPatchApplicator(logger?: ILogger): PatchApplicator;
31171
- /**
31172
- * Validates a patch string.
31173
- */
31174
- declare function validatePatch(patch: string): PatchValidationResult;
31175
- /**
31176
- * Quick helper to apply a patch.
31177
- */
31178
- declare function applyPatch(patch: string, workDir: string, options?: Partial<PatchApplicationOptions>): Promise<PatchApplicationResult>;
31179
- /**
31180
- * Quick helper to check if a patch can be applied.
31181
- */
31182
- declare function canApplyPatch(patch: string, workDir: string): Promise<boolean>;
31183
-
31184
- /**
31185
- * nexus-agents/swe-bench - Test Runner Types
31186
- *
31187
- * Type definitions for running repository test suites.
31188
- *
31189
- * @module swe-bench/test-runner-types
31190
- * (Source: Issue #257 - SWE-Bench Evaluation)
31191
- */
31192
-
31193
- /**
31194
- * Configuration for test execution.
31195
- */
31196
- interface TestRunnerConfig {
31197
- /** Working directory (repository root). */
31198
- readonly workDir: string;
31199
- /** Timeout per test in milliseconds. */
31200
- readonly testTimeoutMs: number;
31201
- /** Overall timeout in milliseconds. */
31202
- readonly overallTimeoutMs: number;
31203
- /** Whether to run tests in Docker. */
31204
- readonly useDocker: boolean;
31205
- /** Docker image to use (if useDocker is true). */
31206
- readonly dockerImage?: string;
31207
- /** Environment variables for test execution. */
31208
- readonly env?: Readonly<Record<string, string>>;
31209
- /** Specific test files/patterns to run. */
31210
- readonly testPatterns?: readonly string[];
31211
- /** Whether to capture stdout/stderr. */
31212
- readonly captureOutput: boolean;
31213
- /** Maximum output size in bytes. */
31214
- readonly maxOutputBytes: number;
31215
- }
31216
- /**
31217
- * Default test runner configuration.
31218
- */
31219
- declare const DEFAULT_TEST_RUNNER_CONFIG: Omit<TestRunnerConfig, 'workDir'>;
31220
- /**
31221
- * Result of running a test suite.
31222
- */
31223
- interface TestSuiteResult {
31224
- /** Whether all tests passed. */
31225
- readonly success: boolean;
31226
- /** Overall status. */
31227
- readonly status: TestStatus;
31228
- /** Individual test results. */
31229
- readonly tests: readonly TestCaseResult[];
31230
- /** Number of tests passed. */
31231
- readonly passed: number;
31232
- /** Number of tests failed. */
31233
- readonly failed: number;
31234
- /** Number of tests skipped. */
31235
- readonly skipped: number;
31236
- /** Number of tests that errored. */
31237
- readonly errored: number;
31238
- /** Total test count. */
31239
- readonly total: number;
31240
- /** Total duration in milliseconds. */
31241
- readonly durationMs: number;
31242
- /** Raw output from test runner. */
31243
- readonly output: string;
31244
- /** Error message if suite failed to run. */
31245
- readonly error?: string;
31246
- }
31247
- /**
31248
- * Supported test frameworks.
31249
- */
31250
- type TestFramework = 'pytest' | 'unittest' | 'nose' | 'tox' | 'unknown';
31251
- /**
31252
- * Test framework detection result.
31253
- */
31254
- interface FrameworkDetectionResult {
31255
- /** Detected framework. */
31256
- readonly framework: TestFramework;
31257
- /** Confidence level (0-1). */
31258
- readonly confidence: number;
31259
- /** Configuration files found. */
31260
- readonly configFiles: readonly string[];
31261
- /** Test command to use. */
31262
- readonly testCommand: string;
31263
- }
31264
- /**
31265
- * Error codes for test runner.
31266
- */
31267
- type TestRunnerErrorCode = 'FRAMEWORK_NOT_DETECTED' | 'TEST_TIMEOUT' | 'SETUP_FAILED' | 'EXECUTION_FAILED' | 'PARSE_ERROR' | 'DOCKER_ERROR' | 'UNKNOWN';
31268
- /**
31269
- * Test runner error.
31270
- */
31271
- declare class TestRunnerError extends Error {
31272
- readonly cause?: unknown;
31273
- readonly code: TestRunnerErrorCode;
31274
- constructor(message: string, code: TestRunnerErrorCode, cause?: unknown);
31275
- }
31276
- /**
31277
- * Interface for test runner implementations.
31278
- */
31279
- interface ITestRunner {
31280
- /**
31281
- * Detects the test framework used by the repository.
31282
- */
31283
- detectFramework(workDir: string): Promise<FrameworkDetectionResult>;
31284
- /**
31285
- * Runs the test suite.
31286
- */
31287
- run(config: TestRunnerConfig): Promise<TestSuiteResult>;
31288
- /**
31289
- * Runs specific tests by pattern.
31290
- */
31291
- runTests(config: TestRunnerConfig, testPatterns: readonly string[]): Promise<TestSuiteResult>;
31292
- /**
31293
- * Cancels a running test execution.
31294
- */
31295
- cancel(): void;
31296
- }
31297
-
31298
- /**
31299
- * nexus-agents/swe-bench - Test Runner
31300
- *
31301
- * Executes repository test suites for SWE-bench evaluation.
31302
- * Supports pytest (primary), unittest, and nose frameworks.
31303
- *
31304
- * @module swe-bench/test-runner
31305
- * (Source: Issue #257 - SWE-Bench Evaluation)
31306
- */
31307
-
31308
- /**
31309
- * Runs repository test suites for SWE-bench evaluation.
31310
- *
31311
- * Features:
31312
- * - Automatic framework detection (pytest, unittest, nose)
31313
- * - Docker isolation support
31314
- * - Timeout handling
31315
- * - Output parsing for detailed results
31316
- */
31317
- declare class TestRunner implements ITestRunner {
31318
- private readonly logger;
31319
- private readonly dockerState;
31320
- constructor(logger?: ILogger);
31321
- /**
31322
- * Detects the test framework used by the repository.
31323
- */
31324
- detectFramework(workDir: string): Promise<FrameworkDetectionResult>;
31325
- /**
31326
- * Runs the full test suite.
31327
- */
31328
- run(config: TestRunnerConfig): Promise<TestSuiteResult>;
31329
- /**
31330
- * Runs specific tests by pattern.
31331
- */
31332
- runTests(config: TestRunnerConfig, testPatterns: readonly string[]): Promise<TestSuiteResult>;
31333
- /**
31334
- * Cancels a running test execution.
31335
- */
31336
- cancel(): void;
31337
- /**
31338
- * Checks for framework configuration files.
31339
- */
31340
- private checkFrameworkFiles;
31341
- /**
31342
- * Finds configuration files in the working directory.
31343
- */
31344
- private findConfigFiles;
31345
- /**
31346
- * Calculates confidence based on found files.
31347
- */
31348
- private calculateConfidence;
31349
- /**
31350
- * Builds the test command string.
31351
- */
31352
- private buildTestCommand;
31353
- /**
31354
- * Executes the test command.
31355
- */
31356
- private executeTests;
31357
- /**
31358
- * Executes tests locally.
31359
- */
31360
- private executeLocally;
31361
- /**
31362
- * Handles test execution errors.
31363
- */
31364
- private handleTestError;
31365
- /**
31366
- * Creates a cancelled result.
31367
- */
31368
- private createCancelledResult;
31369
- }
31370
- /** Creates a new test runner instance. */
31371
- declare function createTestRunner(logger?: ILogger): TestRunner;
31372
- /** Quick helper to run tests. */
31373
- declare function runTests(workDir: string, options?: Partial<TestRunnerConfig>): Promise<TestSuiteResult>;
31374
- /** Quick helper to detect test framework. */
31375
- declare function detectTestFramework(workDir: string): Promise<FrameworkDetectionResult>;
31376
-
31377
- /**
31378
- * nexus-agents/swe-bench - Test Runner Parser
31379
- *
31380
- * Parses test execution output to extract structured results.
31381
- * Supports pytest JSON output and fallback stdout parsing.
31382
- *
31383
- * @module swe-bench/test-runner-parser
31384
- * (Source: Issue #257 - SWE-Bench Evaluation)
31385
- */
31386
-
31387
- /**
31388
- * Reads pytest JSON results file from the working directory.
31389
- */
31390
- declare function readJsonResults(workDir: string): Promise<Record<string, unknown> | null>;
31391
- /**
31392
- * Parses pytest JSON results into TestSuiteResult.
31393
- */
31394
- declare function parseJsonResults(json: Record<string, unknown>, output: string, durationMs: number): TestSuiteResult;
31395
- /**
31396
- * Parses test results from pytest stdout output (fallback).
31397
- */
31398
- declare function parseStdoutResults(output: string, durationMs: number): TestSuiteResult;
31399
- /**
31400
- * Parses test results from output, trying JSON first, then stdout.
31401
- */
31402
- declare function parseTestResults(output: string, startTime: number, workDir: string): Promise<TestSuiteResult>;
31403
-
31404
- /**
31405
- * nexus-agents/swe-bench - Test Runner Docker Execution
31406
- *
31407
- * Handles Docker-isolated test execution for SWE-bench evaluation.
31408
- *
31409
- * @module swe-bench/test-runner-docker
31410
- * (Source: Issue #257 - SWE-Bench Evaluation)
31411
- */
31412
-
31413
- /**
31414
- * State for Docker execution tracking.
31415
- */
31416
- interface DockerExecutionState {
31417
- currentProcess: ChildProcess | null;
31418
- isCancelled: boolean;
31419
- }
31420
- /**
31421
- * Callback for creating cancelled results.
31422
- */
31423
- type CancelledResultFactory = (startTime: number) => TestSuiteResult;
31424
- /**
31425
- * Callback for handling test errors.
31426
- */
31427
- type ErrorHandler = (err: unknown, startTime: number) => TestSuiteResult;
31428
- /**
31429
- * Options for executeInDocker function.
31430
- */
31431
- interface ExecuteInDockerOptions {
31432
- command: string;
31433
- config: TestRunnerConfig;
31434
- startTime: number;
31435
- state: DockerExecutionState;
31436
- createCancelledResult: CancelledResultFactory;
31437
- handleTestError: ErrorHandler;
31438
- logger: ILogger;
31439
- }
31440
- /**
31441
- * Builds Docker run arguments for test execution.
31442
- */
31443
- declare function buildDockerArgs(command: string, config: TestRunnerConfig, image: string): string[];
31444
- /**
31445
- * Executes tests in a Docker container.
31446
- */
31447
- declare function executeInDocker(options: ExecuteInDockerOptions): Promise<TestSuiteResult>;
31448
-
31449
- /**
31450
- * nexus-agents/swe-bench - Report Generator
31451
- *
31452
- * Generates detailed evaluation reports with metrics, comparisons, and analysis.
31453
- *
31454
- * @module swe-bench/report-generator
31455
- * (Source: Issue #257 - SWE-Bench Evaluation)
31456
- */
31457
-
31458
- /**
31459
- * Generates detailed evaluation reports.
31460
- *
31461
- * Supports:
31462
- * - Multiple output formats (JSON, Markdown, HTML)
31463
- * - Statistical analysis
31464
- * - Failure categorization
31465
- * - Competitor comparisons
31466
- */
31467
- declare class ReportGenerator implements IReportGenerator {
31468
- private readonly logger;
31469
- constructor(logger?: ILogger);
31470
- /**
31471
- * Generates a full evaluation report.
31472
- *
31473
- * Note: Method is async to satisfy IReportGenerator interface contract,
31474
- * which allows implementations to perform async operations (e.g., network
31475
- * calls for competitor data, async template processing).
31476
- */
31477
- generate(result: EvaluationRunResult, config: ReportConfig, competitors?: readonly CompetitorResult[]): Promise<EvaluationReport>;
31478
- /**
31479
- * Renders report to the specified format.
31480
- *
31481
- * Note: Method is async to satisfy IReportGenerator interface contract,
31482
- * which allows implementations to perform async operations (e.g., async
31483
- * template engines, remote rendering services).
31484
- */
31485
- render(report: EvaluationReport, format: ReportFormat): Promise<string>;
31486
- /**
31487
- * Saves report to file.
31488
- */
31489
- save(report: EvaluationReport, config: ReportConfig): Promise<void>;
31490
- /**
31491
- * Generates report metadata.
31492
- */
31493
- private generateMetadata;
31494
- /**
31495
- * Generates report summary.
31496
- */
31497
- private generateSummary;
31498
- /**
31499
- * Generates highlights based on results.
31500
- */
31501
- private generateHighlights;
31502
- /**
31503
- * Generates improvement areas based on results.
31504
- */
31505
- private generateImprovementAreas;
31506
- /**
31507
- * Generates detailed metrics.
31508
- */
31509
- private generateMetrics;
31510
- /**
31511
- * Generates timing statistics.
31512
- */
31513
- private generateTimingStatistics;
31514
- /**
31515
- * Generates resource statistics from evaluation result.
31516
- *
31517
- * Note: Memory tracking is estimated from current process; disk tracking
31518
- * is not yet implemented. Container count uses evaluated instance count
31519
- * as each instance runs in its own container.
31520
- *
31521
- * (Improved per Issue #454 - replace placeholder zeros with estimates)
31522
- */
31523
- private generateResourceStatistics;
31524
- /**
31525
- * Calculates statistical summary from values.
31526
- */
31527
- private calculateStatisticalSummary;
31528
- /**
31529
- * Calculates percentile from sorted array.
31530
- */
31531
- private percentile;
31532
- /**
31533
- * Generates repository breakdown.
31534
- */
31535
- private generateRepositoryBreakdown;
31536
- /**
31537
- * Creates empty repository metrics for edge cases.
31538
- */
31539
- private createEmptyRepoMetrics;
31540
- }
31541
- /**
31542
- * Creates a new report generator instance.
31543
- */
31544
- declare function createReportGenerator(logger?: ILogger): ReportGenerator;
31545
- /**
31546
- * Quick helper to generate a report.
31547
- */
31548
- declare function generateReport(result: EvaluationRunResult, config?: Partial<ReportConfig>, competitors?: readonly CompetitorResult[]): Promise<EvaluationReport>;
31549
- /**
31550
- * Quick helper to export a report.
31551
- */
31552
- declare function exportReport(result: EvaluationRunResult, outputPath: string, config?: Partial<ReportConfig>): Promise<void>;
31553
-
31554
- /**
31555
- * nexus-agents/swe-bench - Structured Trace Logger
31556
- *
31557
- * Emits JSONL trace files and status snapshots for SWE-bench runs.
31558
- * All writes are best-effort — errors are caught silently to avoid
31559
- * disrupting the benchmark run.
31560
- *
31561
- * @module swe-bench/trace-logger
31562
- * (Source: Issue #1412 - Structured trace logging)
31563
- */
31564
- /**
31565
- * Event types emitted to the trace file.
31566
- */
31567
- type TraceEventType = 'run_start' | 'run_complete' | 'instance_start' | 'instance_complete' | 'iteration_start' | 'iteration_complete';
31568
- /**
31569
- * A single trace event written as a JSONL line.
31570
- */
31571
- interface TraceEvent {
31572
- readonly type: TraceEventType;
31573
- readonly timestamp: string;
31574
- readonly runId: string;
31575
- readonly instanceId?: string;
31576
- readonly iteration?: number;
31577
- readonly data?: Record<string, unknown>;
31578
- }
31579
- /**
31580
- * Live status snapshot written as JSON.
31581
- */
31582
- interface RunStatus {
31583
- readonly runId: string;
31584
- readonly startedAt: string;
31585
- readonly currentInstance: string;
31586
- readonly currentIteration: number;
31587
- readonly totalInstances: number;
31588
- readonly completedInstances: number;
31589
- readonly successCount: number;
31590
- readonly failureCount: number;
31591
- readonly elapsedMs: number;
31592
- readonly totalTokens: number;
31593
- }
31594
- /**
31595
- * Constructor options for TraceLogger.
31596
- */
31597
- interface TraceLoggerOptions {
31598
- readonly outputPath: string;
31599
- readonly runId: string;
31600
- readonly totalInstances: number;
31601
- }
31602
- /**
31603
- * Structured trace logger for SWE-bench runs.
31604
- *
31605
- * Writes JSONL trace events and a live JSON status snapshot.
31606
- * All I/O is best-effort — failures are silently caught.
31607
- */
31608
- declare class TraceLogger {
31609
- private readonly tracePath;
31610
- private readonly statusPath;
31611
- private readonly runId;
31612
- private readonly totalInstances;
31613
- private readonly startedAt;
31614
- private readonly startTime;
31615
- private currentInstance;
31616
- private currentIteration;
31617
- private completedInstances;
31618
- private successCount;
31619
- private failureCount;
31620
- private totalTokens;
31621
- constructor(options: TraceLoggerOptions);
31622
- /** Get the derived trace file path. */
31623
- getTracePath(): string;
31624
- /** Get the derived status file path. */
31625
- getStatusPath(): string;
31626
- /** Emit a trace event to the JSONL file (best-effort). */
31627
- emit(type: TraceEventType, data?: Record<string, unknown>): Promise<void>;
31628
- /** Record the start of an instance. */
31629
- instanceStart(instanceId: string): Promise<void>;
31630
- /** Record the start of an iteration. */
31631
- iterationStart(iteration: number): Promise<void>;
31632
- /** Record the completion of an iteration. */
31633
- iterationComplete(durationMs: number, tokensUsed: number, patchFound: boolean): Promise<void>;
31634
- /** Record the completion of an instance. */
31635
- instanceComplete(success: boolean, totalIterations: number, durationMs: number): Promise<void>;
31636
- /** Record the start of a benchmark run. */
31637
- runStart(config: Record<string, unknown>): Promise<void>;
31638
- /** Record the completion of a benchmark run. */
31639
- runComplete(): Promise<void>;
31640
- /** Write current status snapshot (best-effort). */
31641
- private updateStatus;
31642
- }
31643
-
31644
- /**
31645
- * nexus-agents/swe-bench - MCP Config Generator
31646
- *
31647
- * Generates MCP server configuration for child Claude CLI sessions.
31648
- * Enables SWE-bench agents to access nexus-agents tools (memory, research).
31649
- *
31650
- * @module swe-bench/mcp-config
31651
- * (Source: Issue #1413 - MCP tools in SWE-bench CLI sessions)
31652
- */
31653
- /**
31654
- * Options for generating MCP config.
31655
- */
31656
- interface McpConfigOptions {
31657
- /** Path to nexus-agents CLI entry point. */
31658
- readonly cliPath?: string;
31659
- /** Additional environment variables for the MCP server. */
31660
- readonly env?: Readonly<Record<string, string>>;
31661
- /** Custom allowed tools (default: read-only subset). */
31662
- readonly allowedTools?: readonly string[];
31663
- }
31664
- /**
31665
- * Generated MCP config with path and cleanup function.
31666
- */
31667
- interface GeneratedMcpConfig {
31668
- /** Path to the generated config file. */
31669
- readonly configPath: string;
31670
- /** Cleanup function to remove temp files. */
31671
- readonly cleanup: () => Promise<void>;
31672
- /** Allowed tools list for --allowedTools flag. */
31673
- readonly allowedTools: readonly string[];
31674
- }
31675
- /**
31676
- * Generates an MCP config file for Claude CLI child sessions.
31677
- *
31678
- * Creates a temporary JSON file that can be passed to `claude --mcp-config`.
31679
- * Returns the file path and a cleanup function.
31680
- */
31681
- declare function generateMcpConfig(options?: McpConfigOptions): Promise<GeneratedMcpConfig>;
31682
- /**
31683
- * Gets the default allowed tools for SWE-bench MCP sessions.
31684
- */
31685
- declare function getDefaultAllowedTools(): readonly string[];
31686
-
31687
- /**
31688
- * nexus-agents/swe-bench - Cross-Iteration Context
31689
- *
31690
- * Accumulates structured context across SWE-bench retry iterations
31691
- * so agents do not re-explore the codebase from scratch.
31692
- *
31693
- * @module swe-bench/iteration-context
31694
- * (Source: Issue #1417 - Cross-Iteration Context)
31695
- */
31696
-
31697
- /**
31698
- * Creates an empty cross-iteration context.
31699
- */
31700
- declare function createEmptyContext(): IterationContext;
31701
- /**
31702
- * Extracts file paths mentioned in an agent response.
31703
- * Deduplicates by path.
31704
- */
31705
- declare function extractFilesFromResponse(response: string): ExploredFile[];
31706
- /**
31707
- * Extracts a root cause hypothesis from an agent response.
31708
- * Returns null if none found. Truncates to 200 chars.
31709
- */
31710
- declare function extractHypothesis(response: string): string | null;
31711
- declare function extractApproach(response: string, iteration: number, hadPatch: boolean, patchApplied: boolean): ApproachRecord;
31712
- /**
31713
- * Merges new findings into existing context.
31714
- * Deduplicates files. Keeps last MAX_APPROACH_HISTORY approaches.
31715
- */
31716
- declare function updateContext(prev: IterationContext, response: string, iteration: number, hadPatch: boolean, patchApplied: boolean): IterationContext;
31717
- /**
31718
- * Formats context as markdown for inclusion in a retry prompt.
31719
- * Returns empty string for empty context.
31720
- */
31721
- declare function formatContextForPrompt(ctx: IterationContext, maxChars?: number): string;
31722
-
31723
- /**
31724
- * nexus-agents/benchmarks - Type Definitions
31725
- *
31726
- * Types for performance benchmarking and metrics collection.
31727
- *
31728
- * @module benchmarks/benchmark-types
31729
- * (Source: Issue #156, Mem0 metrics validation)
31730
- */
31731
- /**
31732
- * Latency percentile metrics.
31733
- */
31734
- interface LatencyMetrics {
31735
- /** Minimum latency in milliseconds. */
31736
- readonly min: number;
31737
- /** Maximum latency in milliseconds. */
31738
- readonly max: number;
31739
- /** Mean latency in milliseconds. */
31740
- readonly mean: number;
31741
- /** 50th percentile (median) in milliseconds. */
31742
- readonly p50: number;
31743
- /** 75th percentile in milliseconds. */
31744
- readonly p75: number;
31745
- /** 90th percentile in milliseconds. */
31746
- readonly p90: number;
31747
- /** 95th percentile in milliseconds. */
31748
- readonly p95: number;
31749
- /** 99th percentile in milliseconds. */
31750
- readonly p99: number;
31751
- /** Standard deviation in milliseconds. */
31752
- readonly stdDev: number;
31753
- /** Total number of samples. */
31754
- readonly sampleCount: number;
31755
- }
31756
- /**
31757
- * Throughput metrics.
31758
- */
31759
- interface ThroughputMetrics {
31760
- /** Operations per second. */
31761
- readonly opsPerSecond: number;
31762
- /** Total operations completed. */
31763
- readonly totalOps: number;
31764
- /** Total duration in milliseconds. */
31765
- readonly durationMs: number;
31766
- }
31767
- /**
31768
- * Token usage metrics.
31769
- */
31770
- interface TokenMetrics {
31771
- /** Total input tokens. */
31772
- readonly inputTokens: number;
31773
- /** Total output tokens. */
31774
- readonly outputTokens: number;
31775
- /** Total tokens (input + output). */
31776
- readonly totalTokens: number;
31777
- /** Average tokens per operation. */
31778
- readonly avgTokensPerOp: number;
31779
- }
31780
- /**
31781
- * Quality metrics for retrieval operations.
31782
- */
31783
- interface QualityMetrics {
31784
- /** Precision: relevant retrieved / total retrieved. */
31785
- readonly precision: number;
31786
- /** Recall: relevant retrieved / total relevant. */
31787
- readonly recall: number;
31788
- /** F1 score: harmonic mean of precision and recall. */
31789
- readonly f1Score: number;
31790
- /** Mean reciprocal rank. */
31791
- readonly mrr: number;
31792
- /** Normalized discounted cumulative gain at k. */
31793
- readonly ndcgAtK: number;
31794
- }
31795
- /**
31796
- * Resource usage metrics.
31797
- */
31798
- interface ResourceMetrics {
31799
- /** Peak memory usage in bytes. */
31800
- readonly peakMemoryBytes: number;
31801
- /** Average memory usage in bytes. */
31802
- readonly avgMemoryBytes: number;
31803
- /** CPU time in milliseconds. */
31804
- readonly cpuTimeMs: number;
31805
- /** Database file size in bytes (if applicable). */
31806
- readonly dbSizeBytes?: number;
31807
- }
31808
- /**
31809
- * Benchmark result for a single operation type.
31810
- */
31811
- interface OperationBenchmark {
31812
- /** Operation name. */
31813
- readonly operation: string;
31814
- /** Dataset size used. */
31815
- readonly datasetSize: number;
31816
- /** Latency metrics. */
31817
- readonly latency: LatencyMetrics;
31818
- /** Throughput metrics. */
31819
- readonly throughput: ThroughputMetrics;
31820
- /** Resource metrics. */
31821
- readonly resources: ResourceMetrics;
31822
- /** Quality metrics (for retrieval operations). */
31823
- readonly quality?: QualityMetrics;
31824
- /** Timestamp when benchmark was run. */
31825
- readonly timestamp: string;
31826
- }
31827
- /**
31828
- * Complete benchmark suite result.
31829
- */
31830
- interface BenchmarkSuiteResult {
31831
- /** Suite name. */
31832
- readonly name: string;
31833
- /** Component being benchmarked. */
31834
- readonly component: string;
31835
- /** Version of the component. */
31836
- readonly version: string;
31837
- /** Individual operation benchmarks. */
31838
- readonly operations: readonly OperationBenchmark[];
31839
- /** Environment information. */
31840
- readonly environment: BenchmarkEnvironment;
31841
- /** Overall summary. */
31842
- readonly summary: BenchmarkSummary;
31843
- }
31844
- /**
31845
- * Benchmark environment information.
31846
- */
31847
- interface BenchmarkEnvironment {
31848
- /** Node.js version. */
31849
- readonly nodeVersion: string;
31850
- /** Platform. */
31851
- readonly platform: string;
31852
- /** Architecture. */
31853
- readonly arch: string;
31854
- /** CPU model. */
31855
- readonly cpuModel: string;
31856
- /** CPU cores. */
31857
- readonly cpuCores: number;
31858
- /** Total memory in bytes. */
31859
- readonly totalMemory: number;
31860
- }
31861
- /**
31862
- * Benchmark summary.
28863
+ * Benchmark summary.
31863
28864
  */
31864
28865
  interface BenchmarkSummary {
31865
28866
  /** Total benchmark duration in milliseconds. */
@@ -32412,8 +29413,8 @@ declare const ArtifactRefSchema: z.ZodObject<{
32412
29413
  review: "review";
32413
29414
  test: "test";
32414
29415
  spec: "spec";
32415
- vote: "vote";
32416
29416
  report: "report";
29417
+ vote: "vote";
32417
29418
  }>;
32418
29419
  }, z.core.$strip>;
32419
29420
  /** Unified task lifecycle contract. */
@@ -32462,8 +29463,8 @@ declare const TaskContractSchema: z.ZodObject<{
32462
29463
  review: "review";
32463
29464
  test: "test";
32464
29465
  spec: "spec";
32465
- vote: "vote";
32466
29466
  report: "report";
29467
+ vote: "vote";
32467
29468
  }>;
32468
29469
  }, z.core.$strip>>;
32469
29470
  metadata: z.ZodRecord<z.ZodString, z.ZodUnknown>;
@@ -32629,8 +29630,8 @@ declare const StageResultSchema: z.ZodObject<{
32629
29630
  review: "review";
32630
29631
  test: "test";
32631
29632
  spec: "spec";
32632
- vote: "vote";
32633
29633
  report: "report";
29634
+ vote: "vote";
32634
29635
  }>;
32635
29636
  }, z.core.$strip>>;
32636
29637
  metadata: z.ZodRecord<z.ZodString, z.ZodUnknown>;
@@ -34210,4 +31211,4 @@ declare function createScmProvider(config: CreateScmProviderConfig): Promise<Res
34210
31211
  */
34211
31212
  declare function createGitHubProvider(repo: string): IScmProvider;
34212
31213
 
34213
- export { ALLOWED_COMMANDS, ARTIFACT_TYPES, AUDIT_PIPELINE_TEMPLATE, AbTestTracker, type ActionContext, type ActionRecord, type ActionValidationResult, type ActivationOptions, type ActivationStrategy, ActivationStrategySchema, type ActivityItem, type AdapterConfig, AdapterConfigSchema, type AdapterCreator, AdapterFactory, type AdapterLatencyConfig, type AdapterLatencyResult, AdapterModelError, RateLimiter$1 as AdapterRateLimiter, type RateLimiterConfig$1 as AdapterRateLimiterConfig, type RegisterOptions$1 as AdapterRegisterOptions, type AdapterScenarioResult, type AdaptiveOrchestratorOptions, type AdaptiveOrchestratorResult, type AdaptiveThresholdResult, type AgentAction, AgentActionSchema, type AgentActionType, AgentCapability, type AgentCluster, type AgentContext, AgentError, type AgentEvent, AgentEventSchema, type AgentExecutionResult, type AgentExecutorConfig, type AgentFinding, AgentFindingSchema, type AgentId, type AgentMessage, AgentMessageSchema, type AgentMessageType, type AgentPairKey, type AgentPerformance, AgentPerformanceSchema, type AgentResponse, type AgentRole, AgentRoleSchema, type AgentRoleType, AgentRunnerError, type AgentState$2 as AgentState, AgentStateMachine, type AgentStatus, StepExecutor as AgentStepExecutor, type AgentVoteResult, type AgentVoteSummary, type AggregatedResult, type AggregationMetadata, type AggregationStrategy, type AggregatorInput, type AggregatorOptions, type ApiDocumentation, type ApiEndpoint, type ApiType, type AppConfig, AppConfigSchema, type ApproachOutcome, type ApproachRecord, type ArchitectureAnalysisResult, type ArchitectureDecision, ArchitectureExpert, type ArchitectureExpertOptions, type ArchitecturePattern, type ArchitectureStyle, type Artifact, type ArtifactFilter, type ArtifactRef, ArtifactRefSchema, ArtifactStore, type ArtifactStoreOptions, type ArtifactType, type AuditActor, AuditActorSchema, type AuditCategory, AuditCategorySchema, AuditError, type AuditEvent$1 as AuditEvent, type AuditEventInput, AuditEventInputSchema, AuditEventSchema, type AuditHandlerConfig, type AuditLogConfig, AuditLogConfigSchema, AuditLogger, type AuditOutcome, AuditOutcomeSchema, type AuditQueryCriteria, AuditQueryCriteriaSchema, type AuditResource, AuditResourceSchema, type AuditSeverity, AuditSeveritySchema, AuditTrail, type AuthorizationMethod, AuthorizationMethodSchema, AvailabilityCache, type AvailabilityCacheConfig, BIAS_CATEGORY, BUILT_IN_EXPERTS, BUILT_IN_RULES, BUILT_IN_TEMPLATES, BaseAdapter, type BaseAdapterConfig, type BaseAdapterOptions, BaseAgent, type BaseAgentOptions, BaseAgentOptionsSchema, BaseCliAdapter, type BaseMcpToolDeps, type BenchmarkAdapter, type BenchmarkComparison, type BenchmarkConfig, type BenchmarkEnvironment, type BenchmarkOperation, type BenchmarkOrchestratorOptions, type BenchmarkReport, type BenchmarkRunContext, type BenchmarkRunOptions, type BenchmarkRunResult, type BenchmarkRunSummary, type BenchmarkSuiteResult, type BenchmarkSummary, type BenchmarkThresholds, type BestSolution, BestSolutionSchema, type BottleneckInfo, type BuiltInExpertType, BuiltInExpertTypeSchema, CHECKPOINT_SCHEMA_VERSION, CLAUDE_MODELS, CLAUDE_MODEL_ALIASES, DEFAULT_CACHE_CONFIG as CLI_DEFAULT_CACHE_CONFIG, DEFAULT_CAPABILITIES$1 as CLI_DEFAULT_CAPABILITIES, DEFAULT_COMPOSITE_CONFIG as CLI_DEFAULT_COMPOSITE_CONFIG, CLI_TIMEOUT_PROFILES, CLI_VERSION_REQUIREMENTS, COMPLEXITY_ORDER, CORE_PLUGINS, type CancelledResultFactory, type CapabilityProfile, type CapacityStatus, type Checkpoint, type PipelineStage as CheckpointPipelineStage, type CheckpointSummary, type FailureCategory$1 as CircuitBreakerFailureCategory, type CircuitProtectedResult, type CircuitState, type ClaimValidation, type ClassifyInput, type ClassifyResult, ClaudeAdapter, type ClaudeAdapterConfig, ClaudeCliAdapter, type ClaudeCliResponse, ClaudeResponseParser, type CliAdapterConfig, CliAgentExecutor, type CliAgentExecutorConfig, type CacheStats as CliCacheStats, type CapabilityProfile$1 as CliCapabilityProfile, type CliCircuitBreakerConfig, CliCircuitBreakerIntegration, type CliCircuitHealthStatus, CliDetectionCache, type CliDetectionCacheConfig, CliDetectionCacheConfigSchema, type CliError, type CliErrorCode, type ExecutionOptions$1 as CliExecutionOptions, type CliHealthResult, type ModelInfo as CliModelInfo, type CliName, type CliResponse, type CliRetryLoopConfig, type CliRetryResult, type CliTask, type TaskComplexity as CliTaskComplexity, type TokenUsage$2 as CliTokenUsage, type CliTransport, type CodeAnalysisResult, type CodeChange, CodeChangeSchema, CodeExpert, type CodeExpertOptions, CodexCliAdapter, type CodexCliResponse, CodexMcpAdapter, CodexResponseParser, type CollaborationConfig, CollaborationConfigSchema, type CollaborationMessage, type CollaborationPattern, CollaborationPatternSchema, type CollaborationResult, CollaborationSession, type CollaborationSessionOptions, type CollectRealVotesOptions, CompactDashboardRenderer, type ComparisonReport, type ComparisonResult, type CompetitorResult, type CompetitorSystem, type CompileOptions, type CompileResult$2 as CompileResult, type CompiledGraph, type CompiledPipeline, type CompletionRequest, type CompletionResponse, type ComplexityLevel, ComplexityLevelSchema, type ComplianceStatus, CompositeRouter, type CompositeRouterConfig, CompositeRouterConfigSchema, type CompositeRouterStats, type CompositeRoutingDecision, CompositeRoutingError, type CompositionStep, type CompositionValidation, type ComputedReward, type ConfidenceInterval, ConfigError, type ExpertConfig$1 as ConfigExpertConfig, ExpertConfigSchema$1 as ConfigExpertConfigSchema, type ExpertDefinition$1 as ConfigExpertDefinition, ExpertDefinitionSchema as ConfigExpertDefinitionSchema, type Conflict, type ConflictResolver, type ConflictWarning, type ConsensusAlgorithm, ConsensusAlgorithmSchema, ConsensusEngine, type ConsensusEngineConfig, ConsensusEngineConfigSchema, ConsensusError, type ConsensusMetrics, ConsensusMetricsSchema, ConsensusProtocol, type ConsensusResult, ConsensusResultSchema, type ConsensusStats, type ConsensusVoteDeps, type ConsensusVoteInput, ConsensusVoteInputSchema, type ConsensusVoteResponse, type ConsolidatedFinding, type ConsolidationBenchmarkResult, type ConsolidationOperation, type ContentBlock, ContentPriority, type ContextBudget, ContextBudgetSchema, type ContextFilter, ContextFilterSchema, type ContextItem, ContextManager, type ContextManagerConfig, ContextManagerConfigSchema, type ContextPruneStrategy, ContextPruneStrategySchema, ContextPruner, type ContextPrunerConfig, ContextPrunerConfigSchema, type ContextStats, type ContributionScore, type CorePluginRegistrationResult, type CorrelationCoefficient, CorrelationCoefficientSchema, type CorrelationMatrix, CorrelationTracker, type CorrelationTrackerStats, CorrelationTrackerStatsSchema, type CorroborationEvent, type CorroborationResult, type CorroborationRule, type CostEstimate, CostEstimateSchema, type CoverageAnalysis, type CoverageMetrics, CoverageMetricsSchema, type CreateExecutionContextOptions, type CreateExpertDeps, type CreateExpertInput, CreateExpertInputSchema, type CreateExpertOptions, type CreateExpertResponse, type CreateForestInput, type CreateNodeInput, type CreatePROptions, type CreateScmProviderConfig, type CreateSkillOptions, type CreateStreamOptions, type CreateTreeInput, type CriterionFailure, type CriterionResult, CriterionResultSchema, CriterionType, CriterionTypeSchema, type CriterionTypeType, type CrossTreeInfo, CrossTreeInfoSchema, type CrossTreeStrategy, CrossTreeStrategySchema, type CuratedContextItem, type CurationResult, DECEPTION_CATEGORY, DEFAULT_ACTIVATION_OPTIONS, DEFAULT_ADAPTER_LATENCY_CONFIG, DEFAULT_BENCHMARK_CONFIG, DEFAULT_BUDGET, DEFAULT_COLLECT_STREAM_MAX_CHUNKS, DEFAULT_COMPOSER_CONFIG, DEFAULT_CONSENSUS_CONFIG, DEFAULT_DASHBOARD_CONFIG, DEFAULT_DASHBOARD_RENDER_OPTIONS, DEFAULT_DISTILLER_CONFIG, DEFAULT_EVALUATION_CONFIG, DEFAULT_EXECUTION_TIME_MS, DEFAULT_FEEDBACK_COLLECTOR_CONFIG, DEFAULT_FEEDBACK_INTEGRATION_CONFIG, DEFAULT_FOREST_CONFIG, DEFAULT_HARNESS_EXECUTION_CONFIG, DEFAULT_HIGHER_ORDER_CONFIG, DEFAULT_MAX_RETRIES, DEFAULT_MEMORY_BENCHMARK_CONFIG, DEFAULT_OUTCOME_STORAGE_CONFIG, DEFAULT_PATCH_OPTIONS, DEFAULT_PATH_SCORING_OPTIONS, DEFAULT_PERMISSIONS, DEFAULT_POLICIES, DEFAULT_PREFERENCE_ROUTER_CONFIG, DEFAULT_RBAC, DEFAULT_REPORT_CONFIG, DEFAULT_RESOURCE_LIMITS, DEFAULT_RETRY_CONFIG, DEFAULT_ROLE_MAPPINGS, DEFAULT_SCENARIOS, DEFAULT_SKILL_LIBRARY_CONFIG, DEFAULT_SKILL_LOADER_CONFIG, DEFAULT_STATISTICAL_OPTIONS, DEFAULT_SWARM_OBSERVER_CONFIG, DEFAULT_SWE_BENCH_CONFIG, DEFAULT_TEST_RUNNER_CONFIG, DEFAULT_TIMEOUTS, DEFAULT_TIMEOUT_PROFILE, DEFAULT_TRINITY_CONFIG, DEFAULT_VOTING_PROTOCOL_CONFIG, DEFAULT_WAVE_CONFIG, DEFAULT_WEIGHTED_VOTING_CONFIG, DEV_PIPELINE_TEMPLATE, type DagEdge, DagEdgeSchema, Dashboard, type DashboardConfig, DashboardConfigSchema, type DashboardFilter, type DashboardFormat, type DashboardHealthIndicators, type DashboardOutcome, type DashboardRenderOptions, type DashboardSnapshot, type DashboardSummary, type DashboardUpdateOptions, DatasetLoadError, type DatasetLoadOptions, type DatasetLoadResult, type DecomposeError, type DelegateDeps, type DelegateInput, type DelegateInputLike, DelegateInputSchema, type DelegateOutput, DelegateOutputSchema, type DependencyError, type DependencyErrorCode, DependencyErrorCodeSchema, DependencyErrorSchema, DependencyGraph, type DependencyStructure, type DevPipelineOptions, type DevPipelineResult, type DevPipelineStages, DirectedInteractionGraph, type DiskSpaceValidation, type DistilledRule, type DistillerConfig, type DistillerStats, type DistributionStats, type DockerExecutionState, type DockerValidation, DocumentationExpert, type DocumentationExpertOptions, type DocumentationResult, type DocumentationSection, type DryRunResult, type DynamicExpert, DynamicExpertManager, type DynamicExpertSpec, END, EXPERT_CAPABILITIES, EXPERT_DEFAULT_CAPABILITIES, EXPERT_DEFAULT_TEMPERATURES, EXPERT_TYPE_TO_ROLE, type EnvValidationResult, type EnvironmentValidationResult, ErrorCode, type ErrorHandler, type ErrorPayload, type EvaluationCacheLevel, type EvaluationCriterion, EvaluationCriterionSchema, type EvaluationErrorCode, EvaluationHarness, type EvaluationHarnessConfig, EvaluationHarnessError, type EvaluationMetrics, type EvaluationMode, type EvaluationPhase, type EvaluationProgress, type EvaluationProgressCallback, type EvaluationReport, type EvaluationRunResult, type EvaluationValidationResult, EventBus, type EventBusBridgeOptions, type EventBusBridgeResult, type EventBusOptions, type EventFilter, type EventHandler, type EventPayload, type EventType, type ExecuteExpertDeps, type ExecuteExpertInput, ExecuteExpertInputSchema, type ExecuteExpertResponse, type ExecuteSpecDeps, type ExecuteSpecInput, ExecuteSpecInputSchema, type ExecutionContext$1 as ExecutionContext, type ExecutionMode, type ExecutionPhase$1 as ExecutionPhase, type ExecutionPlan$2 as ExecutionPlan, type ExecutionStage, type ExecutorWithModel, ExpectedOutcome, ExpectedOutcomeSchema, type ExpectedOutcomeType, type ExperienceRecord, type ExperienceStep, type ExperimentDefinition, type ExperimentExport, type ExperimentOutcome, type ExperimentResult, type ExperimentStatus, type ExperimentSummary, type ExperimentVariant, Expert, type ExpertAssignment, ExpertAssignmentSchema, type ExpertBridgeResult, ExpertCollaborationPattern, type ExpertCollaborationPatternType, type ExpertConfig, ExpertConfigSchema, type ExpertDefinition, type ExpertDomain, ExpertDomainSchema, ExpertFactory, ExpertFactoryAdapter, type ExpertInfo, type ExpertMatch, ExpertMatchSchema, type ExpertOptions, ExpertOptionsSchema, type ExpertOutput, ExpertOutputSchema, type ExpertParticipation, ExpertParticipationSchema, type RegisterOptions as ExpertRegisterOptions, ExpertRegistry$1 as ExpertRegistry, type ExpertResult, type ExpertResultSummary, type ExplorationEvent, ExplorationEventSchema, type ExplorationEventType, ExplorationEventTypeSchema, type ExploredFile, type ExpressionType, type ExtractSymbolsDeps, ExtractSymbolsInputSchema, FALLBACK_SCANNER_DATA, FactoryError, type FailureAnalysis$1 as FailureAnalysis, type AnalysisError as FailureAnalysisError, type FailureCategory, type FailurePattern$1 as FailurePattern, FailurePatternSchema, type FailureStatistics, type FailureType, type FallbackBehavior, type FallbackEntry, type FeedbackCollectorConfig, FeedbackCollectorConfigSchema, FeedbackIntegration, type FeedbackIntegrationConfig, type FeedbackLoopStats, type FeedbackMessage, type RoutingDecision as FeedbackRoutingDecision, RoutingDecisionSchema as FeedbackRoutingDecisionSchema, FileAuditStorage, type FileReference, FileReferenceSchema, type FileRelevance, type FindingVote, FindingVoteSchema, type Artifact$1 as FirewallArtifact, type PolicyContext$1 as FirewallPolicyContext, type PolicyDecision$2 as FirewallPolicyDecision, type PolicyRule$1 as FirewallPolicyRule, type FirewallResult, type Forest, type ForestConfig, ForestConfigSchema, type ForestId, type ForestPruningStrategy, ForestPruningStrategySchema, type ForestResult, ForestResultSchema, type ForestState, ForestStateSchema, type ForestStatistics, ForestStatisticsSchema, type FrameworkDetectionResult, type FullCapableProvider, GEMINI_MODELS, GEMINI_MODEL_ALIASES, GENERAL_PIPELINE_TEMPLATE, GeminiAdapter, type GeminiAdapterConfig, GeminiCliAdapter, type GeminiCliResponse, GeminiResponseParser, type GeneratedMcpConfig, type GeneratedTest, GeneratedTestSchema, type GitHubInput, GitHubProvider, GitHubReviewer, GitHubUserInfo, type GitHubUserMetadata, type GitHubUserRole, GitHubUserRoleSchema, GraphBuilder, type GraphCompileError, type GraphEdge, type GraphEdgeDisplay, type GraphEvent, type GraphExecuteOptions, type GraphExecutionAuditEvent, type GraphExecutionResult, type GraphNode, type GraphPipelineOptions, type GraphPipelineResult, type GraphState, type GraphStats, type GraphSummary, type GraphWorkflowInfo, HARM_EMOTIONAL_CATEGORY, HARM_FINANCIAL_CATEGORY, HARM_PHYSICAL_CATEGORY, type HarnessErrorCode, type HarnessExecutionConfig, type HarnessExecutionProgress, type HarnessExecutionResult, type HarnessExecutionState, HarnessExecutor, HarnessExecutorError, type HarnessProgressCallback, type HarnessValidationResult, HarnessVerifyAdapter, type HealthStatus, type HigherOrderVotingConfig, HigherOrderVotingConfigSchema, type HigherOrderVotingResult, HigherOrderVotingResultSchema, HigherOrderVotingStrategy, type HookError, HostileInputFirewall, type IAbTestTracker, type IAgent, type IAgentExecutor, type IArtifactStore, type IAuditLogger, type IAuditStorage, type IBenchmarkWriter, type ICTMConfig, ICTMConfigSchema, type ICTMInferenceResult, ICTMInferenceResultSchema, type ICheckpointStore, type ICircuitBreaker, type ICliAdapter, type ICliCircuitBreakerIntegration, type ICliDetectionCache, type ICliResponseParser, type ICollaborationProtocol, type ICompositeRouter, type IConsensusEngine, type ICorrelationTracker, type IDashboard, type IDashboardRenderer, type IEvaluationHarness, type IEventBus, type IFeedbackIntegration, type IHarnessExecutor, type IHigherOrderVoting, type ISwarmObserver as IInteractionObserver, type ILogger, type IMcpNotifier, type IMemoryBackend, type IModelAdapter, INSTRUCTION_SAFETY_CATEGORY, type IOrchestrationObserver, type IOrchestrator, type IOrchestratorFactory, type IOutcomeFeedback, type IOutcomeStorage, type IPatchApplicator, type IPipelineStage, type IPluginRegistry, type IPolicyEngine, type IPolicyFirewall, type IPreferenceDataStore, type IReportGenerator, type IRoutingMemory$1 as IRoutingMemory, type ISQLiteDatabase, type ISQLiteStatement, type ISandboxExecutor, type IScmProvider, type IScmReviewer, type IScmUserInfo, type ISkillDependencyGraph, type ISkillLoader, type ITaskTracker, type ITemplateRegistry, type ITestRunner, type ITokenCounter, type IVerifyAdapter, type IVotingProtocol, type IVotingStrategy, type IWeightedVoting, type IWorkflowEngine, type IWorkflowRouter, type ImprovementSuggestion, InMemoryAuditStorage, InMemoryCheckpointStore, InMemoryPreferenceStore, type IncompleteResult, type IncompleteSeverity, type IndependentSubset, IndependentSubsetSchema, type InjectionFlag, InjectionFlagSchema, type InputBinding, type InputDefinition, type InputDefinitionInput, type InputDefinitionOutput, InputDefinitionSchema, type InputType, InputTypeSchema, type InstanceEvaluationResult, type InteractionEdge, type InteractionGraph, type SwarmObserverConfig as InteractionObserverConfig, SwarmObserverConfigSchema as InteractionObserverConfigSchema, type InteractionOutcome, SwarmObserver as InteractionSwarmObserver, type InvalidVar, type IssueFilters, type IssueReference, IssueReferenceSchema, type IssueTriageDeps, type IssueTriageInput, IssueTriageInputSchema, type IssueTriageResponse, type IterationContext, type IterativeConsensusConfig, type IterativeConsensusResult, JsonDashboardRenderer, KNOWN_SECTIONS, type KnownSection, type LanguageMatrixEntry, type LatencyMetrics, LatencySampler, type LatencyScenario, type LeaderboardEntry, type LeaderboardSnapshot, type LearningProgress, type LibraryStatistics, type ListExpertsDeps, type ListExpertsInput, ListExpertsInputSchema, type ListExpertsResponse, type ListWorkflowsDeps, type ListWorkflowsInput, ListWorkflowsInputSchema, type ListWorkflowsResponse, type LoadedSkillSet, LoadedSkillSetSchema, LockedWriter, type LogContext, type LogEntry, type LogLevel, type LogPolicyAuditOpts, type LogRateLimitAuditOpts, type LogToolInvocationOpts, type LoggingConfig, LoggingConfigSchema, MANIPULATION_CATEGORY, MAX_DIFF_LENGTH, MAX_DYNAMIC_EXPERTS, MAX_EXECUTION_TIME_MS, MEM0_TARGETS, MIN_EXPERTS_FOR_PATTERN, MODEL_CAPABILITIES, type McpConfigOptions, type IExpertFactory as McpExpertFactory, type McpLogContext, type McpLogLevel, RateLimiter as McpRateLimiter, type RateLimiterConfig as McpRateLimiterConfig, type MemoryBenchmarkConfig, type MemoryEntry, MemoryError, MemoryImportance, type MemoryInfo, type MemoryMetadata, type MemoryPayload, type MemoryQueryInput, MemoryQueryInputSchema, MemoryStatsInputSchema, type MemoryWriteInput, MemoryWriteInputSchema, type MergePROptions, type Message, type MessagePayload, type MessageRole, ModelCapability, type ModelConfig, ModelConfigSchema, ModelError, type ModelMetrics, type ModelPerformanceSummary, type ModelPreference, ModelPreferenceSchema, type ModelPricing, type ModelSelection, ModelSelectionSchema, type ModelTiers, ModelTiersSchema, NOOP_NOTIFIER, NOOP_PROGRESS, NexusAgentExecutor, type NexusAgentExecutorConfig, NexusError, type NexusErrorOptions, NoAdapterError, type NodeHandler$1 as NodeHandler, type NodeHandlerFactory, type NodeHook, type NodeHookContext, type NodeId, type NodeResult, type NodeState, NodeStateSchema, OLLAMA_MODELS, OPENAI_MODELS, OPENAI_MODEL_ALIASES, OWVoting, type OWVotingOptions, type AgentState$1 as ObserverAgentState, type CostMetrics as ObserverCostMetrics, type RoutingDecision$2 as ObserverRoutingDecision, type SessionMetrics as ObserverSessionMetrics, type TokenUsage$1 as ObserverTokenUsage, type TrackedAgent as ObserverTrackedAgent, OllamaAdapter, type OllamaAdapterConfig, OpenAIAdapter, type OpenAIAdapterConfig, OpenCodeCliAdapter, type OperationBenchmark, type OperationComparison, type OrchestrateDeps, type OrchestrateInput, type OrchestrateInputLike, OrchestrateInputSchema, type OrchestrateOutput, OrchestrateOutputSchema, OrchestrationError, type OrchestrationObserverEvent, type OrchestrationObserverListener, type OrchestrationStats, OrchestrationUnavailableError, Orchestrator, type OrchestratorDefinition, OrchestratorError, type OrchestratorErrorCode, type OrchestratorExecuteOptions, OrchestratorFactory, type OrchestratorFactoryConfig, type OrchestratorOptions, OrchestratorOptionsSchema, type OrchestratorResult, type OrchestratorStep, type OrchestratorType, type OutcomeClass, type OutcomeFailureCategory, OutcomeFailureCategorySchema, OutcomeFeedbackCollector, type OutcomeProcessedCallback, type OutcomeRecord, type OutcomeStorageConfig, OutcomeStorageConfigSchema, OutcomeStorageError, OutcomeStore, type OutcomeStoreConfig, type TaskOutcome$2 as OutcomeTaskRecord, TaskOutcomeSchema$2 as OutcomeTaskSchema, PIPELINE_EVENT_TYPES, PIPELINE_STATE_KEYS, PIPELINE_TEMPLATES, PLUGIN_TRUST_LEVELS, PRIVACY_CATEGORY, PROMPT_DEFINITIONS, PR_REVIEW_ROLES, type PairwiseVotingHistory, PairwiseVotingHistorySchema, type ParallelOptions, ParallelProtocol, ParseError, type ParsedExpression, type ParsedSpec, ParsedSpecSchema, type ParsedTemplate, type PatchApplicationOptions, type PatchApplicationResult, PatchApplicator, PatchApplicatorError, type PatchErrorCode, type PatchFormat, type PatchValidationResult, type PathAccessRule, type PathScore, type PathScoreBreakdown, PathScoreBreakdownSchema, PathScoreSchema, type PathScoringOptions, type PatternMetrics, type PatternOutcome, type PatternType, type PerformanceMatrixEntry, type PerformanceSummary, type PersistentDistillerConfig, PersistentOutcomeStore, type PersistentOutcomeStoreConfig, PersistentStrategyDistiller, type PipelineBridgeResult, type PipelineCheckpointState, type PipelineContext, type PipelineEdge, type PipelineError, type PipelineEvent, type PipelineEventType, type PipelineExecuteOptions, type PipelineGraphResult, type PipelineMetrics, type PipelineMode, type PipelinePlugin, type PolicyMode as PipelinePolicyMode, type PolicyViolation as PipelinePolicyViolation, type PipelineResult, type PipelineRole, PipelineRunner, type PipelineStage$1 as PipelineStage, type PipelineStageData, type PipelineTask, type PipelineTemplate, type PipelineType, type PlanCompileOptions, type PlanContract, PlanContractSchema, type PluginManifest, PluginManifestSchema, PluginRegistry, type PluginRegistryOptions, type PluginTrustLevel, type ValidationError as PluginValidationError, type PolicyConfig, PolicyConfigSchema, type PolicyContext, type PolicyDecision, type PolicyDecisionAuditOpts, PolicyEngine, PolicyError, type PolicyEvalResult, type PolicyEvaluation, type PolicyEvaluatorOptions, PolicyFirewall, type PolicyFirewallConfig, type PolicyGateEvent, type PolicyGateSpec, PolicyGateSpecSchema, type PolicyMode$1 as PolicyMode, type PolicyRule, type PolicyViolation$1 as PolicyViolation, type PrReviewDecision, type PrReviewDeps, type PrReviewInput, PrReviewInputSchema, type PrReviewResponse, type PrReviewVote, type PreconditionConfig, type PreconditionOutcome, type PreconditionResult, PredictionWriteError, PredictionWriter, type PredictionWriterOptions, type PreferenceDataPoint, type PreferenceFilter, type PreferenceModelStats, type PreferencePrediction, type PreferenceRecord, PreferenceRouter, type PreferenceRouterConfig, PreferenceRouterConfigSchema, type PreferenceRoutingDecision, type PreferenceSignal, type PreferredCapability, type ProbeFn, type ProbeResult, type ProgressCallback, type PromptDefinition, type PromptMessage, type PromptRegistrationResult, ProofOfLearningStrategy, type Proposal, type ProposalId, ProposalSchema, type ProposalState, type ProposalStatus, ProposalStatusSchema, ProtocolFactory, type ProtocolOptions, type ProvenanceEntry, type ProviderConfig, ProviderConfigSchema, type PruneOptions, type PruneResult, PruningStrategy, type PythonValidation, type QaReviewResult, type QualityAttribute, type QualityMetrics, type QualityRequirement, type QualityScorer, type QualitySignals, QualitySignalsSchema, QueryFeatureExtractor, type QueryFeatures, type QueryOptions, type QueryTraceInput, QueryTraceInputSchema, REJECTION_CATEGORIES, REPO_COMPLEXITY, RESEARCH_PIPELINE_TEMPLATE, RISK_AWARENESS_CATEGORY, ROBUSTNESS_CATEGORY, ROLE_DEFAULT_TRUST, type RateLimitAuditOpts, RateLimitError, type RateLimitExceeded, type RateLimiterState, type RawHarnessOutput, type RawHarnessProgress, type RawInstanceResult, type RawTestResult, type ReasoningDepth, ReasoningDepthSchema, type ReasoningNode, type ReasoningNodeMetadata, ReasoningNodeMetadataSchema, ReasoningNodeSchema, type ReasoningStepType, ReasoningStepTypeSchema, type ReasoningTree, ReasoningTreeSchema, type RecordExecutionOptions, type RecordInteractionOptions, type RecordOutcomeParams, type RegistrationError, RegistryError, type RegistryImportInput, RegistryImportInputSchema, type RegistryRelationship, type RegistryScanner, type RegistryStats, type RegretAnalysis, type RejectionCategory, RejectionCategorySchema, type RepoAnalysis, type RepoAnalyzeDeps, type RepoAnalyzeInput, RepoAnalyzeInputSchema, type RepoSecurityPlan, type RepoSecurityPlanDeps, type RepoSecurityPlanInput, RepoSecurityPlanInputSchema, type ReportComparison, type ReportConfig, type ReportDetailLevel, type ReportFormat, ReportGenerationError, ReportGenerator, type ReportInstanceDetails, type ReportMetadata, type ReportMetrics, type ReportOptions, type ReportRepositoryBreakdown, type ReportSummary, type RepositoryMetrics, type ReputationAssessment, ReputationCache, type ReputationEvent, type ResearchAddDeps, type ResearchAddInput, ResearchAddInputSchema, type ResearchAddResponse, type ResearchAddSourceDeps, type ResearchAddSourceInput, ResearchAddSourceInputSchema, type ResearchAddSourceResponse, type ResearchAnalyzeDeps, type ResearchAnalyzeInput, ResearchAnalyzeInputSchema, type ResearchAnalyzeResponse, type ResearchCatalogReviewDeps, ResearchCatalogReviewInputSchema, type ResearchDiscoverDeps, type ResearchDiscoverInput, ResearchDiscoverInputSchema, type ResearchDiscoverResponse, type ResearchQueryDeps, type ResearchQueryInput, ResearchQueryInputSchema, type ResearchQueryResponse, type ResearchSynthesizeDeps, type ResearchSynthesizeInput, ResearchSynthesizeInputSchema, type ResearchTriggerConfig, type ResolutionStatus, type ResolveResult, type ResourceLimits, type ResourceMetrics, type ResourceStatistics, type ResourceUsage, type Result, ResultAggregator, type ResultConflict, type ResultSubmissionMessage, type ResultSummary, type RetryAttemptInfo, type RetryConfig, RetryExhaustedError, type ReviewCapableProvider, ReviewProtocol, type ReviewRequestMessage, type ReviewResponseMessage, ReviewResponseMessageSchema, RiskLevel, RiskLevelSchema, type RiskLevelType, type RoleSkillMapping, type RoundSummary, type RouterType, type DashboardConfig$1 as RoutingDashboardConfig, type RoutingDecisionRecord, RoutingMemoryError, type RoutingMemoryExport, type RoutingMemoryStats$1 as RoutingMemoryStats, type RoutingMetrics, RoutingMetricsCollector, type RoutingMetricsConfig, type RoutingRecord, type RuleStatus, type RulesSnapshot, RulesSnapshotSchema, type RunGraphWorkflowDeps, type RunGraphWorkflowInput, RunGraphWorkflowInputSchema, type RunGraphWorkflowResponse, type RunOptions, type RunProgress, type RunStatus, type RunWorkflowDeps, type RunWorkflowInput, RunWorkflowInputSchema, type RunnerConfig, type RunnerErrorCode, SAFETY_CATEGORIES, SAFETY_CATEGORY_MAP, PROVIDER_ENV_KEYS as SDK_PROVIDER_ENV_KEYS, DEFAULT_CAPABILITIES as SKILL_DEFAULT_CAPABILITIES, SKILL_PERMISSIONS, SQLiteOutcomeStorage, STAGE_TYPES, START, type SWEBenchCheckpoint, type SWEBenchConfig, type CostEstimate$1 as SWEBenchCostEstimate, type SWEBenchDatasetInfo, type SWEBenchEvalResult, type FailureAnalysis as SWEBenchFailureAnalysis, type FailurePattern as SWEBenchFailurePattern, type SWEBenchInstance, type SWEBenchPrediction, type SWEBenchRunResult, SWEBenchRunner, SWEBenchRunnerError, type SWEBenchSummary, type SWEBenchVariant, SWE_BENCH_DATASETS, SWE_BENCH_SYSTEM_PROMPT, type SafetyCategory, SafetyCategoryId, SafetyCategoryIdSchema, type SafetyCategoryIdType, SafetyCategorySchema, type SafetyTaxonomySummary, type SafetyTestCase, SafetyTestCaseSchema, type SandboxConfig, type SandboxExecutionOptions, type SandboxMode, type SandboxPolicy, type SandboxResult, type SanitizationEvent, type SanitizedInput, SanitizedInputSchema, type SanitizerConfig, SanitizerConfigSchema, type ScannerData, type ScannerEntry, type ScannerRecommendation, type ScannerRegistryManifest, type ScenarioError, type ScenarioResult, ScenarioResultSchema, type ScmComment, type ScmCommentDetail, ScmError, type ScmFileChange, type ScmIssue, type ScmIssueDetail, type PRStatus as ScmPRStatus, type ScmPlatform, type ScmPullRequest, type ScmPullRequestDetail, type ScmReviewDecision, type ScmToken, type ScmUserMetadata, type ScoreBreakdown, ScoreBreakdownSchema, SdkAdapter, type SdkAdapterConfig, type SdkProviderId, type SearchCodebaseDeps, SearchCodebaseInputSchema, type SecurityAnalysisResult, type AuditEvent as SecurityAuditEvent, type AuditQuery as SecurityAuditQuery, type SecurityCapability, type SecurityConfig, SecurityConfigSchema, SecurityError, type SecurityErrorCode, SecurityErrorCodeSchema, type SecurityEventAuditOpts, SecurityExpert, type SecurityExpertOptions, type SecurityFocusArea, type PolicyDecision$1 as SecurityPolicyDecision, SelectionError, type ExpertRegistry as SelectionExpertRegistry, type SelectionOptions, SelectionOptionsSchema, type SelectionResult$1 as SelectionResult, SelectionResultSchema, SequentialProtocol, type SerializedError, type ServerConfig, type ServerError, type ServerInstance, type SessionEvent, type SessionState, type SessionStatus, SessionStatusSchema, type SharedConclusion, SharedConclusionSchema, type SharedInsight, SharedInsightSchema, type SharedMemoryEntry, SharedMemoryStore, type SharedMemoryTag, SimpleAgent, SimpleMajorityStrategy, type Skill, AgentRoleSchema$2 as SkillAgentRoleSchema, type SkillAttestation, SkillAttestationSchema, type SkillCapabilities, SkillCapabilitiesSchema, type SkillCategory, type SkillComplexity, SkillComposer, type SkillComposerConfig, type SkillComposition, type SkillCompositionRequest, type SkillDependency, SkillDependencyGraph, SkillDependencySchema, type SkillDependencyType, SkillDependencyTypeSchema, type SkillExample, type SkillExecution, type SkillExecutionStatus, SkillLibrary, type SkillLibraryConfig, SkillLoader, type SkillLoaderConfig, SkillLoaderConfigSchema, type SkillLoaderError, type SkillLoaderErrorCode, SkillLoaderErrorSchema, type SkillMetrics, type SkillParameter, type SkillPermission, SkillPermissionSchema, type SkillProvenance, SkillProvenanceSchema, type SkillQuery, type SkillRBAC, SkillRBACSchema, type SkillSearchResult, type SkillSecurityError, SkillSecurityErrorSchema, type SkillStore, type SkillWithMetrics, type SortOptions, type SourceCitation, SourceCitationSchema, type SpanId, type SpecExecutionError, type SpecExecutionOptions, type SpecExecutionResult, type SpecParseError, type StageCompletedOptions, type StageContext, type StageFailedOptions, type StageOutput, type StageRegistry, type StageResult, StageResultSchema, type StageSpec, StageSpecSchema, type StageStartedOptions, type StageType, type StateChangeCallback, type StateChangePayload, type StateFieldSchema, type StateMachineOptions, type StateReducer, type StateSchema, type StateTransition, type StateTransitionEvent, type StatisticalOptions, type StatisticalSummary, type StatusUpdateMessage, type StepExecutionOptions, type StepExecutor$1 as StepExecutor, type StepExecutorDeps, type StepResult, type StepResultSummary, type StopReason, type StoredModelStats, type StoredReward, type StoredRoutingDecision, type StoredTaskOutcome, type StrategyAction, StrategyDistiller, StreamCancelledError, type StreamChunk, StreamController, StreamError, type StreamState, AgentRoleSchema$1 as StrictAgentRoleSchema, InputDefinitionSchema$1 as StrictInputDefinitionSchema, WorkflowDefinitionSchema$1 as StrictWorkflowDefinitionSchema, WorkflowStepSchema$1 as StrictWorkflowStepSchema, type StrippedElement, StrippedElementSchema, type SubTask, SubTaskSchema, SubprocessCliAdapter, type SubtaskNode, SubtaskNodeSchema, type SubtaskPriority, SubtaskPrioritySchema, type SubtaskStatus, SubtaskStatusSchema, type SubtaskType, SubtaskTypeSchema, SupermajorityStrategy, type SuspiciousSignal, SuspiciousSignalSchema, type AgentState as SwarmAgentState, type SwarmHealthMetrics$1 as SwarmHealthMetrics, type SwebenchValidation, type SycophancyIndicator, type SycophancyReport, type SynthesizedResult, SynthesizedResultSchema, type SystemComponent, TASK_STATUSES, TASK_TYPE_EXPERTS, TEMPLATE_CATEGORIES, TEMPLATE_KEYWORDS, TRINITY_ROLE_MAX_TOKENS, TRINITY_ROLE_PROMPTS, TRINITY_ROLE_TEMPERATURES, TRUST_TIER_NUMERIC, type Task$1 as Task, type TaskAnalysis, TaskAnalysisSchema, type TaskAssignmentMessage, type TaskClassification, type TaskCommitment, type TaskContext, type TaskContract, TaskContractSchema, type TaskDag, TaskDagSchema, type TaskId, type TaskOutcome$1 as TaskOutcome, type TaskOutcomeRecord, TaskOutcomeSchema$1 as TaskOutcomeSchema, type TaskPayload, type TaskProfileSummary, TaskQueue, type TaskRequirements, type TaskResult, TaskSchema, type TaskSignals, type TaskStatus, type TaskToolResponse, type TaskTypePerformance, type TemplateCategory, TemplateCategorySchema, type TemplateMetadata, TemplateMetadataSchema, TemplateRegistry, type TerminationReason, TerminationReasonSchema, type TestCaseResult, type TestFramework, type TestQuality, TestRunner, type TestRunnerConfig, TestRunnerError, type TestRunnerErrorCode, type TestStatus, type TestSuiteResult, type TestingAnalysisResult, TestingExpert, type TestingExpertOptions, type TextContent, TextDashboardRenderer, type ThinkerOutput, type ThresholdUpdateDetail, type ThroughputMetrics, type TimeConstraint, type TimePeriod, TimeoutError, type TimeoutProfile, type TimingStatistics, type TokenBenchmarkResult, TokenCountError, type TokenCountResult, TokenCounter, type TokenCounterConfig, TokenCounterProvider, type TokenMetrics, type TokenResolverConfig, type TokenStrategy, type TokenUsage, type TokenUsageBreakdown, type TokensByPhase, type ToolCompletedEvent, type ToolDefinition, type ToolInvocationAuditOpts, type ToolInvokedEvent, type ToolPayload, type ToolRegistrationOptions, type ToolRegistrationResult, type ToolResult, type ToolSet, ToolSetSchema, type TraceEvent, type TraceEventType, type TraceId, TraceLogger, type TraceLoggerOptions, type TrackedTask, type TransitionErrorCallback, type TreeId, type TreeState, TreeStateSchema, type TreeStatistics, TreeStatisticsSchema, type Trend, type TrendDetectedDetail, type TrinityConfig, TrinityConfigSchema, TrinityCoordinator, type TrinityExecuteOptions, type TrinityPhase, type TrinityPhaseResult, TrinityPhaseSchema, type TrinityResult, type TrinityRole, type TrinityRoleConfig, TrinityRoleSchema, TrinityStopReasonSchema, type TrustClassificationEvent, type TrustTier, TrustTierSchema, UnanimousStrategy, type UnknownVar, type Unsubscribe, type V2Config, type V2Mode, VERSION, VOTING_THRESHOLDS, ValidationDashboard, ValidationError$1 as ValidationError, type ValidationIssue, type VariantStats, type VerificationResult, type VerifierOutput, VerifierVerdictSchema, type VerifyResult, type VersionRequirements, type VersionStatus, type Violation, ViolationSchema, type Vote, type VoteCounts, type VoteDecision$1 as VoteDecision, VoteDecisionSchema$1 as VoteDecisionSchema, type VoteDecisionStatus, type VoteMessage, VoteMessageSchema, type VoteResult, VoteSchema, type VotingObservation, VotingObservationSchema, type VotingOutcome, VotingProtocol, type VotingProtocolConfig, VotingProtocolConfigSchema, type VotingProtocolResult, type VotingRound, type VotingRoundPhase, VotingRoundPhaseSchema, type VotingRoundStatus, VotingRoundStatusSchema, type VotingSession, VotingStrategyFactory, type Vulnerability, VulnerabilitySchema, VulnerabilitySeveritySchema, type WaveExecutionResult, type WaveResult, WaveScheduler, type WaveSchedulerConfig, type WaveTask, type WaveTaskExecutor, type WaveTaskResult, WeatherReportInputSchema, type WeightedAgentRecord, type WeightedConsensusResult, type WeightedVoteCounts, WeightedVoting, type WeightedVotingConfig, type WeightedVotingOptions, type WinLossAnalysis, type WithRetryOptions, type WorkChunk, type WorkerOutput, type WorkflowAdapterConfig, type WorkflowConfig, WorkflowConfigSchema, type WorkflowDefinition, type WorkflowDefinitionInput, type WorkflowDefinitionOutput, WorkflowDefinitionSchema, type WorkflowEngineFactoryConfig, WorkflowError, type WorkflowExecutionContext, type ExecutionPlan$1 as WorkflowExecutionPlan, type IExpertFactory$1 as WorkflowExpertFactory, type WorkflowInfo, WorkflowInputsSchema, WorkflowOrchestratorAdapter, type WorkflowPattern, type WorkflowRouterOptions, type RoutingDecision$1 as WorkflowRoutingDecision, type WorkflowStep$1 as WorkflowStep, type WorkflowStepInput, type WorkflowStepOutput, WorkflowStepSchema, type WorkflowTemplate, type WorkflowToolResult, actorFromContext, aggregatePrDecisions, aggregateResults, analysisToTaskContract, analyzeTask as analyzeDelegateTask, analyzeFailures, analyzeGitHubRepo, analyzeRepo, append, applyPatch, areStepsCompleted, assessReputation, bufferStream, buildDependencyGraph, buildDockerArgs, buildEnrichedPrompt, buildFinalResult, buildHarnessArgs, buildHarnessCommand, buildPendingResult, buildPlanFromAnalysis, buildPrReviewProposal, buildDependencyGraph$1 as buildSkillDependencyGraph, buildTimeoutResult, calculateDelay, calculateDistributionStats, calculateEstimatedRemaining, calculateMetrics, calculateMetricsTotals, calculateMinSampleSize, calculateRegret, calculateRepositoryMetrics, calculateRoutingDistribution, calculateTokenCost, calculateTokenMetrics, calculateVoteWeight, calculateWinLoss, canApplyPatch, canExecuteSkill, canInfluenceDecisions, canPipelineProceed, canProceed, cancelExecution, categorizeOutcomeError, categorizeOutcomeErrorMessage, checkForResearchTriggers, checkPermissionBoundary, checkPipelinePolicy, checkpointToResult, chunkByDirectory, classifyTask, classifyTrust, cleanupCheckpoint, clearRegistryCache, clearTemplateCache, calculateBackoffDelay as cliCalculateBackoffDelay, categorizeError as cliCategorizeError, closeServer, collectRealVotes, collectStream, compareBenchmarks, compareProportions, compilePipelineGraph, compilePlan, compileSpecToGraph, computeAdaptiveThresholds, computeOutcomeReward, concatStreams, connectTransport, containsExpressions, countActiveSessions, createAbTestTracker, createAgentPairKey, createAgentStages, createStepExecutor as createAgentStepExecutor, createAllAdapters, createArchitectureExpert, createAttestation, createAuditLogger, createAuditTrail, createBenchmarkMemory, createBenchmarkSummary, createCheckpoint, createCheckpointStore, createClaudeAdapter, createCliAdapter, createCliCircuitBreakerIntegration, createCliDetectionCache, createCliExecutor, createCodeExpert, createCollaborationSession, createCompositeRouter, createConsensusEngine, createContextItem, createCorePluginRegistry, createCorrelationTracker, createDashboard, createDashboardRenderer, createDecayOp, createDefaultDeps, createDefaultPolicyEngine, createDefaultPolicyFirewall, createDefaultRateLimiter, createDefaultRegistry, createDelegatePipeline, createDependencyError, createDevStageRegistry, createDocumentationExpert, createDryRunHandler, createEmptyContext, createEvaluationHarness, createEventBusBridge, createExecutionContext, createExecutionPlan, createExecutor, createExplorationPrompt, createFeedbackIntegration, createFeedbackSubscriber, createFullGitHubProvider, createGeminiAdapter, createGitHubAdapter, createGitHubProvider, createGraphAuditBridge, createHarnessExecutor, createHigherOrderVotingStrategy, createIncompleteResult, createInitialCostMetrics, createInitialProgress, createInitialSessionMetrics, createInitialTokenUsage, createInitializedWorkflowEngine, createInstancePrompt, createInteractionGraph, createSwarmObserver as createInteractionSwarmObserver, createIsolatedRegistry, createLogger, createMcpLogger, createMcpNotifier, createMockExecutor, createNexusExecutorFromEnv, createOWVoting, createOllamaAdapter, createOpenAIAdapter, createOrchestrator, createOrchestratorFactory, createOutcomeFeedbackCollector, createOutcomeStorage, createPatchApplicator, createPolicyContext, createPrediction, createPreferenceRouter, createProductionWorkflowEngine, createProgressAdapter, createPromotionOp, createProtocolFactory, createRateLimiter, createRealWorkflowEngine, createReportGenerator, createResultAggregator, createRetryPrompt, createRoutingDecision, createRoutingMetricsCollector, createRunner, createSandboxExecutor, createScmProvider, createSecurityError, createSecurityExpert, createServer, createSkillComposer, createSkillDependencyGraph, createSkillLibrary, createSkillLoader, createStateComparisonVerifier, createStateGuard, createStateMachine, createStrategyDistiller, createStrategyFactory, createStream, createSummaryPrompt, createTaskOutcome, createTaskQueue, createTemplateRegistry, createTestRunner, createTestingExpert, createTimer, createTokenCounter, createToolLogger, createTrackedAgent, createTrinityCoordinator, createValidatedExecutor, createValidatedHarness, createValidationDashboard, createValidator, createVariantRunner, createVotingProtocol, createWaveScheduler, createWeightedVoting, createWorkflowEngineDeps, createWorkflowEngineDepsAsync, createWorkflowRouter, curateContext, customReducer, decomposeSpec, defaultConfig, delegateInputToTaskContract, denyMutationsWithoutModeRule, detectFailurePatterns, detectLatencyPatterns, detectSuccessPatterns, detectTestFramework, detectTrend, determineFinalStatus, emitCorroborationEvent, emitExecutionComplete, emitGraphExecutionEvent, emitNodeResults, emitNodeStarted, emitPipelineStageEvent, emitPolicyEvent, emitReputationEvent, emitSanitizationEvent, emitStageCompleted, emitStageFailed, emitStageStarted, emitStateUpdated, emitStepCompleted, emitThresholdUpdate, emitTrendDetected, emitTrustEvent, err, estimateTokens as estimateBenchmarkTokens, estimateDifficulty, estimateTaskComplexity, estimateTokens$1 as estimateTokens, evaluatePolicy as evaluatePipelinePolicy, evaluatePolicy$2 as evaluatePolicy, evaluatePredictions, evaluatePolicy$1 as evaluateSecurityPolicy, executeCliRetryLoop, executeDelegatePipeline, executeExpert, executeGraph, executeHarness, executeInDocker, executeOrchestratePipeline, executeParallel, executeSpec, exportReport, extractApproach, extractBooleanField, extractExpressions, extractFilesFromResponse, extractHypothesis, extractModelName, extractNonErrorMessage, extractNumberField, extractPastSuccessRates, extractPatch, extractRepoFromInstanceId, extractRepoName, extractSessionId, extractStateValue, extractStringArrayField, extractStringField, filterAvailableModels, filterByRepo, filterBySeverity, filterByVersion, filterStream, findActiveSession, findMissingDependencies, flushPipelineMemory, formatAdapterLatencyReport, formatBenchmarkReport, formatBenchmarkResults, formatComparisonResults, formatCompileError, formatContextForPrompt, formatValidationResult, fromArray, generateATL, generateBenchmarkReport, generateMcpConfig, generateProposalId, generateReport, generateSecurityPlan, generateWeatherReport, getAllTestCases, getAvailabilityCache, getAvailableClis, getAvailableRoles, getBenchmarkEnvironment, getBuiltInTemplates, getBuiltInTemplatesPath, getBuiltInTemplatesWithMetadata, getCapabilitiesForRole, getCategoriesByMinRiskLevel, getCliForModelId, getCompletedInstanceIds, getCompletedSteps, getCorroborationRules, getCpuCores, getDatasetInfo, getDefaultAllowedTools, getDockerVersion, getEventBusStats, getExecutionDuration, getExecutionOrder, getExpertRegistry, getFallbackChain, getGraphRegistry, getGraphWorkflowList, getInstance, getKnownNexusVarNames, getMemoryInfo, getOutcomeStore, getPipelineArtifactStore, getPipelinePluginRegistry, getPolicy, getPolicyMode, getPythonVersion, getRecommendedRole, getReferencedSteps, getRegistryManifest, getRequiredTrustTier, getResultsFilePath, getSafetyCategory, getSafetyTaxonomySummary, getSkillSetForTask, getSkillsForTask, getStepResult, getSwarmObserver, getSwebenchVersion, getTemplate, getTestCasesByTags, getTimeoutForTask, getTimeoutForTaskAuto, getTokenEnvVars, getTopologicalOrder, getVariable, hasToken, ictmToExpertConfig, identifySessionsToRemove, inferICTM, initializeAgentSkills, initializeBuiltInTemplates, initializeEventBusBridge, isCancelled, isCliAvailable$1 as isCliAvailable, isRetryableError as isCliRetryableError, isErr, isIncompleteResult, isMutatingAction, isOk, isReadOnlyAction, isRetryableError$1 as isRetryableError, isCliAvailable as isSWEBenchCliAvailable, isStepCompleted, isZodError, listInstances, listTemplateIds, loadCheckpointState, loadDataset, loadTemplateFile, loadTemplatesFromDirectory, loadWorkflowFile, logPolicyAudit, logRateLimitAudit, logToolError, logToolInvocationAudit, logToolStart, logToolSuccess, logger, map, mapAuthorAssociation, mapErr, mapResolutionStatus, mapStateToPhase, mapTestStatus, mapVoteDecisionToPrDecision, meanConfidenceInterval, mergeStreams, normalizeRepoId, ok, orchestrateInputToTaskContract, overwrite, parseATL, parseAgentPairKey, parseExpression, parseJsonResults, parseProgressLine, parseSpec, parseStdoutResults, parseTemplateContent, parseTestResults, parseWorkflowJson, parseWorkflowYaml, proportionConfidenceInterval, quickRun, quickSelect, readJsonResults, readPredictions, recordOutcome, reduceStream, registerConsensusVoteTool, registerCorePlugins, registerCreateExpertTool, registerDelegateToModelTool, registerExecuteExpertTool, registerExecuteSpecTool, registerExpertsResource, registerExtractSymbolsTool, registerIssueTriageTool, registerListExpertsTool, registerListWorkflowsTool, registerMemoryQueryTool, registerMemoryStatsTool, registerMemoryWriteTool, registerModelsResource, registerOrchestrateTool, registerPrReviewTool, registerPrompts, registerQueryTraceTool, registerRegistryImportTool, registerRepoAnalyzeTool, registerRepoSecurityPlanTool, registerResearchAddSourceTool, registerResearchAddTool, registerResearchAnalyzeTool, registerResearchCatalogReviewTool, registerResearchDiscoverTool, registerResearchQueryTool, registerResearchResource, registerResearchSynthesizeTool, registerResources, registerRunGraphWorkflowTool, registerRunWorkflowTool, registerSearchCodebaseTool, registerTools, registerWeatherReportTool, requiresCitation, requiresCorroboration, resetAvailabilityCache, resetPipelineArtifactStore, resetPipelinePluginRegistry, resetRegistry, resolveExpression, resolveFallback, resolveInput, resolveScannerData, resolveStringExpressions, resolveToken, resolveV2Config, resolveWithFallbacks, resultToOutcome, runAdapterLatencyBenchmark, runAdaptiveOrchestrator, runAgentOnInstance, runBenchmark, runBenchmarkInstances, runBenchmarkParallel, runConsolidationBenchmark, runDevPipeline, runGraphPipeline, runIterativeConsensus, runMemoryBenchmarks, runOperationBenchmark, runPreconditions, runSingleInstance, runTests, runTokenBenchmark, runVerification, safePathsRule, safeValidateExpertConfig, sanitize, sanitizeInput, saveStageCheckpoint, scoreByHybrid, scoreByImportance, scoreByRecency, selectExperts, selectModel, setSwarmObserver, setVariable, sigmoidConfidence, skip, sleep, snapshotContext, sortByPriority, startStdioServer, storeStepResult, take, takeUntil, tapStream, taskContractToToolResponse, toSuiteResult, toolError, toolSuccess, toolSuccessStructured, transformHarnessOutput, transformHarnessProgress, transformInstanceResult, transformStream, transformTestResult, translateEvaluationResult, unwrap, unwrapOr, updateContext, validateAgentAction, validateCommand, validateCorroboration, validateDependencyGraph, validateDiskSpace, validateDocker, validateEnvironment, validateEvaluationCriterion, validateExpertConfig, validateExpressions, validateICTM, validateNexusEnv, validatePatch, validatePatchFormat, validatePrediction, validatePredictionsFile, validatePython, validateRequiredInputs, validateSafetyCategory, validateScenario, validateCapabilities as validateSkillCapabilities, validateSkillExecution, validateSkillProvenance, validateRBAC as validateSkillRBAC, validateSwebench, validateTestCase, validateToolInput, validateWorkflow, validateWorkflowDependencies, withLogging, withRetry, withRetryWrapper, withTimeout, writePredictions };
31214
+ export { ALLOWED_COMMANDS, ARTIFACT_TYPES, AUDIT_PIPELINE_TEMPLATE, AbTestTracker, type ActionContext, type ActionRecord, type ActionValidationResult, type ActivationOptions, type ActivationStrategy, ActivationStrategySchema, type ActivityItem, type AdapterConfig, AdapterConfigSchema, type AdapterCreator, AdapterFactory, type AdapterLatencyConfig, type AdapterLatencyResult, AdapterModelError, RateLimiter$1 as AdapterRateLimiter, type RateLimiterConfig$1 as AdapterRateLimiterConfig, type RegisterOptions$1 as AdapterRegisterOptions, type AdapterScenarioResult, type AdaptiveOrchestratorOptions, type AdaptiveOrchestratorResult, type AdaptiveThresholdResult, type AgentAction, AgentActionSchema, type AgentActionType, AgentCapability, type AgentCluster, AgentError, type AgentEvent, AgentEventSchema, type AgentExecutorConfig, type AgentFinding, AgentFindingSchema, type AgentId, type AgentMessage, AgentMessageSchema, type AgentMessageType, type AgentPairKey, type AgentPerformance, AgentPerformanceSchema, type AgentResponse, type AgentRole, AgentRoleSchema, type AgentRoleType, type AgentState$2 as AgentState, AgentStateMachine, type AgentStatus, StepExecutor as AgentStepExecutor, type AgentVoteResult, type AgentVoteSummary, type AggregatedResult, type AggregationMetadata, type AggregationStrategy, type AggregatorInput, type AggregatorOptions, type ApiDocumentation, type ApiEndpoint, type ApiType, type AppConfig, AppConfigSchema, type ArchitectureAnalysisResult, type ArchitectureDecision, ArchitectureExpert, type ArchitectureExpertOptions, type ArchitecturePattern, type ArchitectureStyle, type Artifact, type ArtifactFilter, type ArtifactRef, ArtifactRefSchema, ArtifactStore, type ArtifactStoreOptions, type ArtifactType, type AuditActor, AuditActorSchema, type AuditCategory, AuditCategorySchema, AuditError, type AuditEvent$1 as AuditEvent, type AuditEventInput, AuditEventInputSchema, AuditEventSchema, type AuditHandlerConfig, type AuditLogConfig, AuditLogConfigSchema, AuditLogger, type AuditOutcome, AuditOutcomeSchema, type AuditQueryCriteria, AuditQueryCriteriaSchema, type AuditResource, AuditResourceSchema, type AuditSeverity, AuditSeveritySchema, AuditTrail, type AuthorizationMethod, AuthorizationMethodSchema, AvailabilityCache, type AvailabilityCacheConfig, BIAS_CATEGORY, BUILT_IN_EXPERTS, BUILT_IN_RULES, BUILT_IN_TEMPLATES, BaseAdapter, type BaseAdapterConfig, type BaseAdapterOptions, BaseAgent, type BaseAgentOptions, BaseAgentOptionsSchema, BaseCliAdapter, type BaseMcpToolDeps, type BenchmarkAdapter, type BenchmarkComparison, type BenchmarkConfig, type BenchmarkEnvironment, type BenchmarkOperation, type BenchmarkOrchestratorOptions, type BenchmarkReport, type BenchmarkRunContext, type BenchmarkRunSummary, type BenchmarkSuiteResult, type BenchmarkSummary, type BenchmarkThresholds, type BestSolution, BestSolutionSchema, type BottleneckInfo, type BuiltInExpertType, BuiltInExpertTypeSchema, CHECKPOINT_SCHEMA_VERSION, CLAUDE_MODELS, CLAUDE_MODEL_ALIASES, DEFAULT_CACHE_CONFIG as CLI_DEFAULT_CACHE_CONFIG, DEFAULT_CAPABILITIES$1 as CLI_DEFAULT_CAPABILITIES, DEFAULT_COMPOSITE_CONFIG as CLI_DEFAULT_COMPOSITE_CONFIG, CLI_TIMEOUT_PROFILES, CLI_VERSION_REQUIREMENTS, COMPLEXITY_ORDER, CORE_PLUGINS, type CapabilityProfile, type CapacityStatus, type Checkpoint, type PipelineStage as CheckpointPipelineStage, type CheckpointSummary, type FailureCategory as CircuitBreakerFailureCategory, type CircuitProtectedResult, type CircuitState, type ClaimValidation, type ClassifyInput, type ClassifyResult, ClaudeAdapter, type ClaudeAdapterConfig, ClaudeCliAdapter, type ClaudeCliResponse, ClaudeResponseParser, type CliAdapterConfig, type CacheStats as CliCacheStats, type CapabilityProfile$1 as CliCapabilityProfile, type CliCircuitBreakerConfig, CliCircuitBreakerIntegration, type CliCircuitHealthStatus, CliDetectionCache, type CliDetectionCacheConfig, CliDetectionCacheConfigSchema, type CliError, type CliErrorCode, type ExecutionOptions$1 as CliExecutionOptions, type CliHealthResult, type ModelInfo as CliModelInfo, type CliName, type CliResponse, type CliRetryLoopConfig, type CliRetryResult, type CliTask, type TaskComplexity as CliTaskComplexity, type TokenUsage$2 as CliTokenUsage, type CliTransport, type CodeAnalysisResult, type CodeChange, CodeChangeSchema, CodeExpert, type CodeExpertOptions, CodexCliAdapter, type CodexCliResponse, CodexMcpAdapter, CodexResponseParser, type CollaborationConfig, CollaborationConfigSchema, type CollaborationMessage, type CollaborationPattern, CollaborationPatternSchema, type CollaborationResult, CollaborationSession, type CollaborationSessionOptions, type CollectRealVotesOptions, CompactDashboardRenderer, type ComparisonResult, type CompileOptions, type CompileResult$2 as CompileResult, type CompiledGraph, type CompiledPipeline, type CompletionRequest, type CompletionResponse, type ComplexityLevel, ComplexityLevelSchema, type ComplianceStatus, CompositeRouter, type CompositeRouterConfig, CompositeRouterConfigSchema, type CompositeRouterStats, type CompositeRoutingDecision, CompositeRoutingError, type CompositionStep, type CompositionValidation, type ComputedReward, type ConfidenceInterval, ConfigError, type ExpertConfig$1 as ConfigExpertConfig, ExpertConfigSchema$1 as ConfigExpertConfigSchema, type ExpertDefinition$1 as ConfigExpertDefinition, ExpertDefinitionSchema as ConfigExpertDefinitionSchema, type Conflict, type ConflictResolver, type ConflictWarning, type ConsensusAlgorithm, ConsensusAlgorithmSchema, ConsensusEngine, type ConsensusEngineConfig, ConsensusEngineConfigSchema, ConsensusError, type ConsensusMetrics, ConsensusMetricsSchema, ConsensusProtocol, type ConsensusResult, ConsensusResultSchema, type ConsensusStats, type ConsensusVoteDeps, type ConsensusVoteInput, ConsensusVoteInputSchema, type ConsensusVoteResponse, type ConsolidatedFinding, type ConsolidationBenchmarkResult, type ConsolidationOperation, type ContentBlock, ContentPriority, type ContextBudget, ContextBudgetSchema, type ContextFilter, ContextFilterSchema, type ContextItem, ContextManager, type ContextManagerConfig, ContextManagerConfigSchema, type ContextPruneStrategy, ContextPruneStrategySchema, ContextPruner, type ContextPrunerConfig, ContextPrunerConfigSchema, type ContextStats, type ContributionScore, type CorePluginRegistrationResult, type CorrelationCoefficient, CorrelationCoefficientSchema, type CorrelationMatrix, CorrelationTracker, type CorrelationTrackerStats, CorrelationTrackerStatsSchema, type CorroborationEvent, type CorroborationResult, type CorroborationRule, type CostEstimate, CostEstimateSchema, type CoverageAnalysis, type CoverageMetrics, CoverageMetricsSchema, type CreateExecutionContextOptions, type CreateExpertDeps, type CreateExpertInput, CreateExpertInputSchema, type CreateExpertOptions, type CreateExpertResponse, type CreateForestInput, type CreateNodeInput, type CreatePROptions, type CreateScmProviderConfig, type CreateSkillOptions, type CreateStreamOptions, type CreateTreeInput, type CriterionFailure, type CriterionResult, CriterionResultSchema, CriterionType, CriterionTypeSchema, type CriterionTypeType, type CrossTreeInfo, CrossTreeInfoSchema, type CrossTreeStrategy, CrossTreeStrategySchema, type CuratedContextItem, type CurationResult, DECEPTION_CATEGORY, DEFAULT_ACTIVATION_OPTIONS, DEFAULT_ADAPTER_LATENCY_CONFIG, DEFAULT_BENCHMARK_CONFIG, DEFAULT_BUDGET, DEFAULT_COLLECT_STREAM_MAX_CHUNKS, DEFAULT_COMPOSER_CONFIG, DEFAULT_CONSENSUS_CONFIG, DEFAULT_DASHBOARD_CONFIG, DEFAULT_DASHBOARD_RENDER_OPTIONS, DEFAULT_DISTILLER_CONFIG, DEFAULT_EXECUTION_TIME_MS, DEFAULT_FEEDBACK_COLLECTOR_CONFIG, DEFAULT_FEEDBACK_INTEGRATION_CONFIG, DEFAULT_FOREST_CONFIG, DEFAULT_HIGHER_ORDER_CONFIG, DEFAULT_MAX_RETRIES, DEFAULT_MEMORY_BENCHMARK_CONFIG, DEFAULT_OUTCOME_STORAGE_CONFIG, DEFAULT_PATH_SCORING_OPTIONS, DEFAULT_PERMISSIONS, DEFAULT_POLICIES, DEFAULT_PREFERENCE_ROUTER_CONFIG, DEFAULT_RBAC, DEFAULT_RESOURCE_LIMITS, DEFAULT_RETRY_CONFIG, DEFAULT_ROLE_MAPPINGS, DEFAULT_SCENARIOS, DEFAULT_SKILL_LIBRARY_CONFIG, DEFAULT_SKILL_LOADER_CONFIG, DEFAULT_STATISTICAL_OPTIONS, DEFAULT_SWARM_OBSERVER_CONFIG, DEFAULT_TIMEOUTS, DEFAULT_TIMEOUT_PROFILE, DEFAULT_TRINITY_CONFIG, DEFAULT_VOTING_PROTOCOL_CONFIG, DEFAULT_WAVE_CONFIG, DEFAULT_WEIGHTED_VOTING_CONFIG, DEV_PIPELINE_TEMPLATE, type DagEdge, DagEdgeSchema, Dashboard, type DashboardConfig, DashboardConfigSchema, type DashboardFilter, type DashboardFormat, type DashboardHealthIndicators, type DashboardOutcome, type DashboardRenderOptions, type DashboardSnapshot, type DashboardSummary, type DashboardUpdateOptions, type DecomposeError, type DelegateDeps, type DelegateInput, type DelegateInputLike, DelegateInputSchema, type DelegateOutput, DelegateOutputSchema, type DependencyError, type DependencyErrorCode, DependencyErrorCodeSchema, DependencyErrorSchema, DependencyGraph, type DependencyStructure, type DevPipelineOptions, type DevPipelineResult, type DevPipelineStages, DirectedInteractionGraph, type DistilledRule, type DistillerConfig, type DistillerStats, type DistributionStats, DocumentationExpert, type DocumentationExpertOptions, type DocumentationResult, type DocumentationSection, type DryRunResult, type DynamicExpert, DynamicExpertManager, type DynamicExpertSpec, END, EXPERT_CAPABILITIES, EXPERT_DEFAULT_CAPABILITIES, EXPERT_DEFAULT_TEMPERATURES, EXPERT_TYPE_TO_ROLE, type EnvValidationResult, ErrorCode, type ErrorPayload, type EvaluationCriterion, EvaluationCriterionSchema, EventBus, type EventBusBridgeOptions, type EventBusBridgeResult, type EventBusOptions, type EventFilter, type EventHandler, type EventPayload, type EventType, type ExecuteExpertDeps, type ExecuteExpertInput, ExecuteExpertInputSchema, type ExecuteExpertResponse, type ExecuteSpecDeps, type ExecuteSpecInput, ExecuteSpecInputSchema, type ExecutionContext$1 as ExecutionContext, type ExecutionMode, type ExecutionPhase$1 as ExecutionPhase, type ExecutionPlan$2 as ExecutionPlan, type ExecutionStage, ExpectedOutcome, ExpectedOutcomeSchema, type ExpectedOutcomeType, type ExperienceRecord, type ExperienceStep, type ExperimentDefinition, type ExperimentExport, type ExperimentOutcome, type ExperimentResult, type ExperimentStatus, type ExperimentSummary, type ExperimentVariant, Expert, type ExpertAssignment, ExpertAssignmentSchema, type ExpertBridgeResult, ExpertCollaborationPattern, type ExpertCollaborationPatternType, type ExpertConfig, ExpertConfigSchema, type ExpertDefinition, type ExpertDomain, ExpertDomainSchema, ExpertFactory, ExpertFactoryAdapter, type ExpertInfo, type ExpertMatch, ExpertMatchSchema, type ExpertOptions, ExpertOptionsSchema, type ExpertOutput, ExpertOutputSchema, type ExpertParticipation, ExpertParticipationSchema, type RegisterOptions as ExpertRegisterOptions, ExpertRegistry$1 as ExpertRegistry, type ExpertResult, type ExpertResultSummary, type ExplorationEvent, ExplorationEventSchema, type ExplorationEventType, ExplorationEventTypeSchema, type ExpressionType, type ExtractSymbolsDeps, ExtractSymbolsInputSchema, FALLBACK_SCANNER_DATA, FactoryError, type FailureAnalysis, type AnalysisError as FailureAnalysisError, type FailurePattern, FailurePatternSchema, type FailureType, type FallbackBehavior, type FallbackEntry, type FeedbackCollectorConfig, FeedbackCollectorConfigSchema, FeedbackIntegration, type FeedbackIntegrationConfig, type FeedbackLoopStats, type FeedbackMessage, type RoutingDecision as FeedbackRoutingDecision, RoutingDecisionSchema as FeedbackRoutingDecisionSchema, FileAuditStorage, type FileReference, FileReferenceSchema, type FindingVote, FindingVoteSchema, type Artifact$1 as FirewallArtifact, type PolicyContext$1 as FirewallPolicyContext, type PolicyDecision$2 as FirewallPolicyDecision, type PolicyRule$1 as FirewallPolicyRule, type FirewallResult, type Forest, type ForestConfig, ForestConfigSchema, type ForestId, type ForestPruningStrategy, ForestPruningStrategySchema, type ForestResult, ForestResultSchema, type ForestState, ForestStateSchema, type ForestStatistics, ForestStatisticsSchema, type FullCapableProvider, GEMINI_MODELS, GEMINI_MODEL_ALIASES, GENERAL_PIPELINE_TEMPLATE, GeminiAdapter, type GeminiAdapterConfig, GeminiCliAdapter, type GeminiCliResponse, GeminiResponseParser, type GeneratedTest, GeneratedTestSchema, type GitHubInput, GitHubProvider, GitHubReviewer, GitHubUserInfo, type GitHubUserMetadata, type GitHubUserRole, GitHubUserRoleSchema, GraphBuilder, type GraphCompileError, type GraphEdge, type GraphEdgeDisplay, type GraphEvent, type GraphExecuteOptions, type GraphExecutionAuditEvent, type GraphExecutionResult, type GraphNode, type GraphPipelineOptions, type GraphPipelineResult, type GraphState, type GraphStats, type GraphSummary, type GraphWorkflowInfo, HARM_EMOTIONAL_CATEGORY, HARM_FINANCIAL_CATEGORY, HARM_PHYSICAL_CATEGORY, type HealthStatus, type HigherOrderVotingConfig, HigherOrderVotingConfigSchema, type HigherOrderVotingResult, HigherOrderVotingResultSchema, HigherOrderVotingStrategy, type HookError, HostileInputFirewall, type IAbTestTracker, type IAgent, type IArtifactStore, type IAuditLogger, type IAuditStorage, type ICTMConfig, ICTMConfigSchema, type ICTMInferenceResult, ICTMInferenceResultSchema, type ICheckpointStore, type ICircuitBreaker, type ICliAdapter, type ICliCircuitBreakerIntegration, type ICliDetectionCache, type ICliResponseParser, type ICollaborationProtocol, type ICompositeRouter, type IConsensusEngine, type ICorrelationTracker, type IDashboard, type IDashboardRenderer, type IEventBus, type IFeedbackIntegration, type IHigherOrderVoting, type ISwarmObserver as IInteractionObserver, type ILogger, type IMcpNotifier, type IMemoryBackend, type IModelAdapter, INSTRUCTION_SAFETY_CATEGORY, type IOrchestrationObserver, type IOrchestrator, type IOrchestratorFactory, type IOutcomeFeedback, type IOutcomeStorage, type IPipelineStage, type IPluginRegistry, type IPolicyEngine, type IPolicyFirewall, type IPreferenceDataStore, type IRoutingMemory$1 as IRoutingMemory, type ISQLiteDatabase, type ISQLiteStatement, type ISandboxExecutor, type IScmProvider, type IScmReviewer, type IScmUserInfo, type ISkillDependencyGraph, type ISkillLoader, type ITaskTracker, type ITemplateRegistry, type ITokenCounter, type IVotingProtocol, type IVotingStrategy, type IWeightedVoting, type IWorkflowEngine, type IWorkflowRouter, type ImprovementSuggestion, InMemoryAuditStorage, InMemoryCheckpointStore, InMemoryPreferenceStore, type IncompleteResult, type IncompleteSeverity, type IndependentSubset, IndependentSubsetSchema, type InjectionFlag, InjectionFlagSchema, type InputBinding, type InputDefinition, type InputDefinitionInput, type InputDefinitionOutput, InputDefinitionSchema, type InputType, InputTypeSchema, type InteractionEdge, type InteractionGraph, type SwarmObserverConfig as InteractionObserverConfig, SwarmObserverConfigSchema as InteractionObserverConfigSchema, type InteractionOutcome, SwarmObserver as InteractionSwarmObserver, type InvalidVar, type IssueFilters, type IssueReference, IssueReferenceSchema, type IssueTriageDeps, type IssueTriageInput, IssueTriageInputSchema, type IssueTriageResponse, type IterativeConsensusConfig, type IterativeConsensusResult, JsonDashboardRenderer, KNOWN_SECTIONS, type KnownSection, type LanguageMatrixEntry, type LatencyMetrics, LatencySampler, type LatencyScenario, type LearningProgress, type LibraryStatistics, type ListExpertsDeps, type ListExpertsInput, ListExpertsInputSchema, type ListExpertsResponse, type ListWorkflowsDeps, type ListWorkflowsInput, ListWorkflowsInputSchema, type ListWorkflowsResponse, type LoadedSkillSet, LoadedSkillSetSchema, type LogContext, type LogEntry, type LogLevel, type LogPolicyAuditOpts, type LogRateLimitAuditOpts, type LogToolInvocationOpts, type LoggingConfig, LoggingConfigSchema, MANIPULATION_CATEGORY, MAX_DIFF_LENGTH, MAX_DYNAMIC_EXPERTS, MAX_EXECUTION_TIME_MS, MEM0_TARGETS, MIN_EXPERTS_FOR_PATTERN, MODEL_CAPABILITIES, type IExpertFactory as McpExpertFactory, type McpLogContext, type McpLogLevel, RateLimiter as McpRateLimiter, type RateLimiterConfig as McpRateLimiterConfig, type MemoryBenchmarkConfig, type MemoryEntry, MemoryError, MemoryImportance, type MemoryMetadata, type MemoryPayload, type MemoryQueryInput, MemoryQueryInputSchema, MemoryStatsInputSchema, type MemoryWriteInput, MemoryWriteInputSchema, type MergePROptions, type Message, type MessagePayload, type MessageRole, ModelCapability, type ModelConfig, ModelConfigSchema, ModelError, type ModelMetrics, type ModelPerformanceSummary, type ModelPreference, ModelPreferenceSchema, type ModelSelection, ModelSelectionSchema, type ModelTiers, ModelTiersSchema, NOOP_NOTIFIER, NOOP_PROGRESS, NexusError, type NexusErrorOptions, NoAdapterError, type NodeHandler$1 as NodeHandler, type NodeHandlerFactory, type NodeHook, type NodeHookContext, type NodeId, type NodeResult, type NodeState, NodeStateSchema, OLLAMA_MODELS, OPENAI_MODELS, OPENAI_MODEL_ALIASES, OWVoting, type OWVotingOptions, type AgentState$1 as ObserverAgentState, type CostMetrics as ObserverCostMetrics, type RoutingDecision$2 as ObserverRoutingDecision, type SessionMetrics as ObserverSessionMetrics, type TokenUsage$1 as ObserverTokenUsage, type TrackedAgent as ObserverTrackedAgent, OllamaAdapter, type OllamaAdapterConfig, OpenAIAdapter, type OpenAIAdapterConfig, OpenCodeCliAdapter, type OperationBenchmark, type OperationComparison, type OrchestrateDeps, type OrchestrateInput, type OrchestrateInputLike, OrchestrateInputSchema, type OrchestrateOutput, OrchestrateOutputSchema, OrchestrationError, type OrchestrationObserverEvent, type OrchestrationObserverListener, type OrchestrationStats, OrchestrationUnavailableError, Orchestrator, type OrchestratorDefinition, OrchestratorError, type OrchestratorErrorCode, type OrchestratorExecuteOptions, OrchestratorFactory, type OrchestratorFactoryConfig, type OrchestratorOptions, OrchestratorOptionsSchema, type OrchestratorResult, type OrchestratorStep, type OrchestratorType, type OutcomeClass, type OutcomeFailureCategory, OutcomeFailureCategorySchema, OutcomeFeedbackCollector, type OutcomeProcessedCallback, type OutcomeRecord, type OutcomeStorageConfig, OutcomeStorageConfigSchema, OutcomeStorageError, OutcomeStore, type OutcomeStoreConfig, type TaskOutcome$2 as OutcomeTaskRecord, TaskOutcomeSchema$2 as OutcomeTaskSchema, PIPELINE_EVENT_TYPES, PIPELINE_STATE_KEYS, PIPELINE_TEMPLATES, PLUGIN_TRUST_LEVELS, PRIVACY_CATEGORY, PROMPT_DEFINITIONS, PR_REVIEW_ROLES, type PairwiseVotingHistory, PairwiseVotingHistorySchema, type ParallelOptions, ParallelProtocol, ParseError, type ParsedExpression, type ParsedSpec, ParsedSpecSchema, type ParsedTemplate, type PathAccessRule, type PathScore, type PathScoreBreakdown, PathScoreBreakdownSchema, PathScoreSchema, type PathScoringOptions, type PatternMetrics, type PatternOutcome, type PatternType, type PerformanceMatrixEntry, type PerformanceSummary, type PersistentDistillerConfig, PersistentOutcomeStore, type PersistentOutcomeStoreConfig, PersistentStrategyDistiller, type PipelineBridgeResult, type PipelineCheckpointState, type PipelineContext, type PipelineEdge, type PipelineError, type PipelineEvent, type PipelineEventType, type PipelineExecuteOptions, type PipelineGraphResult, type PipelineMetrics, type PipelineMode, type PipelinePlugin, type PolicyMode as PipelinePolicyMode, type PolicyViolation as PipelinePolicyViolation, type PipelineResult, type PipelineRole, PipelineRunner, type PipelineStage$1 as PipelineStage, type PipelineStageData, type PipelineTask, type PipelineTemplate, type PipelineType, type PlanCompileOptions, type PlanContract, PlanContractSchema, type PluginManifest, PluginManifestSchema, PluginRegistry, type PluginRegistryOptions, type PluginTrustLevel, type ValidationError as PluginValidationError, type PolicyConfig, PolicyConfigSchema, type PolicyContext, type PolicyDecision, type PolicyDecisionAuditOpts, PolicyEngine, PolicyError, type PolicyEvalResult, type PolicyEvaluation, type PolicyEvaluatorOptions, PolicyFirewall, type PolicyFirewallConfig, type PolicyGateEvent, type PolicyGateSpec, PolicyGateSpecSchema, type PolicyMode$1 as PolicyMode, type PolicyRule, type PolicyViolation$1 as PolicyViolation, type PrReviewDecision, type PrReviewDeps, type PrReviewInput, PrReviewInputSchema, type PrReviewResponse, type PrReviewVote, type PreconditionConfig, type PreconditionOutcome, type PreconditionResult, type PreferenceDataPoint, type PreferenceFilter, type PreferenceModelStats, type PreferencePrediction, type PreferenceRecord, PreferenceRouter, type PreferenceRouterConfig, PreferenceRouterConfigSchema, type PreferenceRoutingDecision, type PreferenceSignal, type PreferredCapability, type ProbeFn, type ProbeResult, type PromptDefinition, type PromptMessage, type PromptRegistrationResult, ProofOfLearningStrategy, type Proposal, type ProposalId, ProposalSchema, type ProposalState, type ProposalStatus, ProposalStatusSchema, ProtocolFactory, type ProtocolOptions, type ProvenanceEntry, type ProviderConfig, ProviderConfigSchema, type PruneOptions, type PruneResult, PruningStrategy, type QaReviewResult, type QualityAttribute, type QualityMetrics, type QualityRequirement, type QualityScorer, type QualitySignals, QualitySignalsSchema, QueryFeatureExtractor, type QueryFeatures, type QueryOptions, type QueryTraceInput, QueryTraceInputSchema, REJECTION_CATEGORIES, RESEARCH_PIPELINE_TEMPLATE, RISK_AWARENESS_CATEGORY, ROBUSTNESS_CATEGORY, ROLE_DEFAULT_TRUST, type RateLimitAuditOpts, RateLimitError, type RateLimitExceeded, type RateLimiterState, type ReasoningDepth, ReasoningDepthSchema, type ReasoningNode, type ReasoningNodeMetadata, ReasoningNodeMetadataSchema, ReasoningNodeSchema, type ReasoningStepType, ReasoningStepTypeSchema, type ReasoningTree, ReasoningTreeSchema, type RecordExecutionOptions, type RecordInteractionOptions, type RecordOutcomeParams, type RegistrationError, RegistryError, type RegistryImportInput, RegistryImportInputSchema, type RegistryRelationship, type RegistryScanner, type RegistryStats, type RegretAnalysis, type RejectionCategory, RejectionCategorySchema, type RepoAnalysis, type RepoAnalyzeDeps, type RepoAnalyzeInput, RepoAnalyzeInputSchema, type RepoSecurityPlan, type RepoSecurityPlanDeps, type RepoSecurityPlanInput, RepoSecurityPlanInputSchema, type ReportOptions, type ReputationAssessment, ReputationCache, type ReputationEvent, type ResearchAddDeps, type ResearchAddInput, ResearchAddInputSchema, type ResearchAddResponse, type ResearchAddSourceDeps, type ResearchAddSourceInput, ResearchAddSourceInputSchema, type ResearchAddSourceResponse, type ResearchAnalyzeDeps, type ResearchAnalyzeInput, ResearchAnalyzeInputSchema, type ResearchAnalyzeResponse, type ResearchCatalogReviewDeps, ResearchCatalogReviewInputSchema, type ResearchDiscoverDeps, type ResearchDiscoverInput, ResearchDiscoverInputSchema, type ResearchDiscoverResponse, type ResearchQueryDeps, type ResearchQueryInput, ResearchQueryInputSchema, type ResearchQueryResponse, type ResearchSynthesizeDeps, type ResearchSynthesizeInput, ResearchSynthesizeInputSchema, type ResearchTriggerConfig, type ResolveResult, type ResourceLimits, type ResourceMetrics, type ResourceUsage, type Result, ResultAggregator, type ResultConflict, type ResultSubmissionMessage, type ResultSummary, type RetryAttemptInfo, type RetryConfig, RetryExhaustedError, type ReviewCapableProvider, ReviewProtocol, type ReviewRequestMessage, type ReviewResponseMessage, ReviewResponseMessageSchema, RiskLevel, RiskLevelSchema, type RiskLevelType, type RoleSkillMapping, type RoundSummary, type RouterType, type DashboardConfig$1 as RoutingDashboardConfig, type RoutingDecisionRecord, RoutingMemoryError, type RoutingMemoryExport, type RoutingMemoryStats$1 as RoutingMemoryStats, type RoutingMetrics, RoutingMetricsCollector, type RoutingMetricsConfig, type RoutingRecord, type RuleStatus, type RulesSnapshot, RulesSnapshotSchema, type RunGraphWorkflowDeps, type RunGraphWorkflowInput, RunGraphWorkflowInputSchema, type RunGraphWorkflowResponse, type RunWorkflowDeps, type RunWorkflowInput, RunWorkflowInputSchema, SAFETY_CATEGORIES, SAFETY_CATEGORY_MAP, PROVIDER_ENV_KEYS as SDK_PROVIDER_ENV_KEYS, DEFAULT_CAPABILITIES as SKILL_DEFAULT_CAPABILITIES, SKILL_PERMISSIONS, SQLiteOutcomeStorage, STAGE_TYPES, START, type SafetyCategory, SafetyCategoryId, SafetyCategoryIdSchema, type SafetyCategoryIdType, SafetyCategorySchema, type SafetyTaxonomySummary, type SafetyTestCase, SafetyTestCaseSchema, type SandboxConfig, type SandboxExecutionOptions, type SandboxMode, type SandboxPolicy, type SandboxResult, type SanitizationEvent, type SanitizedInput, SanitizedInputSchema, type SanitizerConfig, SanitizerConfigSchema, type ScannerData, type ScannerEntry, type ScannerRecommendation, type ScannerRegistryManifest, type ScenarioError, type ScenarioResult, ScenarioResultSchema, type ScmComment, type ScmCommentDetail, ScmError, type ScmFileChange, type ScmIssue, type ScmIssueDetail, type PRStatus as ScmPRStatus, type ScmPlatform, type ScmPullRequest, type ScmPullRequestDetail, type ScmReviewDecision, type ScmToken, type ScmUserMetadata, type ScoreBreakdown, ScoreBreakdownSchema, SdkAdapter, type SdkAdapterConfig, type SdkProviderId, type SearchCodebaseDeps, SearchCodebaseInputSchema, type SecurityAnalysisResult, type AuditEvent as SecurityAuditEvent, type AuditQuery as SecurityAuditQuery, type SecurityCapability, type SecurityConfig, SecurityConfigSchema, SecurityError, type SecurityErrorCode, SecurityErrorCodeSchema, type SecurityEventAuditOpts, SecurityExpert, type SecurityExpertOptions, type SecurityFocusArea, type PolicyDecision$1 as SecurityPolicyDecision, SelectionError, type ExpertRegistry as SelectionExpertRegistry, type SelectionOptions, SelectionOptionsSchema, type SelectionResult$1 as SelectionResult, SelectionResultSchema, SequentialProtocol, type SerializedError, type ServerConfig, type ServerError, type ServerInstance, type SessionEvent, type SessionState, type SessionStatus, SessionStatusSchema, type SharedConclusion, SharedConclusionSchema, type SharedInsight, SharedInsightSchema, type SharedMemoryEntry, SharedMemoryStore, type SharedMemoryTag, SimpleAgent, SimpleMajorityStrategy, type Skill, AgentRoleSchema$2 as SkillAgentRoleSchema, type SkillAttestation, SkillAttestationSchema, type SkillCapabilities, SkillCapabilitiesSchema, type SkillCategory, type SkillComplexity, SkillComposer, type SkillComposerConfig, type SkillComposition, type SkillCompositionRequest, type SkillDependency, SkillDependencyGraph, SkillDependencySchema, type SkillDependencyType, SkillDependencyTypeSchema, type SkillExample, type SkillExecution, type SkillExecutionStatus, SkillLibrary, type SkillLibraryConfig, SkillLoader, type SkillLoaderConfig, SkillLoaderConfigSchema, type SkillLoaderError, type SkillLoaderErrorCode, SkillLoaderErrorSchema, type SkillMetrics, type SkillParameter, type SkillPermission, SkillPermissionSchema, type SkillProvenance, SkillProvenanceSchema, type SkillQuery, type SkillRBAC, SkillRBACSchema, type SkillSearchResult, type SkillSecurityError, SkillSecurityErrorSchema, type SkillStore, type SkillWithMetrics, type SourceCitation, SourceCitationSchema, type SpanId, type SpecExecutionError, type SpecExecutionOptions, type SpecExecutionResult, type SpecParseError, type StageCompletedOptions, type StageContext, type StageFailedOptions, type StageOutput, type StageRegistry, type StageResult, StageResultSchema, type StageSpec, StageSpecSchema, type StageStartedOptions, type StageType, type StateChangeCallback, type StateChangePayload, type StateFieldSchema, type StateMachineOptions, type StateReducer, type StateSchema, type StateTransition, type StateTransitionEvent, type StatisticalOptions, type StatusUpdateMessage, type StepExecutionOptions, type StepExecutor$1 as StepExecutor, type StepExecutorDeps, type StepResult, type StepResultSummary, type StopReason, type StoredModelStats, type StoredReward, type StoredRoutingDecision, type StoredTaskOutcome, type StrategyAction, StrategyDistiller, StreamCancelledError, type StreamChunk, StreamController, StreamError, type StreamState, AgentRoleSchema$1 as StrictAgentRoleSchema, InputDefinitionSchema$1 as StrictInputDefinitionSchema, WorkflowDefinitionSchema$1 as StrictWorkflowDefinitionSchema, WorkflowStepSchema$1 as StrictWorkflowStepSchema, type StrippedElement, StrippedElementSchema, type SubTask, SubTaskSchema, SubprocessCliAdapter, type SubtaskNode, SubtaskNodeSchema, type SubtaskPriority, SubtaskPrioritySchema, type SubtaskStatus, SubtaskStatusSchema, type SubtaskType, SubtaskTypeSchema, SupermajorityStrategy, type SuspiciousSignal, SuspiciousSignalSchema, type AgentState as SwarmAgentState, type SwarmHealthMetrics$1 as SwarmHealthMetrics, type SycophancyIndicator, type SycophancyReport, type SynthesizedResult, SynthesizedResultSchema, type SystemComponent, TASK_STATUSES, TASK_TYPE_EXPERTS, TEMPLATE_CATEGORIES, TEMPLATE_KEYWORDS, TRINITY_ROLE_MAX_TOKENS, TRINITY_ROLE_PROMPTS, TRINITY_ROLE_TEMPERATURES, TRUST_TIER_NUMERIC, type Task$1 as Task, type TaskAnalysis, TaskAnalysisSchema, type TaskAssignmentMessage, type TaskClassification, type TaskCommitment, type TaskContext, type TaskContract, TaskContractSchema, type TaskDag, TaskDagSchema, type TaskId, type TaskOutcome$1 as TaskOutcome, type TaskOutcomeRecord, TaskOutcomeSchema$1 as TaskOutcomeSchema, type TaskPayload, type TaskProfileSummary, TaskQueue, type TaskRequirements, type TaskResult, TaskSchema, type TaskSignals, type TaskStatus, type TaskToolResponse, type TaskTypePerformance, type TemplateCategory, TemplateCategorySchema, type TemplateMetadata, TemplateMetadataSchema, TemplateRegistry, type TerminationReason, TerminationReasonSchema, type TestQuality, type TestingAnalysisResult, TestingExpert, type TestingExpertOptions, type TextContent, TextDashboardRenderer, type ThinkerOutput, type ThresholdUpdateDetail, type ThroughputMetrics, type TimeConstraint, type TimePeriod, TimeoutError, type TimeoutProfile, type TokenBenchmarkResult, TokenCountError, type TokenCountResult, TokenCounter, type TokenCounterConfig, TokenCounterProvider, type TokenMetrics, type TokenResolverConfig, type TokenStrategy, type TokenUsage, type ToolCompletedEvent, type ToolDefinition, type ToolInvocationAuditOpts, type ToolInvokedEvent, type ToolPayload, type ToolRegistrationOptions, type ToolRegistrationResult, type ToolResult, type ToolSet, ToolSetSchema, type TraceId, type TrackedTask, type TransitionErrorCallback, type TreeId, type TreeState, TreeStateSchema, type TreeStatistics, TreeStatisticsSchema, type Trend, type TrendDetectedDetail, type TrinityConfig, TrinityConfigSchema, TrinityCoordinator, type TrinityExecuteOptions, type TrinityPhase, type TrinityPhaseResult, TrinityPhaseSchema, type TrinityResult, type TrinityRole, type TrinityRoleConfig, TrinityRoleSchema, TrinityStopReasonSchema, type TrustClassificationEvent, type TrustTier, TrustTierSchema, UnanimousStrategy, type UnknownVar, type Unsubscribe, type V2Config, type V2Mode, VERSION, VOTING_THRESHOLDS, ValidationDashboard, ValidationError$1 as ValidationError, type ValidationIssue, type VariantStats, type VerificationResult, type VerifierOutput, VerifierVerdictSchema, type VersionRequirements, type VersionStatus, type Violation, ViolationSchema, type Vote, type VoteCounts, type VoteDecision$1 as VoteDecision, VoteDecisionSchema$1 as VoteDecisionSchema, type VoteDecisionStatus, type VoteMessage, VoteMessageSchema, type VoteResult, VoteSchema, type VotingObservation, VotingObservationSchema, type VotingOutcome, VotingProtocol, type VotingProtocolConfig, VotingProtocolConfigSchema, type VotingProtocolResult, type VotingRound, type VotingRoundPhase, VotingRoundPhaseSchema, type VotingRoundStatus, VotingRoundStatusSchema, type VotingSession, VotingStrategyFactory, type Vulnerability, VulnerabilitySchema, VulnerabilitySeveritySchema, type WaveExecutionResult, type WaveResult, WaveScheduler, type WaveSchedulerConfig, type WaveTask, type WaveTaskExecutor, type WaveTaskResult, WeatherReportInputSchema, type WeightedAgentRecord, type WeightedConsensusResult, type WeightedVoteCounts, WeightedVoting, type WeightedVotingConfig, type WeightedVotingOptions, type WinLossAnalysis, type WithRetryOptions, type WorkChunk, type WorkerOutput, type WorkflowAdapterConfig, type WorkflowConfig, WorkflowConfigSchema, type WorkflowDefinition, type WorkflowDefinitionInput, type WorkflowDefinitionOutput, WorkflowDefinitionSchema, type WorkflowEngineFactoryConfig, WorkflowError, type WorkflowExecutionContext, type ExecutionPlan$1 as WorkflowExecutionPlan, type IExpertFactory$1 as WorkflowExpertFactory, type WorkflowInfo, WorkflowInputsSchema, WorkflowOrchestratorAdapter, type WorkflowPattern, type WorkflowRouterOptions, type RoutingDecision$1 as WorkflowRoutingDecision, type WorkflowStep$1 as WorkflowStep, type WorkflowStepInput, type WorkflowStepOutput, WorkflowStepSchema, type WorkflowTemplate, type WorkflowToolResult, actorFromContext, aggregatePrDecisions, aggregateResults, analysisToTaskContract, analyzeTask as analyzeDelegateTask, analyzeFailures, analyzeGitHubRepo, analyzeRepo, append, areStepsCompleted, assessReputation, bufferStream, buildDependencyGraph, buildFinalResult, buildPendingResult, buildPlanFromAnalysis, buildPrReviewProposal, buildDependencyGraph$1 as buildSkillDependencyGraph, buildTimeoutResult, calculateDelay, calculateDistributionStats, calculateMetricsTotals, calculateMinSampleSize, calculateRegret, calculateRoutingDistribution, calculateTokenCost, calculateTokenMetrics, calculateVoteWeight, calculateWinLoss, canExecuteSkill, canInfluenceDecisions, canPipelineProceed, canProceed, cancelExecution, categorizeOutcomeError, categorizeOutcomeErrorMessage, checkForResearchTriggers, checkPermissionBoundary, checkPipelinePolicy, checkpointToResult, chunkByDirectory, classifyTask, classifyTrust, cleanupCheckpoint, clearRegistryCache, clearTemplateCache, calculateBackoffDelay as cliCalculateBackoffDelay, categorizeError as cliCategorizeError, closeServer, collectRealVotes, collectStream, compareBenchmarks, compareProportions, compilePipelineGraph, compilePlan, compileSpecToGraph, computeAdaptiveThresholds, computeOutcomeReward, concatStreams, connectTransport, containsExpressions, countActiveSessions, createAbTestTracker, createAgentPairKey, createAgentStages, createStepExecutor as createAgentStepExecutor, createAllAdapters, createArchitectureExpert, createAttestation, createAuditLogger, createAuditTrail, createBenchmarkSummary, createCheckpoint, createCheckpointStore, createClaudeAdapter, createCliAdapter, createCliCircuitBreakerIntegration, createCliDetectionCache, createCodeExpert, createCollaborationSession, createCompositeRouter, createConsensusEngine, createContextItem, createCorePluginRegistry, createCorrelationTracker, createDashboard, createDashboardRenderer, createDecayOp, createDefaultDeps, createDefaultPolicyEngine, createDefaultPolicyFirewall, createDefaultRateLimiter, createDefaultRegistry, createDelegatePipeline, createDependencyError, createDevStageRegistry, createDocumentationExpert, createDryRunHandler, createEventBusBridge, createExecutionContext, createExecutionPlan, createFeedbackIntegration, createFeedbackSubscriber, createFullGitHubProvider, createGeminiAdapter, createGitHubAdapter, createGitHubProvider, createGraphAuditBridge, createHigherOrderVotingStrategy, createIncompleteResult, createInitialCostMetrics, createInitialSessionMetrics, createInitialTokenUsage, createInitializedWorkflowEngine, createInteractionGraph, createSwarmObserver as createInteractionSwarmObserver, createIsolatedRegistry, createLogger, createMcpLogger, createMcpNotifier, createOWVoting, createOllamaAdapter, createOpenAIAdapter, createOrchestrator, createOrchestratorFactory, createOutcomeFeedbackCollector, createOutcomeStorage, createPolicyContext, createPreferenceRouter, createProductionWorkflowEngine, createPromotionOp, createProtocolFactory, createRateLimiter, createRealWorkflowEngine, createResultAggregator, createRoutingDecision, createRoutingMetricsCollector, createSandboxExecutor, createScmProvider, createSecurityError, createSecurityExpert, createServer, createSkillComposer, createSkillDependencyGraph, createSkillLibrary, createSkillLoader, createStateComparisonVerifier, createStateGuard, createStateMachine, createStrategyDistiller, createStrategyFactory, createStream, createTaskOutcome, createTaskQueue, createTemplateRegistry, createTestingExpert, createTimer, createTokenCounter, createToolLogger, createTrackedAgent, createTrinityCoordinator, createValidationDashboard, createValidator, createVotingProtocol, createWaveScheduler, createWeightedVoting, createWorkflowEngineDeps, createWorkflowEngineDepsAsync, createWorkflowRouter, curateContext, customReducer, decomposeSpec, defaultConfig, delegateInputToTaskContract, denyMutationsWithoutModeRule, detectFailurePatterns, detectLatencyPatterns, detectSuccessPatterns, detectTrend, determineFinalStatus, emitCorroborationEvent, emitExecutionComplete, emitGraphExecutionEvent, emitNodeResults, emitNodeStarted, emitPipelineStageEvent, emitPolicyEvent, emitReputationEvent, emitSanitizationEvent, emitStageCompleted, emitStageFailed, emitStageStarted, emitStateUpdated, emitStepCompleted, emitThresholdUpdate, emitTrendDetected, emitTrustEvent, err, estimateTokens as estimateBenchmarkTokens, estimateTaskComplexity, estimateTokens$1 as estimateTokens, evaluatePolicy as evaluatePipelinePolicy, evaluatePolicy$2 as evaluatePolicy, evaluatePolicy$1 as evaluateSecurityPolicy, executeCliRetryLoop, executeDelegatePipeline, executeExpert, executeGraph, executeOrchestratePipeline, executeParallel, executeSpec, extractBooleanField, extractExpressions, extractNonErrorMessage, extractNumberField, extractSessionId, extractStateValue, extractStringArrayField, extractStringField, filterAvailableModels, filterBySeverity, filterStream, findActiveSession, findMissingDependencies, flushPipelineMemory, formatAdapterLatencyReport, formatBenchmarkReport, formatBenchmarkResults, formatComparisonResults, formatCompileError, fromArray, generateATL, generateBenchmarkReport, generateProposalId, generateSecurityPlan, generateWeatherReport, getAllTestCases, getAvailabilityCache, getAvailableClis, getAvailableRoles, getBenchmarkEnvironment, getBuiltInTemplates, getBuiltInTemplatesPath, getBuiltInTemplatesWithMetadata, getCapabilitiesForRole, getCategoriesByMinRiskLevel, getCliForModelId, getCompletedSteps, getCorroborationRules, getEventBusStats, getExecutionDuration, getExecutionOrder, getExpertRegistry, getFallbackChain, getGraphRegistry, getGraphWorkflowList, getKnownNexusVarNames, getOutcomeStore, getPipelineArtifactStore, getPipelinePluginRegistry, getPolicy, getPolicyMode, getRecommendedRole, getReferencedSteps, getRegistryManifest, getRequiredTrustTier, getSafetyCategory, getSafetyTaxonomySummary, getSkillSetForTask, getSkillsForTask, getStepResult, getSwarmObserver, getTemplate, getTestCasesByTags, getTimeoutForTask, getTimeoutForTaskAuto, getTokenEnvVars, getTopologicalOrder, getVariable, hasToken, ictmToExpertConfig, identifySessionsToRemove, inferICTM, initializeAgentSkills, initializeBuiltInTemplates, initializeEventBusBridge, isCancelled, isCliAvailable, isRetryableError as isCliRetryableError, isErr, isIncompleteResult, isMutatingAction, isOk, isReadOnlyAction, isRetryableError$1 as isRetryableError, isStepCompleted, isZodError, listTemplateIds, loadCheckpointState, loadTemplateFile, loadTemplatesFromDirectory, loadWorkflowFile, logPolicyAudit, logRateLimitAudit, logToolError, logToolInvocationAudit, logToolStart, logToolSuccess, logger, map, mapAuthorAssociation, mapErr, mapVoteDecisionToPrDecision, meanConfidenceInterval, mergeStreams, normalizeRepoId, ok, orchestrateInputToTaskContract, overwrite, parseATL, parseAgentPairKey, parseExpression, parseSpec, parseTemplateContent, parseWorkflowJson, parseWorkflowYaml, proportionConfidenceInterval, quickSelect, reduceStream, registerConsensusVoteTool, registerCorePlugins, registerCreateExpertTool, registerDelegateToModelTool, registerExecuteExpertTool, registerExecuteSpecTool, registerExpertsResource, registerExtractSymbolsTool, registerIssueTriageTool, registerListExpertsTool, registerListWorkflowsTool, registerMemoryQueryTool, registerMemoryStatsTool, registerMemoryWriteTool, registerModelsResource, registerOrchestrateTool, registerPrReviewTool, registerPrompts, registerQueryTraceTool, registerRegistryImportTool, registerRepoAnalyzeTool, registerRepoSecurityPlanTool, registerResearchAddSourceTool, registerResearchAddTool, registerResearchAnalyzeTool, registerResearchCatalogReviewTool, registerResearchDiscoverTool, registerResearchQueryTool, registerResearchResource, registerResearchSynthesizeTool, registerResources, registerRunGraphWorkflowTool, registerRunWorkflowTool, registerSearchCodebaseTool, registerTools, registerWeatherReportTool, requiresCitation, requiresCorroboration, resetAvailabilityCache, resetPipelineArtifactStore, resetPipelinePluginRegistry, resetRegistry, resolveExpression, resolveFallback, resolveInput, resolveScannerData, resolveStringExpressions, resolveToken, resolveV2Config, resolveWithFallbacks, resultToOutcome, runAdapterLatencyBenchmark, runAdaptiveOrchestrator, runBenchmark, runConsolidationBenchmark, runDevPipeline, runGraphPipeline, runIterativeConsensus, runMemoryBenchmarks, runOperationBenchmark, runPreconditions, runTokenBenchmark, runVerification, safePathsRule, safeValidateExpertConfig, sanitize, sanitizeInput, saveStageCheckpoint, scoreByHybrid, scoreByImportance, scoreByRecency, selectExperts, selectModel, setSwarmObserver, setVariable, sigmoidConfidence, skip, sleep, snapshotContext, startStdioServer, storeStepResult, take, takeUntil, tapStream, taskContractToToolResponse, toSuiteResult, toolError, toolSuccess, toolSuccessStructured, transformStream, unwrap, unwrapOr, validateAgentAction, validateCommand, validateCorroboration, validateDependencyGraph, validateEvaluationCriterion, validateExpertConfig, validateExpressions, validateICTM, validateNexusEnv, validateRequiredInputs, validateSafetyCategory, validateScenario, validateCapabilities as validateSkillCapabilities, validateSkillExecution, validateSkillProvenance, validateRBAC as validateSkillRBAC, validateTestCase, validateToolInput, validateWorkflow, validateWorkflowDependencies, withLogging, withRetry, withRetryWrapper, withTimeout };