@tangle-network/agent-eval 0.20.12 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/CHANGELOG.md +76 -0
  2. package/README.md +39 -1
  3. package/dist/{chunk-75MCTH7P.js → chunk-3GN6U53I.js} +198 -3
  4. package/dist/chunk-3GN6U53I.js.map +1 -0
  5. package/dist/chunk-3IX6QTB7.js +1349 -0
  6. package/dist/chunk-3IX6QTB7.js.map +1 -0
  7. package/dist/{chunk-PKCVBYTQ.js → chunk-5IIQKMD5.js} +38 -2
  8. package/dist/chunk-5IIQKMD5.js.map +1 -0
  9. package/dist/{chunk-MCMV7DUL.js → chunk-ARZ6BEV6.js} +2 -2
  10. package/dist/{chunk-HKYRWNHV.js → chunk-HRZELXCR.js} +2 -2
  11. package/dist/{chunk-ODFINDLQ.js → chunk-KRR4VMH7.js} +11 -1
  12. package/dist/chunk-KRR4VMH7.js.map +1 -0
  13. package/dist/chunk-SNUHRBDL.js +154 -0
  14. package/dist/chunk-SNUHRBDL.js.map +1 -0
  15. package/dist/{chunk-KWUAAIHR.js → chunk-WOK2RTWG.js} +157 -1
  16. package/dist/chunk-WOK2RTWG.js.map +1 -0
  17. package/dist/{chunk-HNJLMAJ2.js → chunk-WOPGKVN4.js} +2 -2
  18. package/dist/cli.js +3 -2
  19. package/dist/cli.js.map +1 -1
  20. package/dist/{control-C8NKbF3w.d.ts → control-cxwMOAsy.d.ts} +3 -2
  21. package/dist/control.d.ts +4 -3
  22. package/dist/control.js +2 -2
  23. package/dist/emitter-B2XqDKFU.d.ts +121 -0
  24. package/dist/{feedback-trajectory-BGQ_ANCN.d.ts → feedback-trajectory-CB0A32o3.d.ts} +2 -1
  25. package/dist/index.d.ts +71 -83
  26. package/dist/index.js +48 -60
  27. package/dist/index.js.map +1 -1
  28. package/dist/openapi.json +1 -1
  29. package/dist/optimization.d.ts +3 -2
  30. package/dist/optimization.js +2 -2
  31. package/dist/reporting-Da2ihlcM.d.ts +672 -0
  32. package/dist/reporting.d.ts +5 -426
  33. package/dist/reporting.js +6 -2
  34. package/dist/{emitter-BYO2nSDA.d.ts → store-u47QaJ9G.d.ts} +1 -91
  35. package/dist/traces.d.ts +259 -3
  36. package/dist/traces.js +24 -4
  37. package/dist/wire/index.js +3 -2
  38. package/docs/research-report-methodology.md +155 -0
  39. package/package.json +10 -12
  40. package/dist/chunk-75MCTH7P.js.map +0 -1
  41. package/dist/chunk-IKFVX537.js +0 -717
  42. package/dist/chunk-IKFVX537.js.map +0 -1
  43. package/dist/chunk-KWUAAIHR.js.map +0 -1
  44. package/dist/chunk-ODFINDLQ.js.map +0 -1
  45. package/dist/chunk-PKCVBYTQ.js.map +0 -1
  46. /package/dist/{chunk-MCMV7DUL.js.map → chunk-ARZ6BEV6.js.map} +0 -0
  47. /package/dist/{chunk-HKYRWNHV.js.map → chunk-HRZELXCR.js.map} +0 -0
  48. /package/dist/{chunk-HNJLMAJ2.js.map → chunk-WOPGKVN4.js.map} +0 -0
package/dist/openapi.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "openapi": "3.1.0",
3
3
  "info": {
4
4
  "title": "@tangle-network/agent-eval — wire protocol",
5
- "version": "0.20.12",
5
+ "version": "0.21.0",
6
6
  "description": "HTTP and stdio RPC interface to agent-eval. The TypeScript runtime is the source of truth; this spec is the contract that cross-language clients (Python, Rust, Go) generate from.\n\nWire-protocol version: 1.0.0. Bumps on breaking changes to request/response schemas.",
7
7
  "contact": {
8
8
  "name": "Tangle Network",
@@ -1,9 +1,10 @@
1
1
  import { G as GateDecision } from './multi-shot-optimization-Bvtz294B.js';
2
2
  export { A as ActionableSideInfo, b as AsiSeverity, D as DEFAULT_MUTATION_PRIMITIVES, E as EvolvableVariant, e as GenerationReport, I as InMemoryTrialCache, h as MultiShotGateConfig, i as MultiShotGateResult, j as MultiShotMutateAdapter, k as MultiShotOptimizationConfig, l as MultiShotOptimizationResult, m as MultiShotRun, n as MultiShotRunInput, o as MultiShotRunner, p as MultiShotScore, q as MultiShotScorer, r as MultiShotSplit, s as MultiShotTrace, t as MultiShotTrialResult, u as MultiShotVariant, M as MutateAdapter, v as PromptEvolutionConfig, w as PromptEvolutionEvent, x as PromptEvolutionResult, R as ReflectionContext, y as ReflectionProposal, S as ScenarioAggregate, z as ScoreAdapter, T as TrialCache, a as TrialResult, B as TrialTrace, V as VariantAggregate, C as buildReflectionPrompt, J as defaultMultiShotObjectives, Q as parseReflectionResponse, U as runMultiShotOptimization, W as runPromptEvolution, Y as trialTraceFromMultiShotTrial } from './multi-shot-optimization-Bvtz294B.js';
3
3
  import { a as RunRecord } from './run-record-CX_jcAyr.js';
4
- export { n as FeedbackArtifactType, o as FeedbackAttempt, F as FeedbackLabel, p as FeedbackLabelKind, q as FeedbackLabelSource, r as FeedbackOptimizerRow, s as FeedbackOutcome, t as FeedbackReplayAdapter, u as FeedbackReplayResult, v as FeedbackSeverity, w as FeedbackSplitPolicy, x as FeedbackTask, b as FeedbackTrajectory, y as FeedbackTrajectoryFilter, a as FeedbackTrajectoryStore, z as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, A as ProposedSideEffect, D as assignFeedbackSplit, E as controlRunToFeedbackTrajectory, G as createFeedbackTrajectory, H as feedbackTrajectoriesToDatasetScenarios, J as feedbackTrajectoriesToOptimizerRows, K as feedbackTrajectoryToDatasetScenario, L as feedbackTrajectoryToOptimizerRow, N as parseFeedbackTrajectoriesJsonl, O as renderPreferenceMemoryMarkdown, Q as replayFeedbackTrajectories, R as replayFeedbackTrajectory, U as serializeFeedbackTrajectoriesJsonl, Y as summarizePreferenceMemory, Z as withAssignedFeedbackSplit } from './feedback-trajectory-BGQ_ANCN.js';
4
+ export { n as FeedbackArtifactType, o as FeedbackAttempt, F as FeedbackLabel, p as FeedbackLabelKind, q as FeedbackLabelSource, r as FeedbackOptimizerRow, s as FeedbackOutcome, t as FeedbackReplayAdapter, u as FeedbackReplayResult, v as FeedbackSeverity, w as FeedbackSplitPolicy, x as FeedbackTask, b as FeedbackTrajectory, y as FeedbackTrajectoryFilter, a as FeedbackTrajectoryStore, z as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, A as ProposedSideEffect, D as assignFeedbackSplit, E as controlRunToFeedbackTrajectory, G as createFeedbackTrajectory, H as feedbackTrajectoriesToDatasetScenarios, J as feedbackTrajectoriesToOptimizerRows, K as feedbackTrajectoryToDatasetScenario, L as feedbackTrajectoryToOptimizerRow, N as parseFeedbackTrajectoriesJsonl, O as renderPreferenceMemoryMarkdown, Q as replayFeedbackTrajectories, R as replayFeedbackTrajectory, U as serializeFeedbackTrajectoriesJsonl, Y as summarizePreferenceMemory, Z as withAssignedFeedbackSplit } from './feedback-trajectory-CB0A32o3.js';
5
5
  import './dataset-B9qvlm_o.js';
6
- import './emitter-BYO2nSDA.js';
6
+ import './emitter-B2XqDKFU.js';
7
+ import './store-u47QaJ9G.js';
7
8
 
8
9
  /**
9
10
  * Researcher interface — stable hook for an external autonomous-research
@@ -25,9 +25,9 @@ import {
25
25
  summarizePreferenceMemory,
26
26
  trialTraceFromMultiShotTrial,
27
27
  withAssignedFeedbackSplit
28
- } from "./chunk-HKYRWNHV.js";
28
+ } from "./chunk-HRZELXCR.js";
29
29
  import "./chunk-YUFXO3TU.js";
30
- import "./chunk-ODFINDLQ.js";
30
+ import "./chunk-KRR4VMH7.js";
31
31
  import "./chunk-PZ5AY32C.js";
32
32
  export {
33
33
  CallbackResearcher,