@tangle-network/agent-eval 0.77.0 → 0.80.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -19
- package/dist/adapters/http.d.ts +2 -2
- package/dist/adapters/langchain.d.ts +2 -2
- package/dist/adapters/otel.d.ts +4 -4
- package/dist/{agent-profile-DYRboYWu.d.ts → agent-profile-aSEaJ9Pl.d.ts} +1 -1
- package/dist/analyst/index.d.ts +42 -8
- package/dist/analyst/index.js +32 -2
- package/dist/analyst/index.js.map +1 -1
- package/dist/authenticity/index.d.ts +54 -1
- package/dist/authenticity/index.js +88 -1
- package/dist/authenticity/index.js.map +1 -1
- package/dist/belief-state/index.d.ts +188 -0
- package/dist/belief-state/index.js +486 -0
- package/dist/belief-state/index.js.map +1 -0
- package/dist/benchmarks/index.d.ts +2 -2
- package/dist/calibration-Cpr3WaX3.d.ts +101 -0
- package/dist/campaign/index.d.ts +11 -11
- package/dist/campaign/index.js +4 -4
- package/dist/chunk-4DIJWVUT.js +131 -0
- package/dist/chunk-4DIJWVUT.js.map +1 -0
- package/dist/{chunk-7W4SM7FD.js → chunk-5LVWPNS5.js} +91 -91
- package/dist/chunk-5LVWPNS5.js.map +1 -0
- package/dist/{chunk-WYIHD6EB.js → chunk-CF67I6QY.js} +1 -1
- package/dist/chunk-CF67I6QY.js.map +1 -0
- package/dist/{chunk-XPILG2CA.js → chunk-GXHLRXDI.js} +2 -2
- package/dist/{chunk-F3SRAAZO.js → chunk-KWRRMR3J.js} +15 -1
- package/dist/chunk-KWRRMR3J.js.map +1 -0
- package/dist/chunk-NPCTHQIO.js +91 -0
- package/dist/chunk-NPCTHQIO.js.map +1 -0
- package/dist/{chunk-JYE3WOTE.js → chunk-RPLZ4OIB.js} +10 -1
- package/dist/chunk-RPLZ4OIB.js.map +1 -0
- package/dist/{chunk-6EKXFFGQ.js → chunk-RTWFUK6A.js} +2 -2
- package/dist/{chunk-XGNCBAVZ.js → chunk-XQL22JDG.js} +2 -2
- package/dist/{chunk-GJJNJVIR.js → chunk-XXNIODOM.js} +2 -2
- package/dist/contract/index.d.ts +128 -15
- package/dist/contract/index.js +118 -2
- package/dist/contract/index.js.map +1 -1
- package/dist/{control-BgA6BYTm.d.ts → control-CehLtoET.d.ts} +1 -1
- package/dist/control.d.ts +2 -2
- package/dist/control.js +2 -2
- package/dist/governance/index.d.ts +1 -1
- package/dist/hosted/index.d.ts +4 -4
- package/dist/{index-DsnOpCO6.d.ts → index-B1RKber3.d.ts} +1 -1
- package/dist/index.d.ts +127 -26
- package/dist/index.js +32 -7
- package/dist/index.js.map +1 -1
- package/dist/{insight-report-Df3lxYXM.d.ts → insight-report-dlpEzQDi.d.ts} +1 -1
- package/dist/{kind-factory-DW9XWPvM.d.ts → kind-factory-DqV2t1Xk.d.ts} +1 -1
- package/dist/meta-eval/index.d.ts +6 -99
- package/dist/meta-eval/index.js +7 -76
- package/dist/meta-eval/index.js.map +1 -1
- package/dist/off-policy-DiwuKKg7.d.ts +132 -0
- package/dist/openapi.json +1 -1
- package/dist/{outcome-store-D6KWmYvj.d.ts → outcome-store-rnXLEqSn.d.ts} +1 -1
- package/dist/{provenance-B-TFszPW.d.ts → provenance-jG-Gngg8.d.ts} +3 -3
- package/dist/{registry-DuVYiTvw.d.ts → registry-BK0Zee01.d.ts} +1 -1
- package/dist/{release-report-CN8hJlhk.d.ts → release-report-CXXZlR8g.d.ts} +2 -2
- package/dist/reporting.d.ts +5 -5
- package/dist/{researcher-C_KJyIGg.d.ts → researcher-rInLj9De.d.ts} +2 -2
- package/dist/rl.d.ts +10 -140
- package/dist/rl.js +8 -122
- package/dist/rl.js.map +1 -1
- package/dist/{rubric-predictive-validity-D_4BSXGV.d.ts → rubric-predictive-validity-CLPuwiUw.d.ts} +2 -2
- package/dist/{run-improvement-loop-BqYH2vCR.d.ts → run-improvement-loop-BAl_aVOZ.d.ts} +2 -4
- package/dist/{run-record-BgTFzO2r.d.ts → run-record-sItO5ftF.d.ts} +11 -0
- package/dist/{semantic-concept-judge-CV9Wlx4t.d.ts → semantic-concept-judge-qXEUV2w7.d.ts} +3 -3
- package/dist/{summary-report-ByiOUrHj.d.ts → summary-report-BTaXq1TS.d.ts} +1 -1
- package/dist/traces.d.ts +1 -1
- package/dist/traces.js +2 -2
- package/dist/{types-Bba0vl1V.d.ts → types-4mm2msnR.d.ts} +12 -4
- package/dist/{types-CRD68aH7.d.ts → types-DRvV0zRo.d.ts} +10 -1
- package/dist/workflow/index.d.ts +4 -4
- package/dist/workflow/index.js +1 -1
- package/docs/auto-research-loop-end-to-end.md +1 -1
- package/docs/feature-guide.md +4 -4
- package/docs/multi-shot-optimization.md +61 -115
- package/docs/product-eval-adoption.md +1 -1
- package/docs/research/belief-state-agent-eval-roadmap.md +558 -0
- package/docs/research/research-roadmap.md +1 -0
- package/docs/three-package-architecture.md +1 -1
- package/docs/trace-analysis.md +19 -0
- package/package.json +7 -2
- package/dist/chunk-7W4SM7FD.js.map +0 -1
- package/dist/chunk-F3SRAAZO.js.map +0 -1
- package/dist/chunk-JYE3WOTE.js.map +0 -1
- package/dist/chunk-WYIHD6EB.js.map +0 -1
- /package/dist/{chunk-XPILG2CA.js.map → chunk-GXHLRXDI.js.map} +0 -0
- /package/dist/{chunk-6EKXFFGQ.js.map → chunk-RTWFUK6A.js.map} +0 -0
- /package/dist/{chunk-XGNCBAVZ.js.map → chunk-XQL22JDG.js.map} +0 -0
- /package/dist/{chunk-GJJNJVIR.js.map → chunk-XXNIODOM.js.map} +0 -0
|
@@ -3,7 +3,7 @@ import { C as ControlEvalResult, a as ControlRunResult, h as ControlRuntimeConfi
|
|
|
3
3
|
import { T as TraceEmitter } from './emitter-DEZwY14K.js';
|
|
4
4
|
import { F as FailureClass } from './schema-m0gsnbt3.js';
|
|
5
5
|
import { T as TraceStore } from './store-CKUAgsJz.js';
|
|
6
|
-
import { b as RunSplitTag, a as RunTokenUsage, R as RunRecord } from './run-record-BgTFzO2r.js';
|
|
6
|
+
import { b as RunSplitTag, a as RunTokenUsage, R as RunRecord } from './run-record-sItO5ftF.js';
|
|
7
7
|
|
|
8
8
|
interface ActionExecutionPolicy {
|
|
9
9
|
allowedTypes?: string[];
|
package/dist/control.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-
|
|
1
|
+
export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, p as RunEvidenceMetadata, s as controlRunToRunRecord, u as evaluateActionPolicy, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-CehLtoET.js';
|
|
2
2
|
export { c as ControlActionFailureMode, d as ControlActionOutcome, e as ControlBudget, f as ControlContext, g as ControlDecision, C as ControlEvalResult, a as ControlRunResult, h as ControlRuntimeConfig, i as ControlRuntimeError, j as ControlSeverity, b as ControlStep, k as ControlStopPolicies, S as StopDecision, l as allCriticalPassed, o as objectiveEval, r as runAgentControlLoop, s as stopOnNoProgress, m as stopOnRepeatedAction, n as subjectiveEval } from './control-runtime-DuFBYg7A.js';
|
|
3
3
|
import './feedback-trajectory-B3rErRsh.js';
|
|
4
4
|
import './dataset-B2kL-fSM.js';
|
|
@@ -6,4 +6,4 @@ import './errors-Dwqw-T_m.js';
|
|
|
6
6
|
import './emitter-DEZwY14K.js';
|
|
7
7
|
import './schema-m0gsnbt3.js';
|
|
8
8
|
import './store-CKUAgsJz.js';
|
|
9
|
-
import './run-record-BgTFzO2r.js';
|
|
9
|
+
import './run-record-sItO5ftF.js';
|
package/dist/control.js
CHANGED
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
runProposeReview,
|
|
5
5
|
runProposeReviewAsControlLoop,
|
|
6
6
|
scoreFromEvals
|
|
7
|
-
} from "./chunk-6EKXFFGQ.js";
|
|
7
|
+
} from "./chunk-RTWFUK6A.js";
|
|
8
8
|
import {
|
|
9
9
|
allCriticalPassed,
|
|
10
10
|
objectiveEval,
|
|
@@ -13,7 +13,7 @@ import {
|
|
|
13
13
|
stopOnRepeatedAction,
|
|
14
14
|
subjectiveEval
|
|
15
15
|
} from "./chunk-NCRFYPS3.js";
|
|
16
|
-
import "./chunk-F3SRAAZO.js";
|
|
16
|
+
import "./chunk-KWRRMR3J.js";
|
|
17
17
|
import "./chunk-TVVP3ZZQ.js";
|
|
18
18
|
import "./chunk-VSMTAMNK.js";
|
|
19
19
|
import "./chunk-3BFEG2F6.js";
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { c as DatasetManifest } from '../dataset-B2kL-fSM.js';
|
|
2
2
|
import { b as CalibrationResult } from '../judge-calibration-DilmB3Ml.js';
|
|
3
|
-
import {
|
|
3
|
+
import { b as OutcomeStore } from '../outcome-store-rnXLEqSn.js';
|
|
4
4
|
import { d as RedTeamReport } from '../red-team-DW9Ca_tj.js';
|
|
5
5
|
import { T as TraceStore } from '../store-CKUAgsJz.js';
|
|
6
6
|
import '../errors-Dwqw-T_m.js';
|
package/dist/hosted/index.d.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { M as MutableSurface,
|
|
2
|
-
import { I as InsightReport } from '../insight-report-Df3lxYXM.js';
|
|
3
|
-
import '../run-record-BgTFzO2r.js';
|
|
1
|
+
import { M as MutableSurface, c as GateDecision } from '../types-4mm2msnR.js';
|
|
2
|
+
import { I as InsightReport } from '../insight-report-dlpEzQDi.js';
|
|
3
|
+
import '../run-record-sItO5ftF.js';
|
|
4
4
|
import '../errors-Dwqw-T_m.js';
|
|
5
5
|
import '../schema-m0gsnbt3.js';
|
|
6
|
-
import '../summary-report-ByiOUrHj.js';
|
|
6
|
+
import '../summary-report-BTaXq1TS.js';
|
|
7
7
|
import '../failure-cluster-CL7IVgkJ.js';
|
|
8
8
|
import '../store-CKUAgsJz.js';
|
|
9
9
|
import '../judge-calibration-DilmB3Ml.js';
|
package/dist/index.d.ts
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, L as LlmJsonCall, b as LlmReviewerConfig, P as ProposeFn, c as ProposeInput, d as ProposeOutput, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, k as ProposeReviewShot, R as Review, l as ReviewFn, m as ReviewInput, n as ReviewMemoryEntry, o as ReviewMemoryStore, p as RunEvidenceMetadata, V as Verification, q as VerifyFn, r as controlFailureClassFromVerification, s as controlRunToRunRecord, t as createLlmReviewer, u as evaluateActionPolicy, v as inMemoryReviewStore, w as jsonlReviewStore, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-
|
|
2
|
-
import { R as RunRecord } from './run-record-BgTFzO2r.js';
|
|
3
|
-
export { e as AGENT_PROFILE_KINDS, A as AgentProfileCell, d as AgentProfileCellInput, f as AgentProfileCellSchemaVersion, g as AgentProfileCellValidationError, h as AgentProfileDimensionValue, i as AgentProfileHarness, j as AgentProfileJson, k as AgentProfileKind, l as AgentProfileSource, m as AgentProfileSourceInput, J as JudgeScoresRecord, c as RunJudgeMetadata, n as RunOutcome, o as RunRecordValidationError, b as RunSplitTag, a as RunTokenUsage, S as SandboxAgentProfileLike, p as agentProfileCellHashMaterial, q as agentProfileCellKey, r as assertRunAgentProfileCell, s as buildAgentProfileCell, t as buildSandboxAgentProfileCell, u as groupRunsByAgentProfileCell, v as isRunRecord, w as parseRunRecordSafe, x as requireAgentProfileCell, y as roundTripRunRecord, z as toAgentProfileJson, B as validateAgentProfileCell, C as validateRunRecord, D as verifyAgentProfileCell } from './run-record-
|
|
4
|
-
export { B as BehavioralMetrics, z as ConceptComplexity, A as ConceptFinding, E as ConceptSpec, G as ConceptWeightStrategy, C as CreateAnalystAiConfig, H as DEFAULT_COMPLEXITY_WEIGHTS, D as DEFAULT_TRACE_ANALYST_KINDS, b as DefaultAnalystRegistryOptions, c as DiffPolicy, F as FAILURE_MODE_KIND_SPEC, f as FindingSubject, g as FindingSubjectKind, i as FindingsDiff, j as FindingsStore, I as IMPROVEMENT_KIND_SPEC, k as KNOWLEDGE_GAP_KIND_SPEC, l as KNOWLEDGE_POISONING_KIND_SPEC, P as PersistedFinding, J as SEMANTIC_CONCEPT_JUDGE_VERSION, m as SKILL_USAGE_ANALYST, a as SemanticConceptJudgeInput, S as SemanticConceptJudgeOptions, L as SemanticConceptJudgeResult, n as SkillUsageAnalyst, M as SuboptimalCode, N as SuboptimalSignal, r as buildDefaultAnalystRegistry, O as computeTraceMetrics, t as createAnalystAi, Q as createSemanticConceptJudge, u as defaultIsMaterial, v as diffFindings, R as runSemanticConceptJudge } from './semantic-concept-judge-
|
|
5
|
-
|
|
6
|
-
export { A as
|
|
7
|
-
|
|
8
|
-
export { A as
|
|
1
|
+
export { A as ActionExecutionPolicy, a as ActionPolicyDecision, C as ControlRunToRunRecordOptions, L as LlmJsonCall, b as LlmReviewerConfig, P as ProposeFn, c as ProposeInput, d as ProposeOutput, e as ProposeReviewConfig, f as ProposeReviewControlAction, g as ProposeReviewControlConfig, h as ProposeReviewControlResult, i as ProposeReviewControlState, j as ProposeReviewReport, k as ProposeReviewShot, R as Review, l as ReviewFn, m as ReviewInput, n as ReviewMemoryEntry, o as ReviewMemoryStore, p as RunEvidenceMetadata, V as Verification, q as VerifyFn, r as controlFailureClassFromVerification, s as controlRunToRunRecord, t as createLlmReviewer, u as evaluateActionPolicy, v as inMemoryReviewStore, w as jsonlReviewStore, x as runProposeReview, y as runProposeReviewAsControlLoop, z as scoreFromEvals } from './control-CehLtoET.js';
|
|
2
|
+
import { R as RunRecord } from './run-record-sItO5ftF.js';
|
|
3
|
+
export { e as AGENT_PROFILE_KINDS, A as AgentProfileCell, d as AgentProfileCellInput, f as AgentProfileCellSchemaVersion, g as AgentProfileCellValidationError, h as AgentProfileDimensionValue, i as AgentProfileHarness, j as AgentProfileJson, k as AgentProfileKind, l as AgentProfileSource, m as AgentProfileSourceInput, J as JudgeScoresRecord, c as RunJudgeMetadata, n as RunOutcome, o as RunRecordValidationError, b as RunSplitTag, a as RunTokenUsage, S as SandboxAgentProfileLike, p as agentProfileCellHashMaterial, q as agentProfileCellKey, r as assertRunAgentProfileCell, s as buildAgentProfileCell, t as buildSandboxAgentProfileCell, u as groupRunsByAgentProfileCell, v as isRunRecord, w as parseRunRecordSafe, x as requireAgentProfileCell, y as roundTripRunRecord, z as toAgentProfileJson, B as validateAgentProfileCell, C as validateRunRecord, D as verifyAgentProfileCell } from './run-record-sItO5ftF.js';
|
|
4
|
+
export { B as BehavioralMetrics, z as ConceptComplexity, A as ConceptFinding, E as ConceptSpec, G as ConceptWeightStrategy, C as CreateAnalystAiConfig, H as DEFAULT_COMPLEXITY_WEIGHTS, D as DEFAULT_TRACE_ANALYST_KINDS, b as DefaultAnalystRegistryOptions, c as DiffPolicy, F as FAILURE_MODE_KIND_SPEC, f as FindingSubject, g as FindingSubjectKind, i as FindingsDiff, j as FindingsStore, I as IMPROVEMENT_KIND_SPEC, k as KNOWLEDGE_GAP_KIND_SPEC, l as KNOWLEDGE_POISONING_KIND_SPEC, P as PersistedFinding, J as SEMANTIC_CONCEPT_JUDGE_VERSION, m as SKILL_USAGE_ANALYST, a as SemanticConceptJudgeInput, S as SemanticConceptJudgeOptions, L as SemanticConceptJudgeResult, n as SkillUsageAnalyst, M as SuboptimalCode, N as SuboptimalSignal, r as buildDefaultAnalystRegistry, O as computeTraceMetrics, t as createAnalystAi, Q as createSemanticConceptJudge, u as defaultIsMaterial, v as diffFindings, R as runSemanticConceptJudge } from './semantic-concept-judge-qXEUV2w7.js';
|
|
5
|
+
import { l as ChatRequest, p as CreateChatClientOpts } from './types-DRvV0zRo.js';
|
|
6
|
+
export { A as Analyst, a as AnalystContext, g as AnalystCost, c as AnalystFinding, i as AnalystInputKind, j as AnalystRequirements, f as AnalystRunEvent, e as AnalystRunInputs, d as AnalystRunResult, b as AnalystRunSummary, h as AnalystSeverity, k as ChatCallOpts, C as ChatClient, m as ChatResponse, n as ChatTransport, o as CliBridgeTransportOpts, D as DirectProviderTransportOpts, E as EvidenceRef, M as MockTransportOpts, R as RouterTransportOpts, S as SandboxSdkTransportOpts, q as computeFindingId, r as createChatClient, s as makeFinding } from './types-DRvV0zRo.js';
|
|
7
|
+
export { C as CreateTraceAnalystKindOpts, a as RawAnalystFinding, T as TraceAnalystGolden, c as TraceAnalystKindSpec, d as createTraceAnalystKind, r as renderPriorFindings } from './kind-factory-DqV2t1Xk.js';
|
|
8
|
+
export { A as AnalystHooks, a as AnalystRegistry, b as AnalystRegistryOptions, B as BudgetPolicy, R as RegistryRunOpts } from './registry-BK0Zee01.js';
|
|
9
9
|
import { TCloud } from '@tangle-network/tcloud';
|
|
10
10
|
import { B as BenchmarkRunnerConfig, S as Scenario, c as BenchmarkReport, P as ProductClientConfig, C as CheckResult, T as TestResult, d as PersonaConfig, D as DriverResult, e as DriverState, b as JudgeFn, f as CollectedArtifacts, g as ScenarioResult, h as TurnMetrics, i as ScenarioFile, j as CompletionCriterion } from './types-Croy5h7V.js';
|
|
11
11
|
export { A as ArtifactCheck, k as ArtifactResult, E as EvalResult, F as FeedbackPattern, l as JudgeConfig, a as JudgeInput, m as JudgeRubric, J as JudgeScore, n as PersonaRigor, R as RouteMap, o as RubricDimension, p as Turn, q as TurnResult } from './types-Croy5h7V.js';
|
|
@@ -14,11 +14,11 @@ import { A as AgentEvalError } from './errors-Dwqw-T_m.js';
|
|
|
14
14
|
export { a as AgentEvalErrorCode, C as CaptureIntegrityError, b as ConfigError, J as JudgeError, N as NotFoundError, R as ReplayError, V as ValidationError, c as VerificationError } from './errors-Dwqw-T_m.js';
|
|
15
15
|
import { b as FeedbackLabel, F as FeedbackTrajectoryStore, a as FeedbackTrajectory } from './feedback-trajectory-B3rErRsh.js';
|
|
16
16
|
export { c as FeedbackArtifactType, d as FeedbackAttempt, e as FeedbackLabelKind, f as FeedbackLabelSource, g as FeedbackOptimizerRow, h as FeedbackOutcome, i as FeedbackReplayAdapter, j as FeedbackReplayResult, k as FeedbackSeverity, l as FeedbackSplitPolicy, m as FeedbackTask, n as FeedbackTrajectoryFilter, o as FileSystemFeedbackTrajectoryStore, I as InMemoryFeedbackTrajectoryStore, P as PreferenceMemoryEntry, p as ProposedSideEffect, q as assignFeedbackSplit, r as controlRunToFeedbackTrajectory, s as createFeedbackTrajectory, t as feedbackTrajectoriesToDatasetScenarios, u as feedbackTrajectoriesToOptimizerRows, v as feedbackTrajectoryToDatasetScenario, w as feedbackTrajectoryToOptimizerRow, x as parseFeedbackTrajectoriesJsonl, y as renderPreferenceMemoryMarkdown, z as replayFeedbackTrajectories, A as replayFeedbackTrajectory, B as serializeFeedbackTrajectoriesJsonl, C as summarizePreferenceMemory, D as withAssignedFeedbackSplit } from './feedback-trajectory-B3rErRsh.js';
|
|
17
|
-
import { A as AgentProfile$1 } from './agent-profile-DYRboYWu.js';
|
|
18
|
-
export { c as ArtifactCheckArtifact, d as ArtifactEventLike, e as ArtifactValidator, f as BackendIntegrityError, B as BackendIntegrityReport, C as CompletionRequirement, a as CompletionVerdict, b as CorrectnessChecker, L as LlmCorrectnessCheckerOpts, g as ProducedProposal, P as ProducedState, h as ProposalEventLike, i as RequirementCheck, R as RuntimeEventLike, S as SatisfiedBy, T as TaskGold, j as ToolCallEventLike, V as ValidationContext, k as ValidationIssue, l as ValidationResult, m as agentProfileHash, n as assertRealBackend, o as byteLengthRange, p as composeValidators, q as containsAll, r as createLlmCorrectnessChecker, s as extractProducedState, t as jsonHasKeys, u as parseCorrectnessResponse, v as regexMatch, w as summarizeBackendIntegrity, x as verifyCompletion } from './agent-profile-
|
|
17
|
+
import { A as AgentProfile$1 } from './agent-profile-aSEaJ9Pl.js';
|
|
18
|
+
export { c as ArtifactCheckArtifact, d as ArtifactEventLike, e as ArtifactValidator, f as BackendIntegrityError, B as BackendIntegrityReport, C as CompletionRequirement, a as CompletionVerdict, b as CorrectnessChecker, L as LlmCorrectnessCheckerOpts, g as ProducedProposal, P as ProducedState, h as ProposalEventLike, i as RequirementCheck, R as RuntimeEventLike, S as SatisfiedBy, T as TaskGold, j as ToolCallEventLike, V as ValidationContext, k as ValidationIssue, l as ValidationResult, m as agentProfileHash, n as assertRealBackend, o as byteLengthRange, p as composeValidators, q as containsAll, r as createLlmCorrectnessChecker, s as extractProducedState, t as jsonHasKeys, u as parseCorrectnessResponse, v as regexMatch, w as summarizeBackendIntegrity, x as verifyCompletion } from './agent-profile-aSEaJ9Pl.js';
|
|
19
19
|
export { DataAcquisitionPlan, KnowledgeAcquisitionMode, KnowledgeBundle, KnowledgeFallbackPolicy, KnowledgeFreshness, KnowledgeImportance, KnowledgeReadinessReport, KnowledgeRecommendedAction, KnowledgeRequirement, KnowledgeRequirementCategory, KnowledgeResponsibleSurface, KnowledgeSensitivity, ScoreKnowledgeReadinessOptions, UserQuestion, acquisitionPlansForKnowledgeGaps, blockingKnowledgeEval, knowledgeReadinessTracePayload, scoreKnowledgeReadiness, userQuestionsForKnowledgeGaps } from './knowledge/index.js';
|
|
20
|
-
import { h as ReleaseConfidenceThresholds, f as ReleaseConfidenceScorecard } from './release-report-CN8hJlhk.js';
|
|
21
|
-
export { A as ActionableSideInfo, o as AsiSeverity, B as BootstrapOptions, a as BootstrapResult, J as JudgeReplayGateArgs, R as ReleaseConfidenceAxis, b as ReleaseConfidenceAxisName, c as ReleaseConfidenceInput, d as ReleaseConfidenceIssue, e as ReleaseConfidenceMetrics, g as ReleaseConfidenceStatus, i as ReleaseTraceEvidence, j as RenderReleaseReportOptions, V as Verdict, k as assertReleaseConfidence, l as bootstrapCi, m as evaluateReleaseConfidence, n as judgeReplayGate, r as renderReleaseReport } from './release-report-
|
|
20
|
+
import { h as ReleaseConfidenceThresholds, f as ReleaseConfidenceScorecard } from './release-report-CXXZlR8g.js';
|
|
21
|
+
export { A as ActionableSideInfo, o as AsiSeverity, B as BootstrapOptions, a as BootstrapResult, J as JudgeReplayGateArgs, R as ReleaseConfidenceAxis, b as ReleaseConfidenceAxisName, c as ReleaseConfidenceInput, d as ReleaseConfidenceIssue, e as ReleaseConfidenceMetrics, g as ReleaseConfidenceStatus, i as ReleaseTraceEvidence, j as RenderReleaseReportOptions, V as Verdict, k as assertReleaseConfidence, l as bootstrapCi, m as evaluateReleaseConfidence, n as judgeReplayGate, r as renderReleaseReport } from './release-report-CXXZlR8g.js';
|
|
22
22
|
export { C as CliffsMagnitude, c as CorpusAgreementOptions, d as CorpusAgreementPerDimension, e as CorpusAgreementReport, f as CorpusScoreRecord, P as PairedBootstrapOptions, a as PairedBootstrapResult, W as WeightedCompositeInput, g as WeightedCompositeResult, b as benjaminiHochberg, h as bonferroni, i as cliffsDelta, j as cohensD, k as confidenceInterval, l as corpusInterRaterAgreement, m as corpusInterRaterAgreementFromJudgeScores, n as interRaterReliability, o as interpretCliffs, q as mannWhitneyU, r as normalizeScores, p as pairedBootstrap, s as pairedMde, t as pairedTTest, u as partialCredit, v as requiredSampleSize, x as weightedComposite, y as weightedMean, w as wilcoxonSignedRank } from './statistics-B7yCbi9i.js';
|
|
23
23
|
import { a as AnalyzeTracesInput, A as AnalyzeTracesOptions, b as AnalyzeTracesResult } from './analyst-t7zZS3TV.js';
|
|
24
24
|
export { c as AnalyzeTracesTurnSnapshot, d as analyzeTraces } from './analyst-t7zZS3TV.js';
|
|
@@ -58,23 +58,22 @@ import { b as Layer, S as Severity, L as LayerResult, c as VerifyContext } from
|
|
|
58
58
|
export { F as Finding, d as LayerStatus, M as MultiLayerVerifier, a as VerificationReport, V as VerifyOptions, g as gradeSemanticStatus } from './multi-layer-verifier-DlWCXuxL.js';
|
|
59
59
|
import { L as LlmClientOptions } from './llm-client-DbjLfz-K.js';
|
|
60
60
|
export { d as LlmCallError, b as LlmCallRequest, c as LlmCallResult, e as LlmClient, f as LlmMessage, g as LlmRouteAssertionError, a as LlmRouteRequirements, h as LlmUsage, i as assertLlmRoute, j as backoffMs, k as callLlm, l as callLlmJson, m as isTransientLlmError, p as probeLlm, s as stripFencedJson } from './llm-client-DbjLfz-K.js';
|
|
61
|
-
export { B as BENCHMARK_SPLIT_SEED, a as BenchmarkAdapter, b as BenchmarkDatasetItem, c as BenchmarkEvaluation, d as benchmarkDeterministicSplit, i as benchmarks } from './index-
|
|
62
|
-
export { C as CallbackResearcher, d as CallbackResearcherOptions, e as CampaignFactoryParams, f as CampaignIntegrityPolicy, g as CampaignRunContext, h as CampaignRunOutcome, i as CampaignRunner, j as CampaignScenario, k as CampaignVariant, c as EvalCampaignOptions, b as EvalCampaignResult, E as ExperimentPlan, a as ExperimentResult, l as FailedRun, F as FailureMode, N as NoopResearcher, R as Researcher, S as SteeringChange, r as runEvalCampaign } from './researcher-
|
|
63
|
-
export { G as GainDistributionBin, a as GainDistributionFigureSpec, b as GainDistributionOptions, m as GateDecision, n as GateEvidence, H as HeldOutGate, o as HeldOutGateConfig, q as HeldOutGateRejectionCode, P as ParetoFigureSpec, c as ParetoPoint, R as RESEARCH_REPORT_HARD_PAIR_FLOOR, d as ResearchReport, e as ResearchReportCandidate, f as ResearchReportDecision, g as ResearchReportMethodology, h as ResearchReportOptions, i as ResearchReportRecommendation, S as SummaryTable, j as SummaryTableOptions, k as SummaryTableRow, l as gainHistogram, p as paretoChart, r as researchReport, s as summaryTable } from './summary-report-
|
|
61
|
+
export { B as BENCHMARK_SPLIT_SEED, a as BenchmarkAdapter, b as BenchmarkDatasetItem, c as BenchmarkEvaluation, d as benchmarkDeterministicSplit, i as benchmarks } from './index-B1RKber3.js';
|
|
62
|
+
export { C as CallbackResearcher, d as CallbackResearcherOptions, e as CampaignFactoryParams, f as CampaignIntegrityPolicy, g as CampaignRunContext, h as CampaignRunOutcome, i as CampaignRunner, j as CampaignScenario, k as CampaignVariant, c as EvalCampaignOptions, b as EvalCampaignResult, E as ExperimentPlan, a as ExperimentResult, l as FailedRun, F as FailureMode, N as NoopResearcher, R as Researcher, S as SteeringChange, r as runEvalCampaign } from './researcher-rInLj9De.js';
|
|
63
|
+
export { G as GainDistributionBin, a as GainDistributionFigureSpec, b as GainDistributionOptions, m as GateDecision, n as GateEvidence, H as HeldOutGate, o as HeldOutGateConfig, q as HeldOutGateRejectionCode, P as ParetoFigureSpec, c as ParetoPoint, R as RESEARCH_REPORT_HARD_PAIR_FLOOR, d as ResearchReport, e as ResearchReportCandidate, f as ResearchReportDecision, g as ResearchReportMethodology, h as ResearchReportOptions, i as ResearchReportRecommendation, S as SummaryTable, j as SummaryTableOptions, k as SummaryTableRow, l as gainHistogram, p as paretoChart, r as researchReport, s as summaryTable } from './summary-report-BTaXq1TS.js';
|
|
64
64
|
export { I as InterimReleaseConfidence, a as InterimReleaseConfidenceInput, P as PairedEvalueOptions, b as PairedEvalueSequence, c as PairedEvalueStep, S as SequentialDecision, e as evaluateInterimReleaseConfidence, p as pairedEvalueSequence } from './sequential-5iSVfzl2.js';
|
|
65
|
-
import { S as Scenario$1, a as JudgeConfig, G as Gate } from './types-Bba0vl1V.js';
|
|
66
|
-
import { d as GepaDriverConstraints, R as RunImprovementLoopResult } from './run-improvement-loop-BqYH2vCR.js';
|
|
65
|
+
import { S as Scenario$1, a as JudgeConfig, G as Gate } from './types-4mm2msnR.js';
|
|
66
|
+
import { d as GepaDriverConstraints, R as RunImprovementLoopResult } from './run-improvement-loop-BAl_aVOZ.js';
|
|
67
67
|
import '@ax-llm/ax';
|
|
68
68
|
import 'zod';
|
|
69
|
-
import './outcome-store-D6KWmYvj.js';
|
|
69
|
+
import './outcome-store-rnXLEqSn.js';
|
|
70
70
|
|
|
71
71
|
/**
|
|
72
|
-
* Automated pull request opener for the
|
|
72
|
+
* Automated pull request opener for the improvement loop.
|
|
73
73
|
*
|
|
74
|
-
* `
|
|
75
|
-
*
|
|
76
|
-
*
|
|
77
|
-
* exactly that:
|
|
74
|
+
* When `runImprovementLoop` ships a winner (`autoOnPromote: 'pr'`) it produces
|
|
75
|
+
* a promoted surface diff. To close the eval → prod → eval cycle the framework
|
|
76
|
+
* lands that change as a reviewable code change. This module does exactly that:
|
|
78
77
|
*
|
|
79
78
|
* 1. Stage a branch off `baseBranch`.
|
|
80
79
|
* 2. Write each `fileChange` into the worktree.
|
|
@@ -1904,6 +1903,110 @@ declare function collectionPreserved<T, K extends keyof T & string>(key: K, minR
|
|
|
1904
1903
|
/** Common check: a status field advanced in an expected order. */
|
|
1905
1904
|
declare function statusAdvanced<T extends Record<string, unknown>>(key: keyof T & string, progression: readonly string[]): ContinuityCheck<T>;
|
|
1906
1905
|
|
|
1906
|
+
/**
|
|
1907
|
+
* UI audit finding — substrate primitive for "what is wrong with the UI?"
|
|
1908
|
+
*
|
|
1909
|
+
* Used by:
|
|
1910
|
+
* - `@tangle-network/agent-runtime` (ui-auditor profile + delegate) —
|
|
1911
|
+
* produced as the canonical output of an audit iteration, persisted to
|
|
1912
|
+
* disk as GitHub-issue Markdown, surfaced over MCP.
|
|
1913
|
+
* - Downstream ship gates / dashboards / analyst consumers — load and
|
|
1914
|
+
* transform findings without depending on the runtime.
|
|
1915
|
+
*
|
|
1916
|
+
* Repo layering: agent-eval is the substrate (no upward deps). Consumers
|
|
1917
|
+
* read this type from here; the reverse is forbidden. See CLAUDE.md
|
|
1918
|
+
* "Repo layering" for the rule. A UI finding makes sense WITHOUT a running
|
|
1919
|
+
* agent loop (you can load a saved finding, ship-gate against a set of
|
|
1920
|
+
* them, render them in a dashboard), which puts it firmly in substrate.
|
|
1921
|
+
*
|
|
1922
|
+
* The shape is intentionally minimal — runtime-shaped state (capture
|
|
1923
|
+
* timestamps, OTel trace IDs, sandbox placement) lives on auxiliary
|
|
1924
|
+
* runtime types in `agent-runtime`, not on the finding itself.
|
|
1925
|
+
*/
|
|
1926
|
+
/**
|
|
1927
|
+
* Canonical audit lenses. Each lens scopes a finding to a single class of
|
|
1928
|
+
* problem so a single audit pass can iterate them without pile-on findings
|
|
1929
|
+
* under a generic label.
|
|
1930
|
+
*
|
|
1931
|
+
* Naming is fixed for cross-package wire compatibility. Treat additions as
|
|
1932
|
+
* a substrate-level decision — analysts, gates, and writers all branch on
|
|
1933
|
+
* the lens.
|
|
1934
|
+
*/
|
|
1935
|
+
type UiLens = 'consistency' | 'hierarchy' | 'layout' | 'ux-flow' | 'duplication' | 'accessibility' | 'responsive' | 'states' | 'content' | 'interaction' | 'performance-perceived' | 'other';
|
|
1936
|
+
/** Frozen tuple of lenses for validation + iteration. */
|
|
1937
|
+
declare const UI_LENSES: readonly UiLens[];
|
|
1938
|
+
/**
|
|
1939
|
+
* Severity scale — intentionally narrow.
|
|
1940
|
+
*
|
|
1941
|
+
* - `critical` — blocks a core task or is an accessibility blocker.
|
|
1942
|
+
* - `high` — confusing, broken-looking, or noticeable friction.
|
|
1943
|
+
* - `med` — visible polish issue, would be caught in code review.
|
|
1944
|
+
* - `low` — nitpick worth fixing eventually.
|
|
1945
|
+
*/
|
|
1946
|
+
type UiFindingSeverity = 'low' | 'med' | 'high' | 'critical';
|
|
1947
|
+
/** Frozen severity tuple, ordered worst → least bad for sort/report. */
|
|
1948
|
+
declare const UI_FINDING_SEVERITIES: readonly UiFindingSeverity[];
|
|
1949
|
+
/**
|
|
1950
|
+
* Pointer to a screenshot referenced by the finding. The path is
|
|
1951
|
+
* intentionally a relative string (relative to the audit workspace root)
|
|
1952
|
+
* so findings remain portable across machines and into GitHub issues.
|
|
1953
|
+
*/
|
|
1954
|
+
interface UiFindingScreenshot {
|
|
1955
|
+
/** Workspace-relative path to the screenshot file (e.g. `screenshots/home--1280x800--...png`). */
|
|
1956
|
+
path: string;
|
|
1957
|
+
/** Optional viewport the screenshot was taken at, e.g. `1280x800`. */
|
|
1958
|
+
viewport?: string;
|
|
1959
|
+
/** Optional short label that disambiguates multiple captures of the same surface (e.g. `t0`, `step-1`). */
|
|
1960
|
+
label?: string;
|
|
1961
|
+
}
|
|
1962
|
+
/**
|
|
1963
|
+
* A single UI audit finding — the unit of work a contributor can act on.
|
|
1964
|
+
*
|
|
1965
|
+
* Every field except the documented optionals is required. The shape is
|
|
1966
|
+
* deliberately constraining: a finding without a screenshot, a lens, a
|
|
1967
|
+
* concrete title, and a suggested fix is not actionable, and the auditor
|
|
1968
|
+
* validator hard-fails on those gaps.
|
|
1969
|
+
*/
|
|
1970
|
+
interface UiFinding {
|
|
1971
|
+
/**
|
|
1972
|
+
* Stable identifier within a single audit workspace. Monotonically
|
|
1973
|
+
* increasing integer (1, 2, …) assigned by the writer when persisting.
|
|
1974
|
+
* Optional in transit (before persistence) — undefined on freshly minted
|
|
1975
|
+
* findings emitted from a loop iteration.
|
|
1976
|
+
*/
|
|
1977
|
+
id?: number;
|
|
1978
|
+
/** Concrete title — names the offending element AND what's wrong. */
|
|
1979
|
+
title: string;
|
|
1980
|
+
/** Lens this finding belongs to. */
|
|
1981
|
+
lens: UiLens;
|
|
1982
|
+
/** Severity. */
|
|
1983
|
+
severity: UiFindingSeverity;
|
|
1984
|
+
/** Logical route the finding was observed on (e.g. `home`, `checkout-step-2`). */
|
|
1985
|
+
route: string;
|
|
1986
|
+
/** Fully qualified URL the finding was observed at. */
|
|
1987
|
+
url?: string;
|
|
1988
|
+
/** Viewport string the offending capture was taken at (e.g. `1280x800`). */
|
|
1989
|
+
viewport?: string;
|
|
1990
|
+
/** CSS selector pinning the offending element, when one can be identified. */
|
|
1991
|
+
selector?: string;
|
|
1992
|
+
/** 1–3 sentences describing what the screenshot shows that is wrong. */
|
|
1993
|
+
observation: string;
|
|
1994
|
+
/** Who is affected and how. Concrete user impact. */
|
|
1995
|
+
impact: string;
|
|
1996
|
+
/** A specific change a contributor could apply without asking back. */
|
|
1997
|
+
suggestedFix: string;
|
|
1998
|
+
/** Optional explicit reproduction steps. Writer synthesizes from route/url/selector when omitted. */
|
|
1999
|
+
reproSteps?: string;
|
|
2000
|
+
/** Free-form tags. */
|
|
2001
|
+
tags?: readonly string[];
|
|
2002
|
+
/** Screenshot references — required to be non-empty for actionable findings. */
|
|
2003
|
+
screenshots: readonly UiFindingScreenshot[];
|
|
2004
|
+
/** Cross-references to similar findings already on file, by id. */
|
|
2005
|
+
similarTo?: readonly number[];
|
|
2006
|
+
/** ISO-8601 creation timestamp set by the writer when persisted. */
|
|
2007
|
+
createdAt?: string;
|
|
2008
|
+
}
|
|
2009
|
+
|
|
1907
2010
|
/**
|
|
1908
2011
|
* Behavior DSL — pytest-style assertions over a run's trajectory.
|
|
1909
2012
|
*
|
|
@@ -4231,8 +4334,6 @@ declare function createSandboxPool<T>(opts: CreateSandboxPoolOpts<T>): SandboxPo
|
|
|
4231
4334
|
* Pipeline-level OTEL integration — auto-attaches an OTEL exporter when
|
|
4232
4335
|
* OTEL_EXPORTER_OTLP_ENDPOINT is set. Pipelines call `withOtelPipeline()`
|
|
4233
4336
|
* to get a configured exporter + shutdown handle without manual wiring.
|
|
4234
|
-
*
|
|
4235
|
-
* Used by: runEvalCampaign, runProductionLoop, runAgentMatrix.
|
|
4236
4337
|
*/
|
|
4237
4338
|
|
|
4238
4339
|
interface OtelPipelineHandle {
|
|
@@ -4718,4 +4819,4 @@ declare namespace index {
|
|
|
4718
4819
|
export { type index_AgentProfile as AgentProfile, type index_AgentProfileSection as AgentProfileSection, index_BASELINE_ROLES as BASELINE_ROLES, type index_BaselineRoleKey as BaselineRoleKey, type index_ProfileSkill as ProfileSkill, index_applyDomainPatch as applyDomainPatch, index_baselineProfile as baselineProfile, index_baselineProfileFromRole as baselineProfileFromRole, index_engineerRole as engineerRole, index_generalistRole as generalistRole, index_prodProfile as prodProfile, index_profileToSurface as profileToSurface, index_renderProfile as renderProfile, index_researcherRole as researcherRole, index_sectionHash as sectionHash };
|
|
4719
4820
|
}
|
|
4720
4821
|
|
|
4721
|
-
export { type ActiveLearningOptions, type AdapterRun, AgentDriver, type AgentDriverConfig, AgentEvalError, AgentProfile$1 as AgentProfile, type AgreementResult, type AlignmentOp, AnalyzeTracesInput, AnalyzeTracesOptions, AnalyzeTracesResult, type AntiSlopConfig, type AntiSlopIssue, type AntiSlopReport, type AssertCrossFamilyOptions, type AssertSingleBackendOptions, type AutoPrClient, AxGepaSteeringOptimizer, type AxSteeringOptimizerConfig, type BackendDescriptor, BaselineReport, BehaviorAssertion, BenchmarkReport, BenchmarkRunner, BenchmarkRunnerConfig, type BisectOptions, type BisectResult, type BisectStep, BudgetBreachError, BudgetGuard, BudgetLedgerEntry, BudgetSpec, type BuildAgreementJudgeOptions, CallExpectation, type CanaryAlert, type CanaryKind, type CanaryLeak, type CanaryOptions, type CanaryReport, type CanarySeverity, type CandidateScenario, type CausalAttributionReport, type CellVerdict, CheckResult, CollectedArtifacts, type CommandRunner, type CompareLabels, CompletionCriterion, type ContinuityCheck, type ContinuityCheckResult, type ContinuityReport, type ContinuitySnapshotPair, type ContractMetric, type ContractReport, ConvergenceTracker, type CostEntry, type CostSummary, CostTracker, type CounterfactualContext, type CounterfactualMutation, type CounterfactualResult, type CounterfactualRunner, type CreateDefaultReviewerOptions, type CreateSandboxPoolOpts, CrossFamilyError, type CrossTraceDiff, type CrossTraceDiffOptions, D1ExperimentStore, type D1ExperimentStoreOptions, type D1Like, type D1PreparedStatementLike, DEFAULT_AGENT_SLOS, DEFAULT_FINDERS, DEFAULT_MUTATION_PRIMITIVES, DEFAULT_MUTATORS, DEFAULT_PR_REVIEW_SCORE_WEIGHTS, DEFAULT_SEVERITY_WEIGHTS, Dataset, DatasetScenario, type DecideNextUserTurnOpts, type DeployFamily, type DeployGateLayerInput, type DeployRunResult, type DeployRunner, type DiffScorecardOptions, type DirEntry, type DiscoverPersonasOptions, type DiscoveredPersona, DriverResult, DriverState, DualAgentBench, type 
DualAgentBenchConfig, type DualAgentReport, type DualAgentRound, type DualAgentScenario, type DualAgentScenarioResult, ERROR_COUNT_PATTERNS, type ErrorCountPattern, type EvolutionRound, type ExecutorConfig, type Expectation, type Experiment, type Run as ExperimentRun, type ExperimentStore, ExperimentTracker, type ExportedRewardModel, type ExtractOptions, type ExtractResult, type FactorContribution, type FactorialCell, FeedbackLabel, FeedbackTrajectory, FeedbackTrajectoryStore, type FieldAgreementSpec, type FileChange, FileSystemExperimentStore, type FileSystemExperimentStoreOptions, type FlowAction, type FlowLayerEnv, type FlowLayerFactoryInput, type FlowRunner, type FlowRunnerStepResult, type FlowSpec, type FlowStep, type GhCliClientOptions, type GoldScenario, type GoldSplit, type GoldenSeverity, type GoldenSpec, HarnessConfig, HoldoutAuditor, type HostedJudgeConfig, type HostedJudgeDimension, type HostedJudgeRequest, type HostedJudgeResponse, type HostedRunCriticConfig, type HostedRunScoreRequest, type HostedRunScoreResponse, type HttpGithubClientOptions, type HypothesisManifest, type HypothesisResult, INTENT_MATCH_JUDGE_VERSION, type ImageData, InMemoryExperimentStore, InMemoryWorkspaceInspector, type InferenceScorer, type InspectorContext, type IntentMatchInput, type IntentMatchOptions, type IntentMatchResult, type InteractionContribution, type JudgeFamily, type JudgeFleetOptions, JudgeFn, type JudgeReplayResult, type JudgeRetryOutcome, type JudgeRetryPolicy, JudgeRunner, type KeywordConceptSpec, type KeywordCoverageFinding, type KeywordCoverageOptions, type KeywordCoverageResult, type LangfuseEnvelope, type LangfuseGeneration, type LangfuseScore, Layer, LayerResult, type LiveProofArtifact, type LiveProofConfig, type LiveProofContext, type LiveProofResult, LlmClientOptions, LlmSpan, LockedJsonlAppender, MODEL_PRICING, type MatchResult, type MatcherResult, type MergeOptions, MetricsCollector, type MuffledFinder, type MuffledFinding, type 
MultiToolchainLayerConfig, type Mutator, Mutex, type Oracle, type OracleObservation, type OracleReport, type OracleResult, type OrthogonalityInput, type OrthogonalityResult, OtelExportConfig, OtelExporter, type OtelPipelineHandle, type OtelPipelineOptions, PairwiseSteeringOptimizer, type ParaphraseRobustnessScenarioInput, type ParaphraseRobustnessScenarioResult, type ParseStudentLabel, PersonaConfig, type Playbook, type PlaybookEntry, type PoolSlot, type PrReviewAuditCase, type PrReviewBenchmarkSummary, type PrReviewComment, type PrReviewMatchedFinding, type PrReviewOutcome, type PrReviewReferenceFinding, type PrReviewScore, type PrReviewScoreWeights, type PrReviewSeverity, type PrReviewSource, ProductClient, ProductClientConfig, type PromptHandle, PromptRegistry, type ProposeAutomatedPullRequestInput, type ProposeAutomatedPullRequestResult, type RecordRunsOptions, type ReferenceMatchResult, type ReferenceReplayAdapter, type ReferenceReplayAdapterFn, type ReferenceReplayAdapterLike, type ReferenceReplayAggregate, type ReferenceReplayCandidate, type ReferenceReplayCase, type ReferenceReplayCaseRun, type ReferenceReplayExecutionScenario, type ReferenceReplayItem, type ReferenceReplayMatch, type ReferenceReplayMatchStrategy, type ReferenceReplayMatcher, type ReferenceReplayPromotionDecision, type ReferenceReplayPromotionPolicy, type ReferenceReplayRun, type ReferenceReplayRunContext, type ReferenceReplayRunOptions, type ReferenceReplayRunStore, type ReferenceReplayScenario, type ReferenceReplayScenarioScore, type ReferenceReplayScore, type ReferenceReplayScoreOptions, type ReferenceReplaySplit, type ReferenceReplaySplitComparison, type ReferenceReplaySteeringRowsOptions, type ReflectionContext, type ReflectionProposal, ReleaseConfidenceScorecard, ReleaseConfidenceThresholds, type RenderStudentPrompt, type RepoRef, type ReviewerMemoryEntry, type ReviewerOutput, type ReviewerPromptInput, type ReviewerSoftFailDefaults, type ReviewerVerificationSummary, type 
RobustnessResult, Run$1 as Run, type RunCommandInput, type RunCommandResult, type RunConfig, RunCriticOptions, type RunDiff, type RunDistillationOptions, type RunDistillationResult, RunFilter, RunRecord, RunScore, RunScoreWeights, RunTrace, SandboxDriver, SandboxHarnessResult, type SandboxJudgeKind, type SandboxJudgeResult, type SandboxJudgeSpec, type SandboxPool, type ScanOptions, Scenario, type ScenarioCost, ScenarioFile, ScenarioRegistry, ScenarioResult, type Scorecard, type ScorecardCell, type ScorecardCellDiff, type ScorecardDiff, type ScorecardEntry, type ScorecardLogLine, type ScoredTarget, type SelfPlayOptions, type SelfPlayProposer, type SelfPlayScorer, type SeriesConvergenceOptions, type SeriesConvergenceResult, Severity, type SignedManifest, type SignedManifestAlgo, type SingleBackendDivergence, SingleBackendError, type SingleBackendField, type SingleBackendReport, type Slo, type SloCheckResult, type SloComparator, type SloReport, type SloSeverity, type SlopCategory, type SlotFactory, type SplitGoldOptions, SteeringBundle, type SteeringOptimizationResult, type SteeringOptimizationRow, type SteeringOptimizationSelector, type SteeringOptimizerBackend, type SteeringOptimizerConfig, type StepAttribution, type SynthesisReason, type SynthesisTarget, TestResult, type ThresholdContract, TokenCounter, type TokenSpec, TraceEmitter, TraceStore, type TracedAnalystOptions, type TracedJudgeOptions, Trajectory, TrajectoryStep, type TrialTrace, TurnMetrics, UNIVERSAL_FINDERS, VerifyContext, type VisualDiffOptions, type VisualDiffResult, type ViteDeployRunnerInput, type WorkspaceAssertion, type WorkspaceAssertionResult, type WorkspaceInspector, type WorkspaceSnapshot, type WranglerDeployRunnerInput, adversarialJudge, aggregatePrReviewScore, analyzeAntiSlop, analyzeSeries, appendScorecard, assertCrossFamily, assertSingleBackend, attributeCounterfactuals, bisect, buildAgreementJudge, buildDriverSystemPrompt, buildReflectionPrompt, buildReviewerPrompt, canaryLeakView, 
canonicalize, causalAttribution, checkBehavioralCanary, checkCanaries, checkSlos, codeExecutionJudge, coherenceJudge, collectionPreserved, commentsForSource, commitBisect, compareReferenceReplay, compilerJudge, createAntiSlopJudge, createCustomJudge, createDefaultReviewer, createDomainExpertJudge, createIntentMatchJudge, createSandboxPool, crossTraceDiff, decideNextUserTurn, decideReferenceReplayPromotion, decideReferenceReplayRunPromotion, defaultJudges, defaultParseStudentLabel, defaultReferenceReplayMatcher, defaultRenderStudentPrompt, deployGateLayer, diffScorecard, discoverPersonas, distillPlaybook, estimateCost, estimateTokens, evaluateContract, evaluateHypothesis, evaluateOracles, executeScenario, expectAgent, exportRewardModel, extractAssetUrls, extractErrorCount, fieldAgreement, fileContains, fileExists, findAutoMatchNoExpectation, findConstructorCwdDropped, findFallbackToPass, findLiteralTruePass, findSkipCountsAsPass, flowLayer, formatBenchmarkReport, formatDriverReport, formatFindings, formatScorecardDiff, ghCliClient, precision as goldenPrecision, hashContent, hashJson, htmlContainsElement, httpGithubClient, inMemoryReferenceReplayStore, isModelPriced, isOtelConfigured, jsonShape, jsonlReferenceReplayStore, judgeFamily, keyPreserved, linterJudge, loadGoldScenarios, loadScorecard, loadScorerFromGrader, localCommandRunner, lowercaseMutator, matchGoldens, mergeLayerResults, multiToolchainLayer, notBlocked, paraphraseRobustness, paraphraseRobustnessScenarios, parseGoldJsonl, parseReflectionResponse, passOrthogonality, pixelDeltaRatio, politenessPrefixMutator, printDriverSummary, index as profile, promptBisect, proposeAutomatedPullRequest, proposeSynthesisTargets, recordRuns, recordRunsToScorecard, referenceReplayRunsToSteeringRows, referenceReplayScenarioToRunScore, regexMatches, renderMarkdownReport, renderPlaybookMarkdown, replayScorerOverCorpus, replayTraceThroughJudge, resetLockedAppendersForTesting, resolveModelPricing, rowCount, rowWhere, 
runAssertions, runBehavioralCanaries, runCanaries, runCounterfactual, runDistillation, runE2EWorkflow, runExpectations, runIntentMatchJudge, runJudgeFleet, runKeywordCoverageJudge, runKeywordCoverageJudgeUrl, runLiveProof, runReferenceReplay, runSelfPlay, scanForMuffledGates, scoreContinuity, scorePrReviewComments, scorePrReviewSource, scoreReferenceReplay, securityJudge, sentenceReorderMutator, signManifest, splitGold, statusAdvanced, summarizePrReviewBenchmark, testJudge, textInSnapshot, toLangfuseEnvelope, toPrometheusText, traceJudge, traceJudgeEnsemble, tracedAnalyzeTraces, typoMutator, urlContains, verifyManifest, visualDiff, viteDeployRunner, weightedRecall, whitespaceCollapseMutator, withJudgeRetry, withOtelPipeline, wranglerDeployRunner };
|
|
4822
|
+
export { type ActiveLearningOptions, type AdapterRun, AgentDriver, type AgentDriverConfig, AgentEvalError, AgentProfile$1 as AgentProfile, type AgreementResult, type AlignmentOp, AnalyzeTracesInput, AnalyzeTracesOptions, AnalyzeTracesResult, type AntiSlopConfig, type AntiSlopIssue, type AntiSlopReport, type AssertCrossFamilyOptions, type AssertSingleBackendOptions, type AutoPrClient, AxGepaSteeringOptimizer, type AxSteeringOptimizerConfig, type BackendDescriptor, BaselineReport, BehaviorAssertion, BenchmarkReport, BenchmarkRunner, BenchmarkRunnerConfig, type BisectOptions, type BisectResult, type BisectStep, BudgetBreachError, BudgetGuard, BudgetLedgerEntry, BudgetSpec, type BuildAgreementJudgeOptions, CallExpectation, type CanaryAlert, type CanaryKind, type CanaryLeak, type CanaryOptions, type CanaryReport, type CanarySeverity, type CandidateScenario, type CausalAttributionReport, type CellVerdict, ChatRequest, CheckResult, CollectedArtifacts, type CommandRunner, type CompareLabels, CompletionCriterion, type ContinuityCheck, type ContinuityCheckResult, type ContinuityReport, type ContinuitySnapshotPair, type ContractMetric, type ContractReport, ConvergenceTracker, type CostEntry, type CostSummary, CostTracker, type CounterfactualContext, type CounterfactualMutation, type CounterfactualResult, type CounterfactualRunner, CreateChatClientOpts, type CreateDefaultReviewerOptions, type CreateSandboxPoolOpts, CrossFamilyError, type CrossTraceDiff, type CrossTraceDiffOptions, D1ExperimentStore, type D1ExperimentStoreOptions, type D1Like, type D1PreparedStatementLike, DEFAULT_AGENT_SLOS, DEFAULT_FINDERS, DEFAULT_MUTATION_PRIMITIVES, DEFAULT_MUTATORS, DEFAULT_PR_REVIEW_SCORE_WEIGHTS, DEFAULT_SEVERITY_WEIGHTS, Dataset, DatasetScenario, type DecideNextUserTurnOpts, type DeployFamily, type DeployGateLayerInput, type DeployRunResult, type DeployRunner, type DiffScorecardOptions, type DirEntry, type DiscoverPersonasOptions, type DiscoveredPersona, DriverResult, DriverState, 
DualAgentBench, type DualAgentBenchConfig, type DualAgentReport, type DualAgentRound, type DualAgentScenario, type DualAgentScenarioResult, ERROR_COUNT_PATTERNS, type ErrorCountPattern, type EvolutionRound, type ExecutorConfig, type Expectation, type Experiment, type Run as ExperimentRun, type ExperimentStore, ExperimentTracker, type ExportedRewardModel, type ExtractOptions, type ExtractResult, type FactorContribution, type FactorialCell, FeedbackLabel, FeedbackTrajectory, FeedbackTrajectoryStore, type FieldAgreementSpec, type FileChange, FileSystemExperimentStore, type FileSystemExperimentStoreOptions, type FlowAction, type FlowLayerEnv, type FlowLayerFactoryInput, type FlowRunner, type FlowRunnerStepResult, type FlowSpec, type FlowStep, type GhCliClientOptions, type GoldScenario, type GoldSplit, type GoldenSeverity, type GoldenSpec, HarnessConfig, HoldoutAuditor, type HostedJudgeConfig, type HostedJudgeDimension, type HostedJudgeRequest, type HostedJudgeResponse, type HostedRunCriticConfig, type HostedRunScoreRequest, type HostedRunScoreResponse, type HttpGithubClientOptions, type HypothesisManifest, type HypothesisResult, INTENT_MATCH_JUDGE_VERSION, type ImageData, InMemoryExperimentStore, InMemoryWorkspaceInspector, type InferenceScorer, type InspectorContext, type IntentMatchInput, type IntentMatchOptions, type IntentMatchResult, type InteractionContribution, type JudgeFamily, type JudgeFleetOptions, JudgeFn, type JudgeReplayResult, type JudgeRetryOutcome, type JudgeRetryPolicy, JudgeRunner, type KeywordConceptSpec, type KeywordCoverageFinding, type KeywordCoverageOptions, type KeywordCoverageResult, type LangfuseEnvelope, type LangfuseGeneration, type LangfuseScore, Layer, LayerResult, type LiveProofArtifact, type LiveProofConfig, type LiveProofContext, type LiveProofResult, LlmClientOptions, LlmSpan, LockedJsonlAppender, MODEL_PRICING, type MatchResult, type MatcherResult, type MergeOptions, MetricsCollector, type MuffledFinder, type MuffledFinding, type 
MultiToolchainLayerConfig, type Mutator, Mutex, type Oracle, type OracleObservation, type OracleReport, type OracleResult, type OrthogonalityInput, type OrthogonalityResult, OtelExportConfig, OtelExporter, type OtelPipelineHandle, type OtelPipelineOptions, PairwiseSteeringOptimizer, type ParaphraseRobustnessScenarioInput, type ParaphraseRobustnessScenarioResult, type ParseStudentLabel, PersonaConfig, type Playbook, type PlaybookEntry, type PoolSlot, type PrReviewAuditCase, type PrReviewBenchmarkSummary, type PrReviewComment, type PrReviewMatchedFinding, type PrReviewOutcome, type PrReviewReferenceFinding, type PrReviewScore, type PrReviewScoreWeights, type PrReviewSeverity, type PrReviewSource, ProductClient, ProductClientConfig, type PromptHandle, PromptRegistry, type ProposeAutomatedPullRequestInput, type ProposeAutomatedPullRequestResult, type RecordRunsOptions, type ReferenceMatchResult, type ReferenceReplayAdapter, type ReferenceReplayAdapterFn, type ReferenceReplayAdapterLike, type ReferenceReplayAggregate, type ReferenceReplayCandidate, type ReferenceReplayCase, type ReferenceReplayCaseRun, type ReferenceReplayExecutionScenario, type ReferenceReplayItem, type ReferenceReplayMatch, type ReferenceReplayMatchStrategy, type ReferenceReplayMatcher, type ReferenceReplayPromotionDecision, type ReferenceReplayPromotionPolicy, type ReferenceReplayRun, type ReferenceReplayRunContext, type ReferenceReplayRunOptions, type ReferenceReplayRunStore, type ReferenceReplayScenario, type ReferenceReplayScenarioScore, type ReferenceReplayScore, type ReferenceReplayScoreOptions, type ReferenceReplaySplit, type ReferenceReplaySplitComparison, type ReferenceReplaySteeringRowsOptions, type ReflectionContext, type ReflectionProposal, ReleaseConfidenceScorecard, ReleaseConfidenceThresholds, type RenderStudentPrompt, type RepoRef, type ReviewerMemoryEntry, type ReviewerOutput, type ReviewerPromptInput, type ReviewerSoftFailDefaults, type ReviewerVerificationSummary, type 
RobustnessResult, Run$1 as Run, type RunCommandInput, type RunCommandResult, type RunConfig, RunCriticOptions, type RunDiff, type RunDistillationOptions, type RunDistillationResult, RunFilter, RunRecord, RunScore, RunScoreWeights, RunTrace, SandboxDriver, SandboxHarnessResult, type SandboxJudgeKind, type SandboxJudgeResult, type SandboxJudgeSpec, type SandboxPool, type ScanOptions, Scenario, type ScenarioCost, ScenarioFile, ScenarioRegistry, ScenarioResult, type Scorecard, type ScorecardCell, type ScorecardCellDiff, type ScorecardDiff, type ScorecardEntry, type ScorecardLogLine, type ScoredTarget, type SelfPlayOptions, type SelfPlayProposer, type SelfPlayScorer, type SeriesConvergenceOptions, type SeriesConvergenceResult, Severity, type SignedManifest, type SignedManifestAlgo, type SingleBackendDivergence, SingleBackendError, type SingleBackendField, type SingleBackendReport, type Slo, type SloCheckResult, type SloComparator, type SloReport, type SloSeverity, type SlopCategory, type SlotFactory, type SplitGoldOptions, SteeringBundle, type SteeringOptimizationResult, type SteeringOptimizationRow, type SteeringOptimizationSelector, type SteeringOptimizerBackend, type SteeringOptimizerConfig, type StepAttribution, type SynthesisReason, type SynthesisTarget, TestResult, type ThresholdContract, TokenCounter, type TokenSpec, TraceEmitter, TraceStore, type TracedAnalystOptions, type TracedJudgeOptions, Trajectory, TrajectoryStep, type TrialTrace, TurnMetrics, UI_FINDING_SEVERITIES, UI_LENSES, UNIVERSAL_FINDERS, type UiFinding, type UiFindingScreenshot, type UiFindingSeverity, type UiLens, VerifyContext, type VisualDiffOptions, type VisualDiffResult, type ViteDeployRunnerInput, type WorkspaceAssertion, type WorkspaceAssertionResult, type WorkspaceInspector, type WorkspaceSnapshot, type WranglerDeployRunnerInput, adversarialJudge, aggregatePrReviewScore, analyzeAntiSlop, analyzeSeries, appendScorecard, assertCrossFamily, assertSingleBackend, attributeCounterfactuals, 
bisect, buildAgreementJudge, buildDriverSystemPrompt, buildReflectionPrompt, buildReviewerPrompt, canaryLeakView, canonicalize, causalAttribution, checkBehavioralCanary, checkCanaries, checkSlos, codeExecutionJudge, coherenceJudge, collectionPreserved, commentsForSource, commitBisect, compareReferenceReplay, compilerJudge, createAntiSlopJudge, createCustomJudge, createDefaultReviewer, createDomainExpertJudge, createIntentMatchJudge, createSandboxPool, crossTraceDiff, decideNextUserTurn, decideReferenceReplayPromotion, decideReferenceReplayRunPromotion, defaultJudges, defaultParseStudentLabel, defaultReferenceReplayMatcher, defaultRenderStudentPrompt, deployGateLayer, diffScorecard, discoverPersonas, distillPlaybook, estimateCost, estimateTokens, evaluateContract, evaluateHypothesis, evaluateOracles, executeScenario, expectAgent, exportRewardModel, extractAssetUrls, extractErrorCount, fieldAgreement, fileContains, fileExists, findAutoMatchNoExpectation, findConstructorCwdDropped, findFallbackToPass, findLiteralTruePass, findSkipCountsAsPass, flowLayer, formatBenchmarkReport, formatDriverReport, formatFindings, formatScorecardDiff, ghCliClient, precision as goldenPrecision, hashContent, hashJson, htmlContainsElement, httpGithubClient, inMemoryReferenceReplayStore, isModelPriced, isOtelConfigured, jsonShape, jsonlReferenceReplayStore, judgeFamily, keyPreserved, linterJudge, loadGoldScenarios, loadScorecard, loadScorerFromGrader, localCommandRunner, lowercaseMutator, matchGoldens, mergeLayerResults, multiToolchainLayer, notBlocked, paraphraseRobustness, paraphraseRobustnessScenarios, parseGoldJsonl, parseReflectionResponse, passOrthogonality, pixelDeltaRatio, politenessPrefixMutator, printDriverSummary, index as profile, promptBisect, proposeAutomatedPullRequest, proposeSynthesisTargets, recordRuns, recordRunsToScorecard, referenceReplayRunsToSteeringRows, referenceReplayScenarioToRunScore, regexMatches, renderMarkdownReport, renderPlaybookMarkdown, 
replayScorerOverCorpus, replayTraceThroughJudge, resetLockedAppendersForTesting, resolveModelPricing, rowCount, rowWhere, runAssertions, runBehavioralCanaries, runCanaries, runCounterfactual, runDistillation, runE2EWorkflow, runExpectations, runIntentMatchJudge, runJudgeFleet, runKeywordCoverageJudge, runKeywordCoverageJudgeUrl, runLiveProof, runReferenceReplay, runSelfPlay, scanForMuffledGates, scoreContinuity, scorePrReviewComments, scorePrReviewSource, scoreReferenceReplay, securityJudge, sentenceReorderMutator, signManifest, splitGold, statusAdvanced, summarizePrReviewBenchmark, testJudge, textInSnapshot, toLangfuseEnvelope, toPrometheusText, traceJudge, traceJudgeEnsemble, tracedAnalyzeTraces, typoMutator, urlContains, verifyManifest, visualDiff, viteDeployRunner, weightedRecall, whitespaceCollapseMutator, withJudgeRetry, withOtelPipeline, wranglerDeployRunner };
|
package/dist/index.js
CHANGED
|
@@ -42,7 +42,7 @@ import {
|
|
|
42
42
|
scoreRedTeamOutput,
|
|
43
43
|
surfaceContentHash,
|
|
44
44
|
toolNamesForRun
|
|
45
|
-
} from "./chunk-JYE3WOTE.js";
|
|
45
|
+
} from "./chunk-RPLZ4OIB.js";
|
|
46
46
|
import {
|
|
47
47
|
BackendIntegrityError,
|
|
48
48
|
assertRealBackend,
|
|
@@ -114,7 +114,7 @@ import {
|
|
|
114
114
|
diffFindings,
|
|
115
115
|
resetLockedAppendersForTesting,
|
|
116
116
|
runSemanticConceptJudge
|
|
117
|
-
} from "./chunk-7W4SM7FD.js";
|
|
117
|
+
} from "./chunk-5LVWPNS5.js";
|
|
118
118
|
import {
|
|
119
119
|
AnalystRegistry,
|
|
120
120
|
DEFAULT_TRACE_ANALYST_KINDS,
|
|
@@ -126,7 +126,7 @@ import {
|
|
|
126
126
|
createTraceAnalystKind,
|
|
127
127
|
makeFinding,
|
|
128
128
|
renderPriorFindings
|
|
129
|
-
} from "./chunk-WYIHD6EB.js";
|
|
129
|
+
} from "./chunk-CF67I6QY.js";
|
|
130
130
|
import {
|
|
131
131
|
controlFailureClassFromVerification,
|
|
132
132
|
controlRunToRunRecord,
|
|
@@ -137,7 +137,7 @@ import {
|
|
|
137
137
|
runProposeReview,
|
|
138
138
|
runProposeReviewAsControlLoop,
|
|
139
139
|
scoreFromEvals
|
|
140
|
-
} from "./chunk-6EKXFFGQ.js";
|
|
140
|
+
} from "./chunk-RTWFUK6A.js";
|
|
141
141
|
import {
|
|
142
142
|
allCriticalPassed,
|
|
143
143
|
objectiveEval,
|
|
@@ -155,7 +155,7 @@ import {
|
|
|
155
155
|
} from "./chunk-B26KI423.js";
|
|
156
156
|
import {
|
|
157
157
|
runEvalCampaign
|
|
158
|
-
} from "./chunk-GJJNJVIR.js";
|
|
158
|
+
} from "./chunk-XXNIODOM.js";
|
|
159
159
|
import {
|
|
160
160
|
LlmCallError,
|
|
161
161
|
LlmClient,
|
|
@@ -233,7 +233,7 @@ import {
|
|
|
233
233
|
scoreTraceInsightReadiness,
|
|
234
234
|
tokenizeDomainWords,
|
|
235
235
|
traceAnalystOnRunComplete
|
|
236
|
-
} from "./chunk-XGNCBAVZ.js";
|
|
236
|
+
} from "./chunk-XQL22JDG.js";
|
|
237
237
|
import {
|
|
238
238
|
DEFAULT_REDACTION_RULES,
|
|
239
239
|
REDACTION_VERSION,
|
|
@@ -312,7 +312,7 @@ import {
|
|
|
312
312
|
validateAgentProfileCell,
|
|
313
313
|
validateRunRecord,
|
|
314
314
|
verifyAgentProfileCell
|
|
315
|
-
} from "./chunk-F3SRAAZO.js";
|
|
315
|
+
} from "./chunk-KWRRMR3J.js";
|
|
316
316
|
import {
|
|
317
317
|
TraceEmitter,
|
|
318
318
|
llmSpanFromProvider
|
|
@@ -4643,6 +4643,28 @@ function statusAdvanced(key, progression) {
|
|
|
4643
4643
|
};
|
|
4644
4644
|
}
|
|
4645
4645
|
|
|
4646
|
+
// src/ui-finding.ts
|
|
4647
|
+
var UI_LENSES = [
|
|
4648
|
+
"consistency",
|
|
4649
|
+
"hierarchy",
|
|
4650
|
+
"layout",
|
|
4651
|
+
"ux-flow",
|
|
4652
|
+
"duplication",
|
|
4653
|
+
"accessibility",
|
|
4654
|
+
"responsive",
|
|
4655
|
+
"states",
|
|
4656
|
+
"content",
|
|
4657
|
+
"interaction",
|
|
4658
|
+
"performance-perceived",
|
|
4659
|
+
"other"
|
|
4660
|
+
];
|
|
4661
|
+
var UI_FINDING_SEVERITIES = [
|
|
4662
|
+
"critical",
|
|
4663
|
+
"high",
|
|
4664
|
+
"med",
|
|
4665
|
+
"low"
|
|
4666
|
+
];
|
|
4667
|
+
|
|
4646
4668
|
// src/behavior-dsl.ts
|
|
4647
4669
|
var BehaviorAssertion = class {
|
|
4648
4670
|
constructor(store, runId) {
|
|
@@ -8680,6 +8702,8 @@ export {
|
|
|
8680
8702
|
TraceEmitter,
|
|
8681
8703
|
TraceFileMissingError,
|
|
8682
8704
|
TraceNotFoundError,
|
|
8705
|
+
UI_FINDING_SEVERITIES,
|
|
8706
|
+
UI_LENSES,
|
|
8683
8707
|
UNIVERSAL_FINDERS,
|
|
8684
8708
|
ValidationError,
|
|
8685
8709
|
VerificationError,
|
|
@@ -8768,6 +8792,7 @@ export {
|
|
|
8768
8792
|
corpusInterRaterAgreementFromJudgeScores,
|
|
8769
8793
|
createAnalystAi,
|
|
8770
8794
|
createAntiSlopJudge,
|
|
8795
|
+
createChatClient,
|
|
8771
8796
|
createCustomJudge,
|
|
8772
8797
|
createDefaultReviewer,
|
|
8773
8798
|
createDomainExpertJudge,
|