@oro-ai/sdk 1.0.4 → 1.0.6
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +200 -1
- package/dist/index.d.ts +200 -1
- package/dist/index.js +16 -0
- package/dist/index.mjs +14 -0
- package/package.json +1 -1
- package/src/generated/sdk.gen.ts +23 -1
- package/src/generated/types.gen.ts +201 -1
package/dist/index.d.mts
CHANGED
|
@@ -370,6 +370,23 @@ type AgentVersionPublic = {
|
|
|
370
370
|
*/
|
|
371
371
|
latest_final_score?: (number | null);
|
|
372
372
|
};
|
|
373
|
+
/**
|
|
374
|
+
* Per-validator score for an agent version.
|
|
375
|
+
*/
|
|
376
|
+
type AgentVersionScoreEntry = {
|
|
377
|
+
/**
|
|
378
|
+
* Validator hotkey
|
|
379
|
+
*/
|
|
380
|
+
validator_hotkey: string;
|
|
381
|
+
/**
|
|
382
|
+
* Score from this validator
|
|
383
|
+
*/
|
|
384
|
+
score: number;
|
|
385
|
+
/**
|
|
386
|
+
* Evaluation run ID
|
|
387
|
+
*/
|
|
388
|
+
run_id: string;
|
|
389
|
+
};
|
|
373
390
|
/**
|
|
374
391
|
* State of an agent version evaluation.
|
|
375
392
|
*/
|
|
@@ -441,6 +458,64 @@ type AgentVersionStatus = {
|
|
|
441
458
|
[key: string]: (number);
|
|
442
459
|
} | null);
|
|
443
460
|
};
|
|
461
|
+
/**
|
|
462
|
+
* Score variance across validators for a single agent version.
|
|
463
|
+
*/
|
|
464
|
+
type AgentVersionVariance = {
|
|
465
|
+
/**
|
|
466
|
+
* Agent version ID
|
|
467
|
+
*/
|
|
468
|
+
agent_version_id: string;
|
|
469
|
+
/**
|
|
470
|
+
* Agent name
|
|
471
|
+
*/
|
|
472
|
+
agent_name: string;
|
|
473
|
+
/**
|
|
474
|
+
* Miner hotkey
|
|
475
|
+
*/
|
|
476
|
+
miner_hotkey: string;
|
|
477
|
+
/**
|
|
478
|
+
* Number of validators that scored this version
|
|
479
|
+
*/
|
|
480
|
+
validator_count: number;
|
|
481
|
+
/**
|
|
482
|
+
* Mean score
|
|
483
|
+
*/
|
|
484
|
+
avg_score: number;
|
|
485
|
+
/**
|
|
486
|
+
* Minimum score
|
|
487
|
+
*/
|
|
488
|
+
min_score: number;
|
|
489
|
+
/**
|
|
490
|
+
* Maximum score
|
|
491
|
+
*/
|
|
492
|
+
max_score: number;
|
|
493
|
+
/**
|
|
494
|
+
* max_score - min_score
|
|
495
|
+
*/
|
|
496
|
+
spread: number;
|
|
497
|
+
/**
|
|
498
|
+
* True if spread exceeds threshold (default 10%)
|
|
499
|
+
*/
|
|
500
|
+
is_high_variance: boolean;
|
|
501
|
+
/**
|
|
502
|
+
* Individual validator scores
|
|
503
|
+
*/
|
|
504
|
+
per_validator: Array<AgentVersionScoreEntry>;
|
|
505
|
+
};
|
|
506
|
+
/**
|
|
507
|
+
* Response for agent version score variance analytics.
|
|
508
|
+
*/
|
|
509
|
+
type AgentVersionVarianceResponse = {
|
|
510
|
+
/**
|
|
511
|
+
* Per-version variance data
|
|
512
|
+
*/
|
|
513
|
+
agent_versions: Array<AgentVersionVariance>;
|
|
514
|
+
/**
|
|
515
|
+
* Spread threshold used for flagging
|
|
516
|
+
*/
|
|
517
|
+
variance_threshold: number;
|
|
518
|
+
};
|
|
444
519
|
/**
|
|
445
520
|
* 409 - Resource is already invalidated.
|
|
446
521
|
*/
|
|
@@ -918,6 +993,18 @@ type EvaluationRunDetail = {
|
|
|
918
993
|
* Validator-reported failure reason
|
|
919
994
|
*/
|
|
920
995
|
failure_reason?: (string | null);
|
|
996
|
+
/**
|
|
997
|
+
* Whether run is included in aggregate scoring
|
|
998
|
+
*/
|
|
999
|
+
is_included?: boolean;
|
|
1000
|
+
/**
|
|
1001
|
+
* When run was invalidated
|
|
1002
|
+
*/
|
|
1003
|
+
invalidated_at?: (string | null);
|
|
1004
|
+
/**
|
|
1005
|
+
* Reason for invalidation
|
|
1006
|
+
*/
|
|
1007
|
+
invalidation_reason?: (string | null);
|
|
921
1008
|
};
|
|
922
1009
|
/**
|
|
923
1010
|
* Public representation of an evaluation run.
|
|
@@ -2078,6 +2165,64 @@ type ValidatorPublic = {
|
|
|
2078
2165
|
*/
|
|
2079
2166
|
identity_description?: (string | null);
|
|
2080
2167
|
};
|
|
2168
|
+
/**
|
|
2169
|
+
* Response for validator scoring analytics.
|
|
2170
|
+
*/
|
|
2171
|
+
type ValidatorScoresResponse = {
|
|
2172
|
+
/**
|
|
2173
|
+
* Per-validator summaries
|
|
2174
|
+
*/
|
|
2175
|
+
validators: Array<ValidatorScoreSummary>;
|
|
2176
|
+
/**
|
|
2177
|
+
* Global average score across all validators
|
|
2178
|
+
*/
|
|
2179
|
+
global_avg_score: number;
|
|
2180
|
+
/**
|
|
2181
|
+
* Global standard deviation
|
|
2182
|
+
*/
|
|
2183
|
+
global_stddev: number;
|
|
2184
|
+
};
|
|
2185
|
+
/**
|
|
2186
|
+
* Aggregated scoring stats for a single validator.
|
|
2187
|
+
*/
|
|
2188
|
+
type ValidatorScoreSummary = {
|
|
2189
|
+
/**
|
|
2190
|
+
* Validator hotkey
|
|
2191
|
+
*/
|
|
2192
|
+
validator_hotkey: string;
|
|
2193
|
+
/**
|
|
2194
|
+
* Total completed runs
|
|
2195
|
+
*/
|
|
2196
|
+
total_runs: number;
|
|
2197
|
+
/**
|
|
2198
|
+
* Mean score across runs
|
|
2199
|
+
*/
|
|
2200
|
+
avg_score: number;
|
|
2201
|
+
/**
|
|
2202
|
+
* Median score
|
|
2203
|
+
*/
|
|
2204
|
+
median_score: number;
|
|
2205
|
+
/**
|
|
2206
|
+
* Standard deviation of scores
|
|
2207
|
+
*/
|
|
2208
|
+
stddev_score: number;
|
|
2209
|
+
/**
|
|
2210
|
+
* Minimum score
|
|
2211
|
+
*/
|
|
2212
|
+
min_score: number;
|
|
2213
|
+
/**
|
|
2214
|
+
* Maximum score
|
|
2215
|
+
*/
|
|
2216
|
+
max_score: number;
|
|
2217
|
+
/**
|
|
2218
|
+
* Percentage deviation from the global average (negative = below)
|
|
2219
|
+
*/
|
|
2220
|
+
deviation_from_global: number;
|
|
2221
|
+
/**
|
|
2222
|
+
* True if deviation exceeds 1.5 standard deviations from global mean
|
|
2223
|
+
*/
|
|
2224
|
+
is_outlier: boolean;
|
|
2225
|
+
};
|
|
2081
2226
|
/**
|
|
2082
2227
|
* Status of a validator.
|
|
2083
2228
|
*/
|
|
@@ -2614,6 +2759,50 @@ type ListEvaluationRunsData = {
|
|
|
2614
2759
|
};
|
|
2615
2760
|
type ListEvaluationRunsResponse = (AdminEvaluationRunsResponse);
|
|
2616
2761
|
type ListEvaluationRunsError = (HTTPValidationError);
|
|
2762
|
+
type GetValidatorScoresData = {
|
|
2763
|
+
query?: {
|
|
2764
|
+
/**
|
|
2765
|
+
* Only runs after this time
|
|
2766
|
+
*/
|
|
2767
|
+
since?: (string | null);
|
|
2768
|
+
/**
|
|
2769
|
+
* Suite ID (defaults to active suite)
|
|
2770
|
+
*/
|
|
2771
|
+
suite_id?: (number | null);
|
|
2772
|
+
/**
|
|
2773
|
+
* Only runs before this time
|
|
2774
|
+
*/
|
|
2775
|
+
until?: (string | null);
|
|
2776
|
+
};
|
|
2777
|
+
};
|
|
2778
|
+
type GetValidatorScoresResponse = (ValidatorScoresResponse);
|
|
2779
|
+
type GetValidatorScoresError = (HTTPValidationError);
|
|
2780
|
+
type GetAgentVersionVarianceData = {
|
|
2781
|
+
query?: {
|
|
2782
|
+
/**
|
|
2783
|
+
* Number of agent versions
|
|
2784
|
+
*/
|
|
2785
|
+
limit?: number;
|
|
2786
|
+
/**
|
|
2787
|
+
* Only versions after this time
|
|
2788
|
+
*/
|
|
2789
|
+
since?: (string | null);
|
|
2790
|
+
/**
|
|
2791
|
+
* Suite ID (defaults to active suite)
|
|
2792
|
+
*/
|
|
2793
|
+
suite_id?: (number | null);
|
|
2794
|
+
/**
|
|
2795
|
+
* Only versions before this time
|
|
2796
|
+
*/
|
|
2797
|
+
until?: (string | null);
|
|
2798
|
+
/**
|
|
2799
|
+
* Spread threshold for flagging
|
|
2800
|
+
*/
|
|
2801
|
+
variance_threshold?: number;
|
|
2802
|
+
};
|
|
2803
|
+
};
|
|
2804
|
+
type GetAgentVersionVarianceResponse = (AgentVersionVarianceResponse);
|
|
2805
|
+
type GetAgentVersionVarianceError = (HTTPValidationError);
|
|
2617
2806
|
|
|
2618
2807
|
declare const client: _hey_api_client_fetch.Client<Request, Response, unknown, _hey_api_client_fetch.RequestOptions<boolean, string>>;
|
|
2619
2808
|
/**
|
|
@@ -2886,6 +3075,16 @@ declare const listAgentVersions1: <ThrowOnError extends boolean = false>(options
|
|
|
2886
3075
|
* List evaluation runs with optional filtering and pagination.
|
|
2887
3076
|
*/
|
|
2888
3077
|
declare const listEvaluationRuns: <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<ListEvaluationRunsData, ThrowOnError>) => _hey_api_client_fetch.RequestResult<AdminEvaluationRunsResponse, HTTPValidationError, ThrowOnError>;
|
|
3078
|
+
/**
|
|
3079
|
+
* Aggregated scoring statistics per validator
|
|
3080
|
+
* Compute per-validator scoring statistics from completed runs.
|
|
3081
|
+
*/
|
|
3082
|
+
declare const getValidatorScores: <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<GetValidatorScoresData, ThrowOnError>) => _hey_api_client_fetch.RequestResult<ValidatorScoresResponse, HTTPValidationError, ThrowOnError>;
|
|
3083
|
+
/**
|
|
3084
|
+
* Score variance across validators for recent agent versions
|
|
3085
|
+
* Find agent versions with high score variance across validators.
|
|
3086
|
+
*/
|
|
3087
|
+
declare const getAgentVersionVariance: <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<GetAgentVersionVarianceData, ThrowOnError>) => _hey_api_client_fetch.RequestResult<AgentVersionVarianceResponse, HTTPValidationError, ThrowOnError>;
|
|
2889
3088
|
|
|
2890
3089
|
/**
|
|
2891
3090
|
* Auto-generated error code type.
|
|
@@ -3238,4 +3437,4 @@ declare class SessionAuthManager {
|
|
|
3238
3437
|
*/
|
|
3239
3438
|
declare function configureSessionAuth(baseUrl: string, config: SessionAuthConfig): SessionAuthManager;
|
|
3240
3439
|
|
|
3241
|
-
export { type ActivateSuiteData, type ActivateSuiteError, type ActivateSuiteResponse, type ActivateSuiteResponse2, type AdminAgentVersionEntry, type AdminAgentVersionsResponse, type AdminEvaluationRunEntry, type AdminEvaluationRunsResponse, type AdminMinerEntry, type AdminMinersResponse, type AdmissionReason, type AdmissionStatus, type AgentNotFoundError, type AgentPublic, type AgentVersionHistoryEntry, type AgentVersionNotFoundError, type AgentVersionProblemsResponse, type AgentVersionPublic, type AgentVersionState, type AgentVersionStatus, type AlreadyInvalidatedError, type ArtifactDownloadRequest, type ArtifactDownloadResponse, type ArtifactNotFoundError, type ArtifactNotReleasedError, type ArtifactReleaseState, type ArtifactType, type AtCapacityError, type AuditEventEntry, type AuditEventsResponse, type BanMinerData, type BanMinerError, type BanMinerResponse, type BanRequest, type BanResponse, type BanValidatorData, type BanValidatorError, type BanValidatorResponse, type BittensorAuthConfig, type Body_submit_agent, type CachedSession, type CancelAgentVersionData, type CancelAgentVersionError, type CancelAgentVersionResponse, type CancelRequest, type CancelResponse, type ChallengeRequest, type ChallengeResponse, type ChutesAuthStatusResponse, type ClaimWorkData, type ClaimWorkError, type ClaimWorkResponse, type ClaimWorkResponse2, type ClearMinerCooldownData, type ClearMinerCooldownError, type ClearMinerCooldownResponse, type CodeAnalysisError, type CompleteRunData, type CompleteRunError, type CompleteRunRequest, type CompleteRunResponse, type CompleteRunResponse2, type CooldownActiveError, type CreateSessionEndpointData, type CreateSessionEndpointError, type CreateSessionEndpointResponse, type CreateSuiteData, type CreateSuiteError, type CreateSuiteRequest, type CreateSuiteResponse, type CreateSuiteResponse2, type DiscardAgentVersionData, type DiscardAgentVersionError, type DiscardAgentVersionResponse, type DiscardRequest, type DiscardResponse, type 
ErrorCategory, type EvalRunNotFoundError, type EvaluationRunDetail, type EvaluationRunPublic, type EvaluationRunStatus, type EvaluationRunStatusPublic, type FileTooLargeError, type GetAgentVersionData, type GetAgentVersionError, type GetAgentVersionProblemsData, type GetAgentVersionProblemsError, type GetAgentVersionProblemsResponse, type GetAgentVersionResponse, type GetAgentVersionRunsData, type GetAgentVersionRunsError, type GetAgentVersionRunsResponse, type GetAgentVersionStatusData, type GetAgentVersionStatusError, type GetAgentVersionStatusResponse, type GetArtifactDownloadUrlData, type GetArtifactDownloadUrlError, type GetArtifactDownloadUrlResponse, type GetAuditEventsData, type GetAuditEventsError, type GetAuditEventsResponse, type GetChutesAuthStatusError, type GetChutesAuthStatusResponse, type GetCurrentSuiteError, type GetCurrentSuiteResponse, type GetEvaluationRunData, type GetEvaluationRunError, type GetEvaluationRunResponse, type GetLeaderboardData, type GetLeaderboardError, type GetLeaderboardResponse, type GetOwnedAgentVersionStatusData, type GetOwnedAgentVersionStatusError, type GetOwnedAgentVersionStatusResponse, type GetPendingEvaluationsData, type GetPendingEvaluationsError, type GetPendingEvaluationsResponse, type GetReaperStatsError, type GetReaperStatsResponse, type GetRunningEvaluationsError, type GetRunningEvaluationsResponse, type GetSuiteProblemsData, type GetSuiteProblemsError, type GetSuiteProblemsResponse, type GetTopAgentError, type GetTopAgentResponse, type GetValidatorsError, type GetValidatorsResponse, type HTTPValidationError, type HealthCheckError, type HealthCheckResponse, type HeartbeatData, type HeartbeatError, type HeartbeatRequest, type HeartbeatResponse, type HeartbeatResponse2, type InvalidAgentNameError, type InvalidArtifactTypeError, type InvalidFileError, type InvalidProblemIdError, type InvalidateEvaluationRunData, type InvalidateEvaluationRunError, type InvalidateEvaluationRunResponse, type InvalidateRunRequest, type 
JoinWaitlistData, type JoinWaitlistError, type JoinWaitlistResponse, type LeaderboardEntry, type LeaderboardResponse, type LeaseExpiredError, type ListAgentVersions1Data, type ListAgentVersions1Error, type ListAgentVersions1Response, type ListAgentVersionsData, type ListAgentVersionsError, type ListAgentVersionsResponse, type ListEvaluationRunsData, type ListEvaluationRunsError, type ListEvaluationRunsResponse, type ListMinerAgentsError, type ListMinerAgentsResponse, type ListMinersData, type ListMinersError, type ListMinersResponse, type LogoutData, type LogoutError, type LogoutResponse, type LogoutResponse2, type MinerAgentsResponse, type MinerNotFoundError, type MissingParameterError, type MissingScoreError, type NoActiveSuiteError, type NotRunOwnerError, type OroErrorCode, type PendingEvaluation, type PendingEvaluationSummary, type PendingEvaluationsResponse, type PresignUploadData, type PresignUploadError, type PresignUploadRequest, type PresignUploadResponse, type PresignUploadResponse2, type ProblemNotFoundError, type ProblemProgressEntry, type ProblemProgressUpdate, type ProblemPublic, type ProblemStatus, type ProgressUpdateRequest, type ProgressUpdateResponse, type RateLimitExceededError, type ReaperStatsResponse, type ReevaluateAgentVersionData, type ReevaluateAgentVersionError, type ReevaluateAgentVersionResponse, type ReevaluateRequest, type ReevaluateResponse, type ReinstateAgentVersionData, type ReinstateAgentVersionError, type ReinstateAgentVersionResponse, type ReinstateRequest, type RequestChallengeData, type RequestChallengeError, type RequestChallengeResponse, type RetryConfig, type RetryContext, type RunAlreadyCompleteError, type RunningEvaluation, type ScoreBelowThresholdError, type SessionAuthConfig, SessionAuthManager, type SessionInfo, type SessionRequest, type SessionResponse, type SetTopAgentData, type SetTopAgentError, type SetTopAgentResponse, type SetTopRequest, type SetTopResponse, type StoreChutesTokenData, type StoreChutesTokenError, 
type StoreChutesTokenRequest, type StoreChutesTokenResponse, type SubmitAgentData, type SubmitAgentError, type SubmitAgentResponse, type SubmitAgentResponse2, type SuiteNotFoundError, type SuitePublic, type SuiteWithProblemsResponse, type TerminalStatus, type TopAgentResponse, type UnbanMinerData, type UnbanMinerError, type UnbanMinerResponse, type UnbanValidatorData, type UnbanValidatorError, type UnbanValidatorResponse, type UpdateProgressData, type UpdateProgressError, type UpdateProgressResponse, type ValidationError, type ValidatorCurrentAgent, type ValidatorNotFoundError, type ValidatorProblemResult, type ValidatorPublic, type ValidatorStatus, type WaitlistSignupRequest, type WaitlistSignupResponse, type WorkItemStatus, activateSuite, banMiner, banValidator, cancelAgentVersion, claimWork, classifyError, classifyStatus, clearMinerCooldown, client, completeRun, computeDelay, configureBittensorAuth, configurePublicClient, configureSessionAuth, createRetryFetch, createSessionEndpoint, createSuite, discardAgentVersion, generateAuthHeaders, getAgentVersion, getAgentVersionProblems, getAgentVersionRuns, getAgentVersionStatus, getArtifactDownloadUrl, getAuditEvents, getChutesAuthStatus, getCurrentSuite, getErrorCode, getErrorDetail, getEvaluationRun, getLeaderboard, getOwnedAgentVersionStatus, getPendingEvaluations, getReaperStats, getRunningEvaluations, getSuiteProblems, getTopAgent, getValidators, hasDetail, hasErrorCode, healthCheck, heartbeat, invalidateEvaluationRun, isTransient, isTransientError, joinWaitlist, listAgentVersions, listAgentVersions1, listEvaluationRuns, listMinerAgents, listMiners, logout, parseRetryAfter, presignUpload, reevaluateAgentVersion, reinstateAgentVersion, requestChallenge, setTopAgent, storeChutesToken, submitAgent, unbanMiner, unbanValidator, updateProgress };
|
|
3440
|
+
export { type ActivateSuiteData, type ActivateSuiteError, type ActivateSuiteResponse, type ActivateSuiteResponse2, type AdminAgentVersionEntry, type AdminAgentVersionsResponse, type AdminEvaluationRunEntry, type AdminEvaluationRunsResponse, type AdminMinerEntry, type AdminMinersResponse, type AdmissionReason, type AdmissionStatus, type AgentNotFoundError, type AgentPublic, type AgentVersionHistoryEntry, type AgentVersionNotFoundError, type AgentVersionProblemsResponse, type AgentVersionPublic, type AgentVersionScoreEntry, type AgentVersionState, type AgentVersionStatus, type AgentVersionVariance, type AgentVersionVarianceResponse, type AlreadyInvalidatedError, type ArtifactDownloadRequest, type ArtifactDownloadResponse, type ArtifactNotFoundError, type ArtifactNotReleasedError, type ArtifactReleaseState, type ArtifactType, type AtCapacityError, type AuditEventEntry, type AuditEventsResponse, type BanMinerData, type BanMinerError, type BanMinerResponse, type BanRequest, type BanResponse, type BanValidatorData, type BanValidatorError, type BanValidatorResponse, type BittensorAuthConfig, type Body_submit_agent, type CachedSession, type CancelAgentVersionData, type CancelAgentVersionError, type CancelAgentVersionResponse, type CancelRequest, type CancelResponse, type ChallengeRequest, type ChallengeResponse, type ChutesAuthStatusResponse, type ClaimWorkData, type ClaimWorkError, type ClaimWorkResponse, type ClaimWorkResponse2, type ClearMinerCooldownData, type ClearMinerCooldownError, type ClearMinerCooldownResponse, type CodeAnalysisError, type CompleteRunData, type CompleteRunError, type CompleteRunRequest, type CompleteRunResponse, type CompleteRunResponse2, type CooldownActiveError, type CreateSessionEndpointData, type CreateSessionEndpointError, type CreateSessionEndpointResponse, type CreateSuiteData, type CreateSuiteError, type CreateSuiteRequest, type CreateSuiteResponse, type CreateSuiteResponse2, type DiscardAgentVersionData, type DiscardAgentVersionError, 
type DiscardAgentVersionResponse, type DiscardRequest, type DiscardResponse, type ErrorCategory, type EvalRunNotFoundError, type EvaluationRunDetail, type EvaluationRunPublic, type EvaluationRunStatus, type EvaluationRunStatusPublic, type FileTooLargeError, type GetAgentVersionData, type GetAgentVersionError, type GetAgentVersionProblemsData, type GetAgentVersionProblemsError, type GetAgentVersionProblemsResponse, type GetAgentVersionResponse, type GetAgentVersionRunsData, type GetAgentVersionRunsError, type GetAgentVersionRunsResponse, type GetAgentVersionStatusData, type GetAgentVersionStatusError, type GetAgentVersionStatusResponse, type GetAgentVersionVarianceData, type GetAgentVersionVarianceError, type GetAgentVersionVarianceResponse, type GetArtifactDownloadUrlData, type GetArtifactDownloadUrlError, type GetArtifactDownloadUrlResponse, type GetAuditEventsData, type GetAuditEventsError, type GetAuditEventsResponse, type GetChutesAuthStatusError, type GetChutesAuthStatusResponse, type GetCurrentSuiteError, type GetCurrentSuiteResponse, type GetEvaluationRunData, type GetEvaluationRunError, type GetEvaluationRunResponse, type GetLeaderboardData, type GetLeaderboardError, type GetLeaderboardResponse, type GetOwnedAgentVersionStatusData, type GetOwnedAgentVersionStatusError, type GetOwnedAgentVersionStatusResponse, type GetPendingEvaluationsData, type GetPendingEvaluationsError, type GetPendingEvaluationsResponse, type GetReaperStatsError, type GetReaperStatsResponse, type GetRunningEvaluationsError, type GetRunningEvaluationsResponse, type GetSuiteProblemsData, type GetSuiteProblemsError, type GetSuiteProblemsResponse, type GetTopAgentError, type GetTopAgentResponse, type GetValidatorScoresData, type GetValidatorScoresError, type GetValidatorScoresResponse, type GetValidatorsError, type GetValidatorsResponse, type HTTPValidationError, type HealthCheckError, type HealthCheckResponse, type HeartbeatData, type HeartbeatError, type HeartbeatRequest, type 
HeartbeatResponse, type HeartbeatResponse2, type InvalidAgentNameError, type InvalidArtifactTypeError, type InvalidFileError, type InvalidProblemIdError, type InvalidateEvaluationRunData, type InvalidateEvaluationRunError, type InvalidateEvaluationRunResponse, type InvalidateRunRequest, type JoinWaitlistData, type JoinWaitlistError, type JoinWaitlistResponse, type LeaderboardEntry, type LeaderboardResponse, type LeaseExpiredError, type ListAgentVersions1Data, type ListAgentVersions1Error, type ListAgentVersions1Response, type ListAgentVersionsData, type ListAgentVersionsError, type ListAgentVersionsResponse, type ListEvaluationRunsData, type ListEvaluationRunsError, type ListEvaluationRunsResponse, type ListMinerAgentsError, type ListMinerAgentsResponse, type ListMinersData, type ListMinersError, type ListMinersResponse, type LogoutData, type LogoutError, type LogoutResponse, type LogoutResponse2, type MinerAgentsResponse, type MinerNotFoundError, type MissingParameterError, type MissingScoreError, type NoActiveSuiteError, type NotRunOwnerError, type OroErrorCode, type PendingEvaluation, type PendingEvaluationSummary, type PendingEvaluationsResponse, type PresignUploadData, type PresignUploadError, type PresignUploadRequest, type PresignUploadResponse, type PresignUploadResponse2, type ProblemNotFoundError, type ProblemProgressEntry, type ProblemProgressUpdate, type ProblemPublic, type ProblemStatus, type ProgressUpdateRequest, type ProgressUpdateResponse, type RateLimitExceededError, type ReaperStatsResponse, type ReevaluateAgentVersionData, type ReevaluateAgentVersionError, type ReevaluateAgentVersionResponse, type ReevaluateRequest, type ReevaluateResponse, type ReinstateAgentVersionData, type ReinstateAgentVersionError, type ReinstateAgentVersionResponse, type ReinstateRequest, type RequestChallengeData, type RequestChallengeError, type RequestChallengeResponse, type RetryConfig, type RetryContext, type RunAlreadyCompleteError, type RunningEvaluation, type 
ScoreBelowThresholdError, type SessionAuthConfig, SessionAuthManager, type SessionInfo, type SessionRequest, type SessionResponse, type SetTopAgentData, type SetTopAgentError, type SetTopAgentResponse, type SetTopRequest, type SetTopResponse, type StoreChutesTokenData, type StoreChutesTokenError, type StoreChutesTokenRequest, type StoreChutesTokenResponse, type SubmitAgentData, type SubmitAgentError, type SubmitAgentResponse, type SubmitAgentResponse2, type SuiteNotFoundError, type SuitePublic, type SuiteWithProblemsResponse, type TerminalStatus, type TopAgentResponse, type UnbanMinerData, type UnbanMinerError, type UnbanMinerResponse, type UnbanValidatorData, type UnbanValidatorError, type UnbanValidatorResponse, type UpdateProgressData, type UpdateProgressError, type UpdateProgressResponse, type ValidationError, type ValidatorCurrentAgent, type ValidatorNotFoundError, type ValidatorProblemResult, type ValidatorPublic, type ValidatorScoreSummary, type ValidatorScoresResponse, type ValidatorStatus, type WaitlistSignupRequest, type WaitlistSignupResponse, type WorkItemStatus, activateSuite, banMiner, banValidator, cancelAgentVersion, claimWork, classifyError, classifyStatus, clearMinerCooldown, client, completeRun, computeDelay, configureBittensorAuth, configurePublicClient, configureSessionAuth, createRetryFetch, createSessionEndpoint, createSuite, discardAgentVersion, generateAuthHeaders, getAgentVersion, getAgentVersionProblems, getAgentVersionRuns, getAgentVersionStatus, getAgentVersionVariance, getArtifactDownloadUrl, getAuditEvents, getChutesAuthStatus, getCurrentSuite, getErrorCode, getErrorDetail, getEvaluationRun, getLeaderboard, getOwnedAgentVersionStatus, getPendingEvaluations, getReaperStats, getRunningEvaluations, getSuiteProblems, getTopAgent, getValidatorScores, getValidators, hasDetail, hasErrorCode, healthCheck, heartbeat, invalidateEvaluationRun, isTransient, isTransientError, joinWaitlist, listAgentVersions, listAgentVersions1, listEvaluationRuns, 
listMinerAgents, listMiners, logout, parseRetryAfter, presignUpload, reevaluateAgentVersion, reinstateAgentVersion, requestChallenge, setTopAgent, storeChutesToken, submitAgent, unbanMiner, unbanValidator, updateProgress };
|
package/dist/index.d.ts
CHANGED
|
@@ -370,6 +370,23 @@ type AgentVersionPublic = {
|
|
|
370
370
|
*/
|
|
371
371
|
latest_final_score?: (number | null);
|
|
372
372
|
};
|
|
373
|
+
/**
|
|
374
|
+
* Per-validator score for an agent version.
|
|
375
|
+
*/
|
|
376
|
+
type AgentVersionScoreEntry = {
|
|
377
|
+
/**
|
|
378
|
+
* Validator hotkey
|
|
379
|
+
*/
|
|
380
|
+
validator_hotkey: string;
|
|
381
|
+
/**
|
|
382
|
+
* Score from this validator
|
|
383
|
+
*/
|
|
384
|
+
score: number;
|
|
385
|
+
/**
|
|
386
|
+
* Evaluation run ID
|
|
387
|
+
*/
|
|
388
|
+
run_id: string;
|
|
389
|
+
};
|
|
373
390
|
/**
|
|
374
391
|
* State of an agent version evaluation.
|
|
375
392
|
*/
|
|
@@ -441,6 +458,64 @@ type AgentVersionStatus = {
|
|
|
441
458
|
[key: string]: (number);
|
|
442
459
|
} | null);
|
|
443
460
|
};
|
|
461
|
+
/**
|
|
462
|
+
* Score variance across validators for a single agent version.
|
|
463
|
+
*/
|
|
464
|
+
type AgentVersionVariance = {
|
|
465
|
+
/**
|
|
466
|
+
* Agent version ID
|
|
467
|
+
*/
|
|
468
|
+
agent_version_id: string;
|
|
469
|
+
/**
|
|
470
|
+
* Agent name
|
|
471
|
+
*/
|
|
472
|
+
agent_name: string;
|
|
473
|
+
/**
|
|
474
|
+
* Miner hotkey
|
|
475
|
+
*/
|
|
476
|
+
miner_hotkey: string;
|
|
477
|
+
/**
|
|
478
|
+
* Number of validators that scored this version
|
|
479
|
+
*/
|
|
480
|
+
validator_count: number;
|
|
481
|
+
/**
|
|
482
|
+
* Mean score
|
|
483
|
+
*/
|
|
484
|
+
avg_score: number;
|
|
485
|
+
/**
|
|
486
|
+
* Minimum score
|
|
487
|
+
*/
|
|
488
|
+
min_score: number;
|
|
489
|
+
/**
|
|
490
|
+
* Maximum score
|
|
491
|
+
*/
|
|
492
|
+
max_score: number;
|
|
493
|
+
/**
|
|
494
|
+
* max_score - min_score
|
|
495
|
+
*/
|
|
496
|
+
spread: number;
|
|
497
|
+
/**
|
|
498
|
+
* True if spread exceeds threshold (default 10%)
|
|
499
|
+
*/
|
|
500
|
+
is_high_variance: boolean;
|
|
501
|
+
/**
|
|
502
|
+
* Individual validator scores
|
|
503
|
+
*/
|
|
504
|
+
per_validator: Array<AgentVersionScoreEntry>;
|
|
505
|
+
};
|
|
506
|
+
/**
|
|
507
|
+
* Response for agent version score variance analytics.
|
|
508
|
+
*/
|
|
509
|
+
type AgentVersionVarianceResponse = {
|
|
510
|
+
/**
|
|
511
|
+
* Per-version variance data
|
|
512
|
+
*/
|
|
513
|
+
agent_versions: Array<AgentVersionVariance>;
|
|
514
|
+
/**
|
|
515
|
+
* Spread threshold used for flagging
|
|
516
|
+
*/
|
|
517
|
+
variance_threshold: number;
|
|
518
|
+
};
|
|
444
519
|
/**
|
|
445
520
|
* 409 - Resource is already invalidated.
|
|
446
521
|
*/
|
|
@@ -918,6 +993,18 @@ type EvaluationRunDetail = {
|
|
|
918
993
|
* Validator-reported failure reason
|
|
919
994
|
*/
|
|
920
995
|
failure_reason?: (string | null);
|
|
996
|
+
/**
|
|
997
|
+
* Whether run is included in aggregate scoring
|
|
998
|
+
*/
|
|
999
|
+
is_included?: boolean;
|
|
1000
|
+
/**
|
|
1001
|
+
* When run was invalidated
|
|
1002
|
+
*/
|
|
1003
|
+
invalidated_at?: (string | null);
|
|
1004
|
+
/**
|
|
1005
|
+
* Reason for invalidation
|
|
1006
|
+
*/
|
|
1007
|
+
invalidation_reason?: (string | null);
|
|
921
1008
|
};
|
|
922
1009
|
/**
|
|
923
1010
|
* Public representation of an evaluation run.
|
|
@@ -2078,6 +2165,64 @@ type ValidatorPublic = {
|
|
|
2078
2165
|
*/
|
|
2079
2166
|
identity_description?: (string | null);
|
|
2080
2167
|
};
|
|
2168
|
+
/**
|
|
2169
|
+
* Response for validator scoring analytics.
|
|
2170
|
+
*/
|
|
2171
|
+
type ValidatorScoresResponse = {
|
|
2172
|
+
/**
|
|
2173
|
+
* Per-validator summaries
|
|
2174
|
+
*/
|
|
2175
|
+
validators: Array<ValidatorScoreSummary>;
|
|
2176
|
+
/**
|
|
2177
|
+
* Global average score across all validators
|
|
2178
|
+
*/
|
|
2179
|
+
global_avg_score: number;
|
|
2180
|
+
/**
|
|
2181
|
+
* Global standard deviation
|
|
2182
|
+
*/
|
|
2183
|
+
global_stddev: number;
|
|
2184
|
+
};
|
|
2185
|
+
/**
|
|
2186
|
+
* Aggregated scoring stats for a single validator.
|
|
2187
|
+
*/
|
|
2188
|
+
type ValidatorScoreSummary = {
|
|
2189
|
+
/**
|
|
2190
|
+
* Validator hotkey
|
|
2191
|
+
*/
|
|
2192
|
+
validator_hotkey: string;
|
|
2193
|
+
/**
|
|
2194
|
+
* Total completed runs
|
|
2195
|
+
*/
|
|
2196
|
+
total_runs: number;
|
|
2197
|
+
/**
|
|
2198
|
+
* Mean score across runs
|
|
2199
|
+
*/
|
|
2200
|
+
avg_score: number;
|
|
2201
|
+
/**
|
|
2202
|
+
* Median score
|
|
2203
|
+
*/
|
|
2204
|
+
median_score: number;
|
|
2205
|
+
/**
|
|
2206
|
+
* Standard deviation of scores
|
|
2207
|
+
*/
|
|
2208
|
+
stddev_score: number;
|
|
2209
|
+
/**
|
|
2210
|
+
* Minimum score
|
|
2211
|
+
*/
|
|
2212
|
+
min_score: number;
|
|
2213
|
+
/**
|
|
2214
|
+
* Maximum score
|
|
2215
|
+
*/
|
|
2216
|
+
max_score: number;
|
|
2217
|
+
/**
|
|
2218
|
+
* Percentage deviation from the global average (negative = below)
|
|
2219
|
+
*/
|
|
2220
|
+
deviation_from_global: number;
|
|
2221
|
+
/**
|
|
2222
|
+
* True if deviation exceeds 1.5 standard deviations from global mean
|
|
2223
|
+
*/
|
|
2224
|
+
is_outlier: boolean;
|
|
2225
|
+
};
|
|
2081
2226
|
/**
|
|
2082
2227
|
* Status of a validator.
|
|
2083
2228
|
*/
|
|
@@ -2614,6 +2759,50 @@ type ListEvaluationRunsData = {
|
|
|
2614
2759
|
};
|
|
2615
2760
|
type ListEvaluationRunsResponse = (AdminEvaluationRunsResponse);
|
|
2616
2761
|
type ListEvaluationRunsError = (HTTPValidationError);
|
|
2762
|
+
type GetValidatorScoresData = {
|
|
2763
|
+
query?: {
|
|
2764
|
+
/**
|
|
2765
|
+
* Only runs after this time
|
|
2766
|
+
*/
|
|
2767
|
+
since?: (string | null);
|
|
2768
|
+
/**
|
|
2769
|
+
* Suite ID (defaults to active suite)
|
|
2770
|
+
*/
|
|
2771
|
+
suite_id?: (number | null);
|
|
2772
|
+
/**
|
|
2773
|
+
* Only runs before this time
|
|
2774
|
+
*/
|
|
2775
|
+
until?: (string | null);
|
|
2776
|
+
};
|
|
2777
|
+
};
|
|
2778
|
+
type GetValidatorScoresResponse = (ValidatorScoresResponse);
|
|
2779
|
+
type GetValidatorScoresError = (HTTPValidationError);
|
|
2780
|
+
type GetAgentVersionVarianceData = {
|
|
2781
|
+
query?: {
|
|
2782
|
+
/**
|
|
2783
|
+
* Number of agent versions
|
|
2784
|
+
*/
|
|
2785
|
+
limit?: number;
|
|
2786
|
+
/**
|
|
2787
|
+
* Only versions after this time
|
|
2788
|
+
*/
|
|
2789
|
+
since?: (string | null);
|
|
2790
|
+
/**
|
|
2791
|
+
* Suite ID (defaults to active suite)
|
|
2792
|
+
*/
|
|
2793
|
+
suite_id?: (number | null);
|
|
2794
|
+
/**
|
|
2795
|
+
* Only versions before this time
|
|
2796
|
+
*/
|
|
2797
|
+
until?: (string | null);
|
|
2798
|
+
/**
|
|
2799
|
+
* Spread threshold for flagging
|
|
2800
|
+
*/
|
|
2801
|
+
variance_threshold?: number;
|
|
2802
|
+
};
|
|
2803
|
+
};
|
|
2804
|
+
type GetAgentVersionVarianceResponse = (AgentVersionVarianceResponse);
|
|
2805
|
+
type GetAgentVersionVarianceError = (HTTPValidationError);
|
|
2617
2806
|
|
|
2618
2807
|
declare const client: _hey_api_client_fetch.Client<Request, Response, unknown, _hey_api_client_fetch.RequestOptions<boolean, string>>;
|
|
2619
2808
|
/**
|
|
@@ -2886,6 +3075,16 @@ declare const listAgentVersions1: <ThrowOnError extends boolean = false>(options
|
|
|
2886
3075
|
* List evaluation runs with optional filtering and pagination.
|
|
2887
3076
|
*/
|
|
2888
3077
|
declare const listEvaluationRuns: <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<ListEvaluationRunsData, ThrowOnError>) => _hey_api_client_fetch.RequestResult<AdminEvaluationRunsResponse, HTTPValidationError, ThrowOnError>;
|
|
3078
|
+
/**
|
|
3079
|
+
* Aggregated scoring statistics per validator
|
|
3080
|
+
* Compute per-validator scoring statistics from completed runs.
|
|
3081
|
+
*/
|
|
3082
|
+
declare const getValidatorScores: <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<GetValidatorScoresData, ThrowOnError>) => _hey_api_client_fetch.RequestResult<ValidatorScoresResponse, HTTPValidationError, ThrowOnError>;
|
|
3083
|
+
/**
|
|
3084
|
+
* Score variance across validators for recent agent versions
|
|
3085
|
+
* Find agent versions with high score variance across validators.
|
|
3086
|
+
*/
|
|
3087
|
+
declare const getAgentVersionVariance: <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<GetAgentVersionVarianceData, ThrowOnError>) => _hey_api_client_fetch.RequestResult<AgentVersionVarianceResponse, HTTPValidationError, ThrowOnError>;
|
|
2889
3088
|
|
|
2890
3089
|
/**
|
|
2891
3090
|
* Auto-generated error code type.
|
|
@@ -3238,4 +3437,4 @@ declare class SessionAuthManager {
|
|
|
3238
3437
|
*/
|
|
3239
3438
|
declare function configureSessionAuth(baseUrl: string, config: SessionAuthConfig): SessionAuthManager;
|
|
3240
3439
|
|
|
3241
|
-
export { type ActivateSuiteData, type ActivateSuiteError, type ActivateSuiteResponse, type ActivateSuiteResponse2, type AdminAgentVersionEntry, type AdminAgentVersionsResponse, type AdminEvaluationRunEntry, type AdminEvaluationRunsResponse, type AdminMinerEntry, type AdminMinersResponse, type AdmissionReason, type AdmissionStatus, type AgentNotFoundError, type AgentPublic, type AgentVersionHistoryEntry, type AgentVersionNotFoundError, type AgentVersionProblemsResponse, type AgentVersionPublic, type AgentVersionState, type AgentVersionStatus, type AlreadyInvalidatedError, type ArtifactDownloadRequest, type ArtifactDownloadResponse, type ArtifactNotFoundError, type ArtifactNotReleasedError, type ArtifactReleaseState, type ArtifactType, type AtCapacityError, type AuditEventEntry, type AuditEventsResponse, type BanMinerData, type BanMinerError, type BanMinerResponse, type BanRequest, type BanResponse, type BanValidatorData, type BanValidatorError, type BanValidatorResponse, type BittensorAuthConfig, type Body_submit_agent, type CachedSession, type CancelAgentVersionData, type CancelAgentVersionError, type CancelAgentVersionResponse, type CancelRequest, type CancelResponse, type ChallengeRequest, type ChallengeResponse, type ChutesAuthStatusResponse, type ClaimWorkData, type ClaimWorkError, type ClaimWorkResponse, type ClaimWorkResponse2, type ClearMinerCooldownData, type ClearMinerCooldownError, type ClearMinerCooldownResponse, type CodeAnalysisError, type CompleteRunData, type CompleteRunError, type CompleteRunRequest, type CompleteRunResponse, type CompleteRunResponse2, type CooldownActiveError, type CreateSessionEndpointData, type CreateSessionEndpointError, type CreateSessionEndpointResponse, type CreateSuiteData, type CreateSuiteError, type CreateSuiteRequest, type CreateSuiteResponse, type CreateSuiteResponse2, type DiscardAgentVersionData, type DiscardAgentVersionError, type DiscardAgentVersionResponse, type DiscardRequest, type DiscardResponse, type 
ErrorCategory, type EvalRunNotFoundError, type EvaluationRunDetail, type EvaluationRunPublic, type EvaluationRunStatus, type EvaluationRunStatusPublic, type FileTooLargeError, type GetAgentVersionData, type GetAgentVersionError, type GetAgentVersionProblemsData, type GetAgentVersionProblemsError, type GetAgentVersionProblemsResponse, type GetAgentVersionResponse, type GetAgentVersionRunsData, type GetAgentVersionRunsError, type GetAgentVersionRunsResponse, type GetAgentVersionStatusData, type GetAgentVersionStatusError, type GetAgentVersionStatusResponse, type GetArtifactDownloadUrlData, type GetArtifactDownloadUrlError, type GetArtifactDownloadUrlResponse, type GetAuditEventsData, type GetAuditEventsError, type GetAuditEventsResponse, type GetChutesAuthStatusError, type GetChutesAuthStatusResponse, type GetCurrentSuiteError, type GetCurrentSuiteResponse, type GetEvaluationRunData, type GetEvaluationRunError, type GetEvaluationRunResponse, type GetLeaderboardData, type GetLeaderboardError, type GetLeaderboardResponse, type GetOwnedAgentVersionStatusData, type GetOwnedAgentVersionStatusError, type GetOwnedAgentVersionStatusResponse, type GetPendingEvaluationsData, type GetPendingEvaluationsError, type GetPendingEvaluationsResponse, type GetReaperStatsError, type GetReaperStatsResponse, type GetRunningEvaluationsError, type GetRunningEvaluationsResponse, type GetSuiteProblemsData, type GetSuiteProblemsError, type GetSuiteProblemsResponse, type GetTopAgentError, type GetTopAgentResponse, type GetValidatorsError, type GetValidatorsResponse, type HTTPValidationError, type HealthCheckError, type HealthCheckResponse, type HeartbeatData, type HeartbeatError, type HeartbeatRequest, type HeartbeatResponse, type HeartbeatResponse2, type InvalidAgentNameError, type InvalidArtifactTypeError, type InvalidFileError, type InvalidProblemIdError, type InvalidateEvaluationRunData, type InvalidateEvaluationRunError, type InvalidateEvaluationRunResponse, type InvalidateRunRequest, type 
JoinWaitlistData, type JoinWaitlistError, type JoinWaitlistResponse, type LeaderboardEntry, type LeaderboardResponse, type LeaseExpiredError, type ListAgentVersions1Data, type ListAgentVersions1Error, type ListAgentVersions1Response, type ListAgentVersionsData, type ListAgentVersionsError, type ListAgentVersionsResponse, type ListEvaluationRunsData, type ListEvaluationRunsError, type ListEvaluationRunsResponse, type ListMinerAgentsError, type ListMinerAgentsResponse, type ListMinersData, type ListMinersError, type ListMinersResponse, type LogoutData, type LogoutError, type LogoutResponse, type LogoutResponse2, type MinerAgentsResponse, type MinerNotFoundError, type MissingParameterError, type MissingScoreError, type NoActiveSuiteError, type NotRunOwnerError, type OroErrorCode, type PendingEvaluation, type PendingEvaluationSummary, type PendingEvaluationsResponse, type PresignUploadData, type PresignUploadError, type PresignUploadRequest, type PresignUploadResponse, type PresignUploadResponse2, type ProblemNotFoundError, type ProblemProgressEntry, type ProblemProgressUpdate, type ProblemPublic, type ProblemStatus, type ProgressUpdateRequest, type ProgressUpdateResponse, type RateLimitExceededError, type ReaperStatsResponse, type ReevaluateAgentVersionData, type ReevaluateAgentVersionError, type ReevaluateAgentVersionResponse, type ReevaluateRequest, type ReevaluateResponse, type ReinstateAgentVersionData, type ReinstateAgentVersionError, type ReinstateAgentVersionResponse, type ReinstateRequest, type RequestChallengeData, type RequestChallengeError, type RequestChallengeResponse, type RetryConfig, type RetryContext, type RunAlreadyCompleteError, type RunningEvaluation, type ScoreBelowThresholdError, type SessionAuthConfig, SessionAuthManager, type SessionInfo, type SessionRequest, type SessionResponse, type SetTopAgentData, type SetTopAgentError, type SetTopAgentResponse, type SetTopRequest, type SetTopResponse, type StoreChutesTokenData, type StoreChutesTokenError, 
type StoreChutesTokenRequest, type StoreChutesTokenResponse, type SubmitAgentData, type SubmitAgentError, type SubmitAgentResponse, type SubmitAgentResponse2, type SuiteNotFoundError, type SuitePublic, type SuiteWithProblemsResponse, type TerminalStatus, type TopAgentResponse, type UnbanMinerData, type UnbanMinerError, type UnbanMinerResponse, type UnbanValidatorData, type UnbanValidatorError, type UnbanValidatorResponse, type UpdateProgressData, type UpdateProgressError, type UpdateProgressResponse, type ValidationError, type ValidatorCurrentAgent, type ValidatorNotFoundError, type ValidatorProblemResult, type ValidatorPublic, type ValidatorStatus, type WaitlistSignupRequest, type WaitlistSignupResponse, type WorkItemStatus, activateSuite, banMiner, banValidator, cancelAgentVersion, claimWork, classifyError, classifyStatus, clearMinerCooldown, client, completeRun, computeDelay, configureBittensorAuth, configurePublicClient, configureSessionAuth, createRetryFetch, createSessionEndpoint, createSuite, discardAgentVersion, generateAuthHeaders, getAgentVersion, getAgentVersionProblems, getAgentVersionRuns, getAgentVersionStatus, getArtifactDownloadUrl, getAuditEvents, getChutesAuthStatus, getCurrentSuite, getErrorCode, getErrorDetail, getEvaluationRun, getLeaderboard, getOwnedAgentVersionStatus, getPendingEvaluations, getReaperStats, getRunningEvaluations, getSuiteProblems, getTopAgent, getValidators, hasDetail, hasErrorCode, healthCheck, heartbeat, invalidateEvaluationRun, isTransient, isTransientError, joinWaitlist, listAgentVersions, listAgentVersions1, listEvaluationRuns, listMinerAgents, listMiners, logout, parseRetryAfter, presignUpload, reevaluateAgentVersion, reinstateAgentVersion, requestChallenge, setTopAgent, storeChutesToken, submitAgent, unbanMiner, unbanValidator, updateProgress };
|
|
3440
|
+
export { type ActivateSuiteData, type ActivateSuiteError, type ActivateSuiteResponse, type ActivateSuiteResponse2, type AdminAgentVersionEntry, type AdminAgentVersionsResponse, type AdminEvaluationRunEntry, type AdminEvaluationRunsResponse, type AdminMinerEntry, type AdminMinersResponse, type AdmissionReason, type AdmissionStatus, type AgentNotFoundError, type AgentPublic, type AgentVersionHistoryEntry, type AgentVersionNotFoundError, type AgentVersionProblemsResponse, type AgentVersionPublic, type AgentVersionScoreEntry, type AgentVersionState, type AgentVersionStatus, type AgentVersionVariance, type AgentVersionVarianceResponse, type AlreadyInvalidatedError, type ArtifactDownloadRequest, type ArtifactDownloadResponse, type ArtifactNotFoundError, type ArtifactNotReleasedError, type ArtifactReleaseState, type ArtifactType, type AtCapacityError, type AuditEventEntry, type AuditEventsResponse, type BanMinerData, type BanMinerError, type BanMinerResponse, type BanRequest, type BanResponse, type BanValidatorData, type BanValidatorError, type BanValidatorResponse, type BittensorAuthConfig, type Body_submit_agent, type CachedSession, type CancelAgentVersionData, type CancelAgentVersionError, type CancelAgentVersionResponse, type CancelRequest, type CancelResponse, type ChallengeRequest, type ChallengeResponse, type ChutesAuthStatusResponse, type ClaimWorkData, type ClaimWorkError, type ClaimWorkResponse, type ClaimWorkResponse2, type ClearMinerCooldownData, type ClearMinerCooldownError, type ClearMinerCooldownResponse, type CodeAnalysisError, type CompleteRunData, type CompleteRunError, type CompleteRunRequest, type CompleteRunResponse, type CompleteRunResponse2, type CooldownActiveError, type CreateSessionEndpointData, type CreateSessionEndpointError, type CreateSessionEndpointResponse, type CreateSuiteData, type CreateSuiteError, type CreateSuiteRequest, type CreateSuiteResponse, type CreateSuiteResponse2, type DiscardAgentVersionData, type DiscardAgentVersionError, 
type DiscardAgentVersionResponse, type DiscardRequest, type DiscardResponse, type ErrorCategory, type EvalRunNotFoundError, type EvaluationRunDetail, type EvaluationRunPublic, type EvaluationRunStatus, type EvaluationRunStatusPublic, type FileTooLargeError, type GetAgentVersionData, type GetAgentVersionError, type GetAgentVersionProblemsData, type GetAgentVersionProblemsError, type GetAgentVersionProblemsResponse, type GetAgentVersionResponse, type GetAgentVersionRunsData, type GetAgentVersionRunsError, type GetAgentVersionRunsResponse, type GetAgentVersionStatusData, type GetAgentVersionStatusError, type GetAgentVersionStatusResponse, type GetAgentVersionVarianceData, type GetAgentVersionVarianceError, type GetAgentVersionVarianceResponse, type GetArtifactDownloadUrlData, type GetArtifactDownloadUrlError, type GetArtifactDownloadUrlResponse, type GetAuditEventsData, type GetAuditEventsError, type GetAuditEventsResponse, type GetChutesAuthStatusError, type GetChutesAuthStatusResponse, type GetCurrentSuiteError, type GetCurrentSuiteResponse, type GetEvaluationRunData, type GetEvaluationRunError, type GetEvaluationRunResponse, type GetLeaderboardData, type GetLeaderboardError, type GetLeaderboardResponse, type GetOwnedAgentVersionStatusData, type GetOwnedAgentVersionStatusError, type GetOwnedAgentVersionStatusResponse, type GetPendingEvaluationsData, type GetPendingEvaluationsError, type GetPendingEvaluationsResponse, type GetReaperStatsError, type GetReaperStatsResponse, type GetRunningEvaluationsError, type GetRunningEvaluationsResponse, type GetSuiteProblemsData, type GetSuiteProblemsError, type GetSuiteProblemsResponse, type GetTopAgentError, type GetTopAgentResponse, type GetValidatorScoresData, type GetValidatorScoresError, type GetValidatorScoresResponse, type GetValidatorsError, type GetValidatorsResponse, type HTTPValidationError, type HealthCheckError, type HealthCheckResponse, type HeartbeatData, type HeartbeatError, type HeartbeatRequest, type 
HeartbeatResponse, type HeartbeatResponse2, type InvalidAgentNameError, type InvalidArtifactTypeError, type InvalidFileError, type InvalidProblemIdError, type InvalidateEvaluationRunData, type InvalidateEvaluationRunError, type InvalidateEvaluationRunResponse, type InvalidateRunRequest, type JoinWaitlistData, type JoinWaitlistError, type JoinWaitlistResponse, type LeaderboardEntry, type LeaderboardResponse, type LeaseExpiredError, type ListAgentVersions1Data, type ListAgentVersions1Error, type ListAgentVersions1Response, type ListAgentVersionsData, type ListAgentVersionsError, type ListAgentVersionsResponse, type ListEvaluationRunsData, type ListEvaluationRunsError, type ListEvaluationRunsResponse, type ListMinerAgentsError, type ListMinerAgentsResponse, type ListMinersData, type ListMinersError, type ListMinersResponse, type LogoutData, type LogoutError, type LogoutResponse, type LogoutResponse2, type MinerAgentsResponse, type MinerNotFoundError, type MissingParameterError, type MissingScoreError, type NoActiveSuiteError, type NotRunOwnerError, type OroErrorCode, type PendingEvaluation, type PendingEvaluationSummary, type PendingEvaluationsResponse, type PresignUploadData, type PresignUploadError, type PresignUploadRequest, type PresignUploadResponse, type PresignUploadResponse2, type ProblemNotFoundError, type ProblemProgressEntry, type ProblemProgressUpdate, type ProblemPublic, type ProblemStatus, type ProgressUpdateRequest, type ProgressUpdateResponse, type RateLimitExceededError, type ReaperStatsResponse, type ReevaluateAgentVersionData, type ReevaluateAgentVersionError, type ReevaluateAgentVersionResponse, type ReevaluateRequest, type ReevaluateResponse, type ReinstateAgentVersionData, type ReinstateAgentVersionError, type ReinstateAgentVersionResponse, type ReinstateRequest, type RequestChallengeData, type RequestChallengeError, type RequestChallengeResponse, type RetryConfig, type RetryContext, type RunAlreadyCompleteError, type RunningEvaluation, type 
ScoreBelowThresholdError, type SessionAuthConfig, SessionAuthManager, type SessionInfo, type SessionRequest, type SessionResponse, type SetTopAgentData, type SetTopAgentError, type SetTopAgentResponse, type SetTopRequest, type SetTopResponse, type StoreChutesTokenData, type StoreChutesTokenError, type StoreChutesTokenRequest, type StoreChutesTokenResponse, type SubmitAgentData, type SubmitAgentError, type SubmitAgentResponse, type SubmitAgentResponse2, type SuiteNotFoundError, type SuitePublic, type SuiteWithProblemsResponse, type TerminalStatus, type TopAgentResponse, type UnbanMinerData, type UnbanMinerError, type UnbanMinerResponse, type UnbanValidatorData, type UnbanValidatorError, type UnbanValidatorResponse, type UpdateProgressData, type UpdateProgressError, type UpdateProgressResponse, type ValidationError, type ValidatorCurrentAgent, type ValidatorNotFoundError, type ValidatorProblemResult, type ValidatorPublic, type ValidatorScoreSummary, type ValidatorScoresResponse, type ValidatorStatus, type WaitlistSignupRequest, type WaitlistSignupResponse, type WorkItemStatus, activateSuite, banMiner, banValidator, cancelAgentVersion, claimWork, classifyError, classifyStatus, clearMinerCooldown, client, completeRun, computeDelay, configureBittensorAuth, configurePublicClient, configureSessionAuth, createRetryFetch, createSessionEndpoint, createSuite, discardAgentVersion, generateAuthHeaders, getAgentVersion, getAgentVersionProblems, getAgentVersionRuns, getAgentVersionStatus, getAgentVersionVariance, getArtifactDownloadUrl, getAuditEvents, getChutesAuthStatus, getCurrentSuite, getErrorCode, getErrorDetail, getEvaluationRun, getLeaderboard, getOwnedAgentVersionStatus, getPendingEvaluations, getReaperStats, getRunningEvaluations, getSuiteProblems, getTopAgent, getValidatorScores, getValidators, hasDetail, hasErrorCode, healthCheck, heartbeat, invalidateEvaluationRun, isTransient, isTransientError, joinWaitlist, listAgentVersions, listAgentVersions1, listEvaluationRuns, 
listMinerAgents, listMiners, logout, parseRetryAfter, presignUpload, reevaluateAgentVersion, reinstateAgentVersion, requestChallenge, setTopAgent, storeChutesToken, submitAgent, unbanMiner, unbanValidator, updateProgress };
|
package/dist/index.js
CHANGED
|
@@ -54,6 +54,7 @@ __export(index_exports, {
|
|
|
54
54
|
getAgentVersionProblems: () => getAgentVersionProblems,
|
|
55
55
|
getAgentVersionRuns: () => getAgentVersionRuns,
|
|
56
56
|
getAgentVersionStatus: () => getAgentVersionStatus,
|
|
57
|
+
getAgentVersionVariance: () => getAgentVersionVariance,
|
|
57
58
|
getArtifactDownloadUrl: () => getArtifactDownloadUrl,
|
|
58
59
|
getAuditEvents: () => getAuditEvents,
|
|
59
60
|
getChutesAuthStatus: () => getChutesAuthStatus,
|
|
@@ -68,6 +69,7 @@ __export(index_exports, {
|
|
|
68
69
|
getRunningEvaluations: () => getRunningEvaluations,
|
|
69
70
|
getSuiteProblems: () => getSuiteProblems,
|
|
70
71
|
getTopAgent: () => getTopAgent,
|
|
72
|
+
getValidatorScores: () => getValidatorScores,
|
|
71
73
|
getValidators: () => getValidators,
|
|
72
74
|
hasDetail: () => hasDetail,
|
|
73
75
|
hasErrorCode: () => hasErrorCode,
|
|
@@ -387,6 +389,18 @@ var listEvaluationRuns = (options) => {
|
|
|
387
389
|
url: "/v1/admin/evaluation-runs"
|
|
388
390
|
});
|
|
389
391
|
};
|
|
392
|
+
var getValidatorScores = (options) => {
|
|
393
|
+
return (options?.client ?? client).get({
|
|
394
|
+
...options,
|
|
395
|
+
url: "/v1/admin/analytics/validator-scores"
|
|
396
|
+
});
|
|
397
|
+
};
|
|
398
|
+
var getAgentVersionVariance = (options) => {
|
|
399
|
+
return (options?.client ?? client).get({
|
|
400
|
+
...options,
|
|
401
|
+
url: "/v1/admin/analytics/agent-version-variance"
|
|
402
|
+
});
|
|
403
|
+
};
|
|
390
404
|
|
|
391
405
|
// src/errors.ts
|
|
392
406
|
function classifyStatus(status) {
|
|
@@ -773,6 +787,7 @@ function configureSessionAuth(baseUrl, config) {
|
|
|
773
787
|
getAgentVersionProblems,
|
|
774
788
|
getAgentVersionRuns,
|
|
775
789
|
getAgentVersionStatus,
|
|
790
|
+
getAgentVersionVariance,
|
|
776
791
|
getArtifactDownloadUrl,
|
|
777
792
|
getAuditEvents,
|
|
778
793
|
getChutesAuthStatus,
|
|
@@ -787,6 +802,7 @@ function configureSessionAuth(baseUrl, config) {
|
|
|
787
802
|
getRunningEvaluations,
|
|
788
803
|
getSuiteProblems,
|
|
789
804
|
getTopAgent,
|
|
805
|
+
getValidatorScores,
|
|
790
806
|
getValidators,
|
|
791
807
|
hasDetail,
|
|
792
808
|
hasErrorCode,
|
package/dist/index.mjs
CHANGED
|
@@ -288,6 +288,18 @@ var listEvaluationRuns = (options) => {
|
|
|
288
288
|
url: "/v1/admin/evaluation-runs"
|
|
289
289
|
});
|
|
290
290
|
};
|
|
291
|
+
var getValidatorScores = (options) => {
|
|
292
|
+
return (options?.client ?? client).get({
|
|
293
|
+
...options,
|
|
294
|
+
url: "/v1/admin/analytics/validator-scores"
|
|
295
|
+
});
|
|
296
|
+
};
|
|
297
|
+
var getAgentVersionVariance = (options) => {
|
|
298
|
+
return (options?.client ?? client).get({
|
|
299
|
+
...options,
|
|
300
|
+
url: "/v1/admin/analytics/agent-version-variance"
|
|
301
|
+
});
|
|
302
|
+
};
|
|
291
303
|
|
|
292
304
|
// src/errors.ts
|
|
293
305
|
function classifyStatus(status) {
|
|
@@ -673,6 +685,7 @@ export {
|
|
|
673
685
|
getAgentVersionProblems,
|
|
674
686
|
getAgentVersionRuns,
|
|
675
687
|
getAgentVersionStatus,
|
|
688
|
+
getAgentVersionVariance,
|
|
676
689
|
getArtifactDownloadUrl,
|
|
677
690
|
getAuditEvents,
|
|
678
691
|
getChutesAuthStatus,
|
|
@@ -687,6 +700,7 @@ export {
|
|
|
687
700
|
getRunningEvaluations,
|
|
688
701
|
getSuiteProblems,
|
|
689
702
|
getTopAgent,
|
|
703
|
+
getValidatorScores,
|
|
690
704
|
getValidators,
|
|
691
705
|
hasDetail,
|
|
692
706
|
hasErrorCode,
|
package/package.json
CHANGED
package/src/generated/sdk.gen.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// This file is auto-generated by @hey-api/openapi-ts
|
|
2
2
|
|
|
3
3
|
import { createClient, createConfig, type OptionsLegacyParser, formDataBodySerializer } from '@hey-api/client-fetch';
|
|
4
|
-
import type { HealthCheckError, HealthCheckResponse, GetCurrentSuiteError, GetCurrentSuiteResponse, GetSuiteProblemsData, GetSuiteProblemsError, GetSuiteProblemsResponse, GetLeaderboardData, GetLeaderboardError, GetLeaderboardResponse, GetTopAgentError, GetTopAgentResponse, GetAgentVersionStatusData, GetAgentVersionStatusError, GetAgentVersionStatusResponse, GetAgentVersionRunsData, GetAgentVersionRunsError, GetAgentVersionRunsResponse, GetAgentVersionProblemsData, GetAgentVersionProblemsError, GetAgentVersionProblemsResponse, GetAgentVersionData, GetAgentVersionError, GetAgentVersionResponse, GetArtifactDownloadUrlData, GetArtifactDownloadUrlError, GetArtifactDownloadUrlResponse, GetEvaluationRunData, GetEvaluationRunError, GetEvaluationRunResponse, GetValidatorsError, GetValidatorsResponse, GetRunningEvaluationsError, GetRunningEvaluationsResponse, GetPendingEvaluationsData, GetPendingEvaluationsError, GetPendingEvaluationsResponse, JoinWaitlistData, JoinWaitlistError, JoinWaitlistResponse, RequestChallengeData, RequestChallengeError, RequestChallengeResponse, CreateSessionEndpointData, CreateSessionEndpointError, CreateSessionEndpointResponse, LogoutData, LogoutError, LogoutResponse2, SubmitAgentData, SubmitAgentError, SubmitAgentResponse2, StoreChutesTokenData, StoreChutesTokenError, StoreChutesTokenResponse, GetChutesAuthStatusError, GetChutesAuthStatusResponse, ListMinerAgentsError, ListMinerAgentsResponse, ListAgentVersionsData, ListAgentVersionsError, ListAgentVersionsResponse, GetOwnedAgentVersionStatusData, GetOwnedAgentVersionStatusError, GetOwnedAgentVersionStatusResponse, ClaimWorkData, ClaimWorkError, ClaimWorkResponse2, HeartbeatData, HeartbeatError, HeartbeatResponse2, UpdateProgressData, UpdateProgressError, UpdateProgressResponse, PresignUploadData, PresignUploadError, PresignUploadResponse2, CompleteRunData, CompleteRunError, CompleteRunResponse2, BanMinerData, BanMinerError, BanMinerResponse, UnbanMinerData, UnbanMinerError, UnbanMinerResponse, 
BanValidatorData, BanValidatorError, BanValidatorResponse, UnbanValidatorData, UnbanValidatorError, UnbanValidatorResponse, DiscardAgentVersionData, DiscardAgentVersionError, DiscardAgentVersionResponse, ReinstateAgentVersionData, ReinstateAgentVersionError, ReinstateAgentVersionResponse, SetTopAgentData, SetTopAgentError, SetTopAgentResponse, InvalidateEvaluationRunData, InvalidateEvaluationRunError, InvalidateEvaluationRunResponse, ReevaluateAgentVersionData, ReevaluateAgentVersionError, ReevaluateAgentVersionResponse, CancelAgentVersionData, CancelAgentVersionError, CancelAgentVersionResponse, CreateSuiteData, CreateSuiteError, CreateSuiteResponse2, ActivateSuiteData, ActivateSuiteError, ActivateSuiteResponse2, GetAuditEventsData, GetAuditEventsError, GetAuditEventsResponse, GetReaperStatsError, GetReaperStatsResponse, ClearMinerCooldownData, ClearMinerCooldownError, ClearMinerCooldownResponse, ListMinersData, ListMinersError, ListMinersResponse, ListAgentVersions1Data, ListAgentVersions1Error, ListAgentVersions1Response, ListEvaluationRunsData, ListEvaluationRunsError, ListEvaluationRunsResponse } from './types.gen';
|
|
4
|
+
import type { HealthCheckError, HealthCheckResponse, GetCurrentSuiteError, GetCurrentSuiteResponse, GetSuiteProblemsData, GetSuiteProblemsError, GetSuiteProblemsResponse, GetLeaderboardData, GetLeaderboardError, GetLeaderboardResponse, GetTopAgentError, GetTopAgentResponse, GetAgentVersionStatusData, GetAgentVersionStatusError, GetAgentVersionStatusResponse, GetAgentVersionRunsData, GetAgentVersionRunsError, GetAgentVersionRunsResponse, GetAgentVersionProblemsData, GetAgentVersionProblemsError, GetAgentVersionProblemsResponse, GetAgentVersionData, GetAgentVersionError, GetAgentVersionResponse, GetArtifactDownloadUrlData, GetArtifactDownloadUrlError, GetArtifactDownloadUrlResponse, GetEvaluationRunData, GetEvaluationRunError, GetEvaluationRunResponse, GetValidatorsError, GetValidatorsResponse, GetRunningEvaluationsError, GetRunningEvaluationsResponse, GetPendingEvaluationsData, GetPendingEvaluationsError, GetPendingEvaluationsResponse, JoinWaitlistData, JoinWaitlistError, JoinWaitlistResponse, RequestChallengeData, RequestChallengeError, RequestChallengeResponse, CreateSessionEndpointData, CreateSessionEndpointError, CreateSessionEndpointResponse, LogoutData, LogoutError, LogoutResponse2, SubmitAgentData, SubmitAgentError, SubmitAgentResponse2, StoreChutesTokenData, StoreChutesTokenError, StoreChutesTokenResponse, GetChutesAuthStatusError, GetChutesAuthStatusResponse, ListMinerAgentsError, ListMinerAgentsResponse, ListAgentVersionsData, ListAgentVersionsError, ListAgentVersionsResponse, GetOwnedAgentVersionStatusData, GetOwnedAgentVersionStatusError, GetOwnedAgentVersionStatusResponse, ClaimWorkData, ClaimWorkError, ClaimWorkResponse2, HeartbeatData, HeartbeatError, HeartbeatResponse2, UpdateProgressData, UpdateProgressError, UpdateProgressResponse, PresignUploadData, PresignUploadError, PresignUploadResponse2, CompleteRunData, CompleteRunError, CompleteRunResponse2, BanMinerData, BanMinerError, BanMinerResponse, UnbanMinerData, UnbanMinerError, UnbanMinerResponse, 
BanValidatorData, BanValidatorError, BanValidatorResponse, UnbanValidatorData, UnbanValidatorError, UnbanValidatorResponse, DiscardAgentVersionData, DiscardAgentVersionError, DiscardAgentVersionResponse, ReinstateAgentVersionData, ReinstateAgentVersionError, ReinstateAgentVersionResponse, SetTopAgentData, SetTopAgentError, SetTopAgentResponse, InvalidateEvaluationRunData, InvalidateEvaluationRunError, InvalidateEvaluationRunResponse, ReevaluateAgentVersionData, ReevaluateAgentVersionError, ReevaluateAgentVersionResponse, CancelAgentVersionData, CancelAgentVersionError, CancelAgentVersionResponse, CreateSuiteData, CreateSuiteError, CreateSuiteResponse2, ActivateSuiteData, ActivateSuiteError, ActivateSuiteResponse2, GetAuditEventsData, GetAuditEventsError, GetAuditEventsResponse, GetReaperStatsError, GetReaperStatsResponse, ClearMinerCooldownData, ClearMinerCooldownError, ClearMinerCooldownResponse, ListMinersData, ListMinersError, ListMinersResponse, ListAgentVersions1Data, ListAgentVersions1Error, ListAgentVersions1Response, ListEvaluationRunsData, ListEvaluationRunsError, ListEvaluationRunsResponse, GetValidatorScoresData, GetValidatorScoresError, GetValidatorScoresResponse, GetAgentVersionVarianceData, GetAgentVersionVarianceError, GetAgentVersionVarianceResponse } from './types.gen';
|
|
5
5
|
|
|
6
6
|
export const client = createClient(createConfig());
|
|
7
7
|
|
|
@@ -560,4 +560,26 @@ export const listEvaluationRuns = <ThrowOnError extends boolean = false>(options
|
|
|
560
560
|
...options,
|
|
561
561
|
url: '/v1/admin/evaluation-runs'
|
|
562
562
|
});
|
|
563
|
+
};
|
|
564
|
+
|
|
565
|
+
/**
|
|
566
|
+
* Aggregated scoring statistics per validator
|
|
567
|
+
* Compute per-validator scoring statistics from completed runs.
|
|
568
|
+
*/
|
|
569
|
+
export const getValidatorScores = <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<GetValidatorScoresData, ThrowOnError>) => {
|
|
570
|
+
return (options?.client ?? client).get<GetValidatorScoresResponse, GetValidatorScoresError, ThrowOnError>({
|
|
571
|
+
...options,
|
|
572
|
+
url: '/v1/admin/analytics/validator-scores'
|
|
573
|
+
});
|
|
574
|
+
};
|
|
575
|
+
|
|
576
|
+
/**
|
|
577
|
+
* Score variance across validators for recent agent versions
|
|
578
|
+
* Find agent versions with high score variance across validators.
|
|
579
|
+
*/
|
|
580
|
+
export const getAgentVersionVariance = <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<GetAgentVersionVarianceData, ThrowOnError>) => {
|
|
581
|
+
return (options?.client ?? client).get<GetAgentVersionVarianceResponse, GetAgentVersionVarianceError, ThrowOnError>({
|
|
582
|
+
...options,
|
|
583
|
+
url: '/v1/admin/analytics/agent-version-variance'
|
|
584
|
+
});
|
|
563
585
|
};
|
|
@@ -384,6 +384,24 @@ export type AgentVersionPublic = {
|
|
|
384
384
|
latest_final_score?: (number | null);
|
|
385
385
|
};
|
|
386
386
|
|
|
387
|
+
/**
|
|
388
|
+
* Per-validator score for an agent version.
|
|
389
|
+
*/
|
|
390
|
+
export type AgentVersionScoreEntry = {
|
|
391
|
+
/**
|
|
392
|
+
* Validator hotkey
|
|
393
|
+
*/
|
|
394
|
+
validator_hotkey: string;
|
|
395
|
+
/**
|
|
396
|
+
* Score from this validator
|
|
397
|
+
*/
|
|
398
|
+
score: number;
|
|
399
|
+
/**
|
|
400
|
+
* Evaluation run ID
|
|
401
|
+
*/
|
|
402
|
+
run_id: string;
|
|
403
|
+
};
|
|
404
|
+
|
|
387
405
|
/**
|
|
388
406
|
* State of an agent version evaluation.
|
|
389
407
|
*/
|
|
@@ -457,6 +475,66 @@ export type AgentVersionStatus = {
|
|
|
457
475
|
} | null);
|
|
458
476
|
};
|
|
459
477
|
|
|
478
|
+
/**
|
|
479
|
+
* Score variance across validators for a single agent version.
|
|
480
|
+
*/
|
|
481
|
+
export type AgentVersionVariance = {
|
|
482
|
+
/**
|
|
483
|
+
* Agent version ID
|
|
484
|
+
*/
|
|
485
|
+
agent_version_id: string;
|
|
486
|
+
/**
|
|
487
|
+
* Agent name
|
|
488
|
+
*/
|
|
489
|
+
agent_name: string;
|
|
490
|
+
/**
|
|
491
|
+
* Miner hotkey
|
|
492
|
+
*/
|
|
493
|
+
miner_hotkey: string;
|
|
494
|
+
/**
|
|
495
|
+
* Number of validators that scored this version
|
|
496
|
+
*/
|
|
497
|
+
validator_count: number;
|
|
498
|
+
/**
|
|
499
|
+
* Mean score
|
|
500
|
+
*/
|
|
501
|
+
avg_score: number;
|
|
502
|
+
/**
|
|
503
|
+
* Minimum score
|
|
504
|
+
*/
|
|
505
|
+
min_score: number;
|
|
506
|
+
/**
|
|
507
|
+
* Maximum score
|
|
508
|
+
*/
|
|
509
|
+
max_score: number;
|
|
510
|
+
/**
|
|
511
|
+
* max_score - min_score
|
|
512
|
+
*/
|
|
513
|
+
spread: number;
|
|
514
|
+
/**
|
|
515
|
+
* True if spread exceeds threshold (default 10%)
|
|
516
|
+
*/
|
|
517
|
+
is_high_variance: boolean;
|
|
518
|
+
/**
|
|
519
|
+
* Individual validator scores
|
|
520
|
+
*/
|
|
521
|
+
per_validator: Array<AgentVersionScoreEntry>;
|
|
522
|
+
};
|
|
523
|
+
|
|
524
|
+
/**
|
|
525
|
+
* Response for agent version score variance analytics.
|
|
526
|
+
*/
|
|
527
|
+
export type AgentVersionVarianceResponse = {
|
|
528
|
+
/**
|
|
529
|
+
* Per-version variance data
|
|
530
|
+
*/
|
|
531
|
+
agent_versions: Array<AgentVersionVariance>;
|
|
532
|
+
/**
|
|
533
|
+
* Spread threshold used for flagging
|
|
534
|
+
*/
|
|
535
|
+
variance_threshold: number;
|
|
536
|
+
};
|
|
537
|
+
|
|
460
538
|
/**
|
|
461
539
|
* 409 - Resource is already invalidated.
|
|
462
540
|
*/
|
|
@@ -962,6 +1040,18 @@ export type EvaluationRunDetail = {
|
|
|
962
1040
|
* Validator-reported failure reason
|
|
963
1041
|
*/
|
|
964
1042
|
failure_reason?: (string | null);
|
|
1043
|
+
/**
|
|
1044
|
+
* Whether run is included in aggregate scoring
|
|
1045
|
+
*/
|
|
1046
|
+
is_included?: boolean;
|
|
1047
|
+
/**
|
|
1048
|
+
* When run was invalidated
|
|
1049
|
+
*/
|
|
1050
|
+
invalidated_at?: (string | null);
|
|
1051
|
+
/**
|
|
1052
|
+
* Reason for invalidation
|
|
1053
|
+
*/
|
|
1054
|
+
invalidation_reason?: (string | null);
|
|
965
1055
|
};
|
|
966
1056
|
|
|
967
1057
|
/**
|
|
@@ -2181,6 +2271,66 @@ export type ValidatorPublic = {
|
|
|
2181
2271
|
identity_description?: (string | null);
|
|
2182
2272
|
};
|
|
2183
2273
|
|
|
2274
|
+
/**
|
|
2275
|
+
* Response for validator scoring analytics.
|
|
2276
|
+
*/
|
|
2277
|
+
export type ValidatorScoresResponse = {
|
|
2278
|
+
/**
|
|
2279
|
+
* Per-validator summaries
|
|
2280
|
+
*/
|
|
2281
|
+
validators: Array<ValidatorScoreSummary>;
|
|
2282
|
+
/**
|
|
2283
|
+
* Global average score across all validators
|
|
2284
|
+
*/
|
|
2285
|
+
global_avg_score: number;
|
|
2286
|
+
/**
|
|
2287
|
+
* Global standard deviation
|
|
2288
|
+
*/
|
|
2289
|
+
global_stddev: number;
|
|
2290
|
+
};
|
|
2291
|
+
|
|
2292
|
+
/**
|
|
2293
|
+
* Aggregated scoring stats for a single validator.
|
|
2294
|
+
*/
|
|
2295
|
+
export type ValidatorScoreSummary = {
|
|
2296
|
+
/**
|
|
2297
|
+
* Validator hotkey
|
|
2298
|
+
*/
|
|
2299
|
+
validator_hotkey: string;
|
|
2300
|
+
/**
|
|
2301
|
+
* Total completed runs
|
|
2302
|
+
*/
|
|
2303
|
+
total_runs: number;
|
|
2304
|
+
/**
|
|
2305
|
+
* Mean score across runs
|
|
2306
|
+
*/
|
|
2307
|
+
avg_score: number;
|
|
2308
|
+
/**
|
|
2309
|
+
* Median score
|
|
2310
|
+
*/
|
|
2311
|
+
median_score: number;
|
|
2312
|
+
/**
|
|
2313
|
+
* Standard deviation of scores
|
|
2314
|
+
*/
|
|
2315
|
+
stddev_score: number;
|
|
2316
|
+
/**
|
|
2317
|
+
* Minimum score
|
|
2318
|
+
*/
|
|
2319
|
+
min_score: number;
|
|
2320
|
+
/**
|
|
2321
|
+
* Maximum score
|
|
2322
|
+
*/
|
|
2323
|
+
max_score: number;
|
|
2324
|
+
/**
|
|
2325
|
+
* Percentage deviation from the global average (negative = below)
|
|
2326
|
+
*/
|
|
2327
|
+
deviation_from_global: number;
|
|
2328
|
+
/**
|
|
2329
|
+
* True if deviation exceeds 1.5 standard deviations from global mean
|
|
2330
|
+
*/
|
|
2331
|
+
is_outlier: boolean;
|
|
2332
|
+
};
|
|
2333
|
+
|
|
2184
2334
|
/**
|
|
2185
2335
|
* Status of a validator.
|
|
2186
2336
|
*/
|
|
@@ -2852,4 +3002,54 @@ export type ListEvaluationRunsData = {
|
|
|
2852
3002
|
|
|
2853
3003
|
export type ListEvaluationRunsResponse = (AdminEvaluationRunsResponse);
|
|
2854
3004
|
|
|
2855
|
-
export type ListEvaluationRunsError = (HTTPValidationError);
|
|
3005
|
+
export type ListEvaluationRunsError = (HTTPValidationError);
|
|
3006
|
+
|
|
3007
|
+
export type GetValidatorScoresData = {
|
|
3008
|
+
query?: {
|
|
3009
|
+
/**
|
|
3010
|
+
* Only runs after this time
|
|
3011
|
+
*/
|
|
3012
|
+
since?: (string | null);
|
|
3013
|
+
/**
|
|
3014
|
+
* Suite ID (defaults to active suite)
|
|
3015
|
+
*/
|
|
3016
|
+
suite_id?: (number | null);
|
|
3017
|
+
/**
|
|
3018
|
+
* Only runs before this time
|
|
3019
|
+
*/
|
|
3020
|
+
until?: (string | null);
|
|
3021
|
+
};
|
|
3022
|
+
};
|
|
3023
|
+
|
|
3024
|
+
export type GetValidatorScoresResponse = (ValidatorScoresResponse);
|
|
3025
|
+
|
|
3026
|
+
export type GetValidatorScoresError = (HTTPValidationError);
|
|
3027
|
+
|
|
3028
|
+
export type GetAgentVersionVarianceData = {
|
|
3029
|
+
query?: {
|
|
3030
|
+
/**
|
|
3031
|
+
* Number of agent versions
|
|
3032
|
+
*/
|
|
3033
|
+
limit?: number;
|
|
3034
|
+
/**
|
|
3035
|
+
* Only versions after this time
|
|
3036
|
+
*/
|
|
3037
|
+
since?: (string | null);
|
|
3038
|
+
/**
|
|
3039
|
+
* Suite ID (defaults to active suite)
|
|
3040
|
+
*/
|
|
3041
|
+
suite_id?: (number | null);
|
|
3042
|
+
/**
|
|
3043
|
+
* Only versions before this time
|
|
3044
|
+
*/
|
|
3045
|
+
until?: (string | null);
|
|
3046
|
+
/**
|
|
3047
|
+
* Spread threshold for flagging
|
|
3048
|
+
*/
|
|
3049
|
+
variance_threshold?: number;
|
|
3050
|
+
};
|
|
3051
|
+
};
|
|
3052
|
+
|
|
3053
|
+
export type GetAgentVersionVarianceResponse = (AgentVersionVarianceResponse);
|
|
3054
|
+
|
|
3055
|
+
export type GetAgentVersionVarianceError = (HTTPValidationError);
|