@oro-ai/sdk 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -370,6 +370,23 @@ type AgentVersionPublic = {
370
370
  */
371
371
  latest_final_score?: (number | null);
372
372
  };
373
+ /**
374
+ * Per-validator score for an agent version.
375
+ */
376
+ type AgentVersionScoreEntry = {
377
+ /**
378
+ * Validator hotkey
379
+ */
380
+ validator_hotkey: string;
381
+ /**
382
+ * Score from this validator
383
+ */
384
+ score: number;
385
+ /**
386
+ * Evaluation run ID
387
+ */
388
+ run_id: string;
389
+ };
373
390
  /**
374
391
  * State of an agent version evaluation.
375
392
  */
@@ -441,6 +458,64 @@ type AgentVersionStatus = {
441
458
  [key: string]: (number);
442
459
  } | null);
443
460
  };
461
+ /**
462
+ * Score variance across validators for a single agent version.
463
+ */
464
+ type AgentVersionVariance = {
465
+ /**
466
+ * Agent version ID
467
+ */
468
+ agent_version_id: string;
469
+ /**
470
+ * Agent name
471
+ */
472
+ agent_name: string;
473
+ /**
474
+ * Miner hotkey
475
+ */
476
+ miner_hotkey: string;
477
+ /**
478
+ * Number of validators that scored this version
479
+ */
480
+ validator_count: number;
481
+ /**
482
+ * Mean score
483
+ */
484
+ avg_score: number;
485
+ /**
486
+ * Minimum score
487
+ */
488
+ min_score: number;
489
+ /**
490
+ * Maximum score
491
+ */
492
+ max_score: number;
493
+ /**
494
+ * max_score - min_score
495
+ */
496
+ spread: number;
497
+ /**
498
+ * True if spread exceeds threshold (default 10%)
499
+ */
500
+ is_high_variance: boolean;
501
+ /**
502
+ * Individual validator scores
503
+ */
504
+ per_validator: Array<AgentVersionScoreEntry>;
505
+ };
506
+ /**
507
+ * Response for agent version score variance analytics.
508
+ */
509
+ type AgentVersionVarianceResponse = {
510
+ /**
511
+ * Per-version variance data
512
+ */
513
+ agent_versions: Array<AgentVersionVariance>;
514
+ /**
515
+ * Spread threshold used for flagging
516
+ */
517
+ variance_threshold: number;
518
+ };
444
519
  /**
445
520
  * 409 - Resource is already invalidated.
446
521
  */
@@ -918,6 +993,18 @@ type EvaluationRunDetail = {
918
993
  * Validator-reported failure reason
919
994
  */
920
995
  failure_reason?: (string | null);
996
+ /**
997
+ * Whether run is included in aggregate scoring
998
+ */
999
+ is_included?: boolean;
1000
+ /**
1001
+ * When run was invalidated
1002
+ */
1003
+ invalidated_at?: (string | null);
1004
+ /**
1005
+ * Reason for invalidation
1006
+ */
1007
+ invalidation_reason?: (string | null);
921
1008
  };
922
1009
  /**
923
1010
  * Public representation of an evaluation run.
@@ -2078,6 +2165,64 @@ type ValidatorPublic = {
2078
2165
  */
2079
2166
  identity_description?: (string | null);
2080
2167
  };
2168
+ /**
2169
+ * Response for validator scoring analytics.
2170
+ */
2171
+ type ValidatorScoresResponse = {
2172
+ /**
2173
+ * Per-validator summaries
2174
+ */
2175
+ validators: Array<ValidatorScoreSummary>;
2176
+ /**
2177
+ * Global average score across all validators
2178
+ */
2179
+ global_avg_score: number;
2180
+ /**
2181
+ * Global standard deviation
2182
+ */
2183
+ global_stddev: number;
2184
+ };
2185
+ /**
2186
+ * Aggregated scoring stats for a single validator.
2187
+ */
2188
+ type ValidatorScoreSummary = {
2189
+ /**
2190
+ * Validator hotkey
2191
+ */
2192
+ validator_hotkey: string;
2193
+ /**
2194
+ * Total completed runs
2195
+ */
2196
+ total_runs: number;
2197
+ /**
2198
+ * Mean score across runs
2199
+ */
2200
+ avg_score: number;
2201
+ /**
2202
+ * Median score
2203
+ */
2204
+ median_score: number;
2205
+ /**
2206
+ * Standard deviation of scores
2207
+ */
2208
+ stddev_score: number;
2209
+ /**
2210
+ * Minimum score
2211
+ */
2212
+ min_score: number;
2213
+ /**
2214
+ * Maximum score
2215
+ */
2216
+ max_score: number;
2217
+ /**
2218
+ * Percentage deviation from the global average (negative = below)
2219
+ */
2220
+ deviation_from_global: number;
2221
+ /**
2222
+ * True if deviation exceeds 1.5 standard deviations from global mean
2223
+ */
2224
+ is_outlier: boolean;
2225
+ };
2081
2226
  /**
2082
2227
  * Status of a validator.
2083
2228
  */
@@ -2614,6 +2759,50 @@ type ListEvaluationRunsData = {
2614
2759
  };
2615
2760
  type ListEvaluationRunsResponse = (AdminEvaluationRunsResponse);
2616
2761
  type ListEvaluationRunsError = (HTTPValidationError);
2762
+ type GetValidatorScoresData = {
2763
+ query?: {
2764
+ /**
2765
+ * Only runs after this time
2766
+ */
2767
+ since?: (string | null);
2768
+ /**
2769
+ * Suite ID (defaults to active suite)
2770
+ */
2771
+ suite_id?: (number | null);
2772
+ /**
2773
+ * Only runs before this time
2774
+ */
2775
+ until?: (string | null);
2776
+ };
2777
+ };
2778
+ type GetValidatorScoresResponse = (ValidatorScoresResponse);
2779
+ type GetValidatorScoresError = (HTTPValidationError);
2780
+ type GetAgentVersionVarianceData = {
2781
+ query?: {
2782
+ /**
2783
+ * Number of agent versions
2784
+ */
2785
+ limit?: number;
2786
+ /**
2787
+ * Only versions after this time
2788
+ */
2789
+ since?: (string | null);
2790
+ /**
2791
+ * Suite ID (defaults to active suite)
2792
+ */
2793
+ suite_id?: (number | null);
2794
+ /**
2795
+ * Only versions before this time
2796
+ */
2797
+ until?: (string | null);
2798
+ /**
2799
+ * Spread threshold for flagging
2800
+ */
2801
+ variance_threshold?: number;
2802
+ };
2803
+ };
2804
+ type GetAgentVersionVarianceResponse = (AgentVersionVarianceResponse);
2805
+ type GetAgentVersionVarianceError = (HTTPValidationError);
2617
2806
 
2618
2807
  declare const client: _hey_api_client_fetch.Client<Request, Response, unknown, _hey_api_client_fetch.RequestOptions<boolean, string>>;
2619
2808
  /**
@@ -2886,6 +3075,16 @@ declare const listAgentVersions1: <ThrowOnError extends boolean = false>(options
2886
3075
  * List evaluation runs with optional filtering and pagination.
2887
3076
  */
2888
3077
  declare const listEvaluationRuns: <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<ListEvaluationRunsData, ThrowOnError>) => _hey_api_client_fetch.RequestResult<AdminEvaluationRunsResponse, HTTPValidationError, ThrowOnError>;
3078
+ /**
3079
+ * Aggregated scoring statistics per validator
3080
+ * Compute per-validator scoring statistics from completed runs.
3081
+ */
3082
+ declare const getValidatorScores: <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<GetValidatorScoresData, ThrowOnError>) => _hey_api_client_fetch.RequestResult<ValidatorScoresResponse, HTTPValidationError, ThrowOnError>;
3083
+ /**
3084
+ * Score variance across validators for recent agent versions
3085
+ * Find agent versions with high score variance across validators.
3086
+ */
3087
+ declare const getAgentVersionVariance: <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<GetAgentVersionVarianceData, ThrowOnError>) => _hey_api_client_fetch.RequestResult<AgentVersionVarianceResponse, HTTPValidationError, ThrowOnError>;
2889
3088
 
2890
3089
  /**
2891
3090
  * Auto-generated error code type.
@@ -3238,4 +3437,4 @@ declare class SessionAuthManager {
3238
3437
  */
3239
3438
  declare function configureSessionAuth(baseUrl: string, config: SessionAuthConfig): SessionAuthManager;
3240
3439
 
3241
- export { type ActivateSuiteData, type ActivateSuiteError, type ActivateSuiteResponse, type ActivateSuiteResponse2, type AdminAgentVersionEntry, type AdminAgentVersionsResponse, type AdminEvaluationRunEntry, type AdminEvaluationRunsResponse, type AdminMinerEntry, type AdminMinersResponse, type AdmissionReason, type AdmissionStatus, type AgentNotFoundError, type AgentPublic, type AgentVersionHistoryEntry, type AgentVersionNotFoundError, type AgentVersionProblemsResponse, type AgentVersionPublic, type AgentVersionState, type AgentVersionStatus, type AlreadyInvalidatedError, type ArtifactDownloadRequest, type ArtifactDownloadResponse, type ArtifactNotFoundError, type ArtifactNotReleasedError, type ArtifactReleaseState, type ArtifactType, type AtCapacityError, type AuditEventEntry, type AuditEventsResponse, type BanMinerData, type BanMinerError, type BanMinerResponse, type BanRequest, type BanResponse, type BanValidatorData, type BanValidatorError, type BanValidatorResponse, type BittensorAuthConfig, type Body_submit_agent, type CachedSession, type CancelAgentVersionData, type CancelAgentVersionError, type CancelAgentVersionResponse, type CancelRequest, type CancelResponse, type ChallengeRequest, type ChallengeResponse, type ChutesAuthStatusResponse, type ClaimWorkData, type ClaimWorkError, type ClaimWorkResponse, type ClaimWorkResponse2, type ClearMinerCooldownData, type ClearMinerCooldownError, type ClearMinerCooldownResponse, type CodeAnalysisError, type CompleteRunData, type CompleteRunError, type CompleteRunRequest, type CompleteRunResponse, type CompleteRunResponse2, type CooldownActiveError, type CreateSessionEndpointData, type CreateSessionEndpointError, type CreateSessionEndpointResponse, type CreateSuiteData, type CreateSuiteError, type CreateSuiteRequest, type CreateSuiteResponse, type CreateSuiteResponse2, type DiscardAgentVersionData, type DiscardAgentVersionError, type DiscardAgentVersionResponse, type DiscardRequest, type DiscardResponse, type ErrorCategory, type EvalRunNotFoundError, type EvaluationRunDetail, type EvaluationRunPublic, type EvaluationRunStatus, type EvaluationRunStatusPublic, type FileTooLargeError, type GetAgentVersionData, type GetAgentVersionError, type GetAgentVersionProblemsData, type GetAgentVersionProblemsError, type GetAgentVersionProblemsResponse, type GetAgentVersionResponse, type GetAgentVersionRunsData, type GetAgentVersionRunsError, type GetAgentVersionRunsResponse, type GetAgentVersionStatusData, type GetAgentVersionStatusError, type GetAgentVersionStatusResponse, type GetArtifactDownloadUrlData, type GetArtifactDownloadUrlError, type GetArtifactDownloadUrlResponse, type GetAuditEventsData, type GetAuditEventsError, type GetAuditEventsResponse, type GetChutesAuthStatusError, type GetChutesAuthStatusResponse, type GetCurrentSuiteError, type GetCurrentSuiteResponse, type GetEvaluationRunData, type GetEvaluationRunError, type GetEvaluationRunResponse, type GetLeaderboardData, type GetLeaderboardError, type GetLeaderboardResponse, type GetOwnedAgentVersionStatusData, type GetOwnedAgentVersionStatusError, type GetOwnedAgentVersionStatusResponse, type GetPendingEvaluationsData, type GetPendingEvaluationsError, type GetPendingEvaluationsResponse, type GetReaperStatsError, type GetReaperStatsResponse, type GetRunningEvaluationsError, type GetRunningEvaluationsResponse, type GetSuiteProblemsData, type GetSuiteProblemsError, type GetSuiteProblemsResponse, type GetTopAgentError, type GetTopAgentResponse, type GetValidatorsError, type GetValidatorsResponse, type HTTPValidationError, type HealthCheckError, type HealthCheckResponse, type HeartbeatData, type HeartbeatError, type HeartbeatRequest, type HeartbeatResponse, type HeartbeatResponse2, type InvalidAgentNameError, type InvalidArtifactTypeError, type InvalidFileError, type InvalidProblemIdError, type InvalidateEvaluationRunData, type InvalidateEvaluationRunError, type InvalidateEvaluationRunResponse, type InvalidateRunRequest, type JoinWaitlistData, type JoinWaitlistError, type JoinWaitlistResponse, type LeaderboardEntry, type LeaderboardResponse, type LeaseExpiredError, type ListAgentVersions1Data, type ListAgentVersions1Error, type ListAgentVersions1Response, type ListAgentVersionsData, type ListAgentVersionsError, type ListAgentVersionsResponse, type ListEvaluationRunsData, type ListEvaluationRunsError, type ListEvaluationRunsResponse, type ListMinerAgentsError, type ListMinerAgentsResponse, type ListMinersData, type ListMinersError, type ListMinersResponse, type LogoutData, type LogoutError, type LogoutResponse, type LogoutResponse2, type MinerAgentsResponse, type MinerNotFoundError, type MissingParameterError, type MissingScoreError, type NoActiveSuiteError, type NotRunOwnerError, type OroErrorCode, type PendingEvaluation, type PendingEvaluationSummary, type PendingEvaluationsResponse, type PresignUploadData, type PresignUploadError, type PresignUploadRequest, type PresignUploadResponse, type PresignUploadResponse2, type ProblemNotFoundError, type ProblemProgressEntry, type ProblemProgressUpdate, type ProblemPublic, type ProblemStatus, type ProgressUpdateRequest, type ProgressUpdateResponse, type RateLimitExceededError, type ReaperStatsResponse, type ReevaluateAgentVersionData, type ReevaluateAgentVersionError, type ReevaluateAgentVersionResponse, type ReevaluateRequest, type ReevaluateResponse, type ReinstateAgentVersionData, type ReinstateAgentVersionError, type ReinstateAgentVersionResponse, type ReinstateRequest, type RequestChallengeData, type RequestChallengeError, type RequestChallengeResponse, type RetryConfig, type RetryContext, type RunAlreadyCompleteError, type RunningEvaluation, type ScoreBelowThresholdError, type SessionAuthConfig, SessionAuthManager, type SessionInfo, type SessionRequest, type SessionResponse, type SetTopAgentData, type SetTopAgentError, type SetTopAgentResponse, type SetTopRequest, type SetTopResponse, type StoreChutesTokenData, type StoreChutesTokenError, type StoreChutesTokenRequest, type StoreChutesTokenResponse, type SubmitAgentData, type SubmitAgentError, type SubmitAgentResponse, type SubmitAgentResponse2, type SuiteNotFoundError, type SuitePublic, type SuiteWithProblemsResponse, type TerminalStatus, type TopAgentResponse, type UnbanMinerData, type UnbanMinerError, type UnbanMinerResponse, type UnbanValidatorData, type UnbanValidatorError, type UnbanValidatorResponse, type UpdateProgressData, type UpdateProgressError, type UpdateProgressResponse, type ValidationError, type ValidatorCurrentAgent, type ValidatorNotFoundError, type ValidatorProblemResult, type ValidatorPublic, type ValidatorStatus, type WaitlistSignupRequest, type WaitlistSignupResponse, type WorkItemStatus, activateSuite, banMiner, banValidator, cancelAgentVersion, claimWork, classifyError, classifyStatus, clearMinerCooldown, client, completeRun, computeDelay, configureBittensorAuth, configurePublicClient, configureSessionAuth, createRetryFetch, createSessionEndpoint, createSuite, discardAgentVersion, generateAuthHeaders, getAgentVersion, getAgentVersionProblems, getAgentVersionRuns, getAgentVersionStatus, getArtifactDownloadUrl, getAuditEvents, getChutesAuthStatus, getCurrentSuite, getErrorCode, getErrorDetail, getEvaluationRun, getLeaderboard, getOwnedAgentVersionStatus, getPendingEvaluations, getReaperStats, getRunningEvaluations, getSuiteProblems, getTopAgent, getValidators, hasDetail, hasErrorCode, healthCheck, heartbeat, invalidateEvaluationRun, isTransient, isTransientError, joinWaitlist, listAgentVersions, listAgentVersions1, listEvaluationRuns, listMinerAgents, listMiners, logout, parseRetryAfter, presignUpload, reevaluateAgentVersion, reinstateAgentVersion, requestChallenge, setTopAgent, storeChutesToken, submitAgent, unbanMiner, unbanValidator, updateProgress };
3440
+ export { type ActivateSuiteData, type ActivateSuiteError, type ActivateSuiteResponse, type ActivateSuiteResponse2, type AdminAgentVersionEntry, type AdminAgentVersionsResponse, type AdminEvaluationRunEntry, type AdminEvaluationRunsResponse, type AdminMinerEntry, type AdminMinersResponse, type AdmissionReason, type AdmissionStatus, type AgentNotFoundError, type AgentPublic, type AgentVersionHistoryEntry, type AgentVersionNotFoundError, type AgentVersionProblemsResponse, type AgentVersionPublic, type AgentVersionScoreEntry, type AgentVersionState, type AgentVersionStatus, type AgentVersionVariance, type AgentVersionVarianceResponse, type AlreadyInvalidatedError, type ArtifactDownloadRequest, type ArtifactDownloadResponse, type ArtifactNotFoundError, type ArtifactNotReleasedError, type ArtifactReleaseState, type ArtifactType, type AtCapacityError, type AuditEventEntry, type AuditEventsResponse, type BanMinerData, type BanMinerError, type BanMinerResponse, type BanRequest, type BanResponse, type BanValidatorData, type BanValidatorError, type BanValidatorResponse, type BittensorAuthConfig, type Body_submit_agent, type CachedSession, type CancelAgentVersionData, type CancelAgentVersionError, type CancelAgentVersionResponse, type CancelRequest, type CancelResponse, type ChallengeRequest, type ChallengeResponse, type ChutesAuthStatusResponse, type ClaimWorkData, type ClaimWorkError, type ClaimWorkResponse, type ClaimWorkResponse2, type ClearMinerCooldownData, type ClearMinerCooldownError, type ClearMinerCooldownResponse, type CodeAnalysisError, type CompleteRunData, type CompleteRunError, type CompleteRunRequest, type CompleteRunResponse, type CompleteRunResponse2, type CooldownActiveError, type CreateSessionEndpointData, type CreateSessionEndpointError, type CreateSessionEndpointResponse, type CreateSuiteData, type CreateSuiteError, type CreateSuiteRequest, type CreateSuiteResponse, type CreateSuiteResponse2, type DiscardAgentVersionData, type DiscardAgentVersionError, type DiscardAgentVersionResponse, type DiscardRequest, type DiscardResponse, type ErrorCategory, type EvalRunNotFoundError, type EvaluationRunDetail, type EvaluationRunPublic, type EvaluationRunStatus, type EvaluationRunStatusPublic, type FileTooLargeError, type GetAgentVersionData, type GetAgentVersionError, type GetAgentVersionProblemsData, type GetAgentVersionProblemsError, type GetAgentVersionProblemsResponse, type GetAgentVersionResponse, type GetAgentVersionRunsData, type GetAgentVersionRunsError, type GetAgentVersionRunsResponse, type GetAgentVersionStatusData, type GetAgentVersionStatusError, type GetAgentVersionStatusResponse, type GetAgentVersionVarianceData, type GetAgentVersionVarianceError, type GetAgentVersionVarianceResponse, type GetArtifactDownloadUrlData, type GetArtifactDownloadUrlError, type GetArtifactDownloadUrlResponse, type GetAuditEventsData, type GetAuditEventsError, type GetAuditEventsResponse, type GetChutesAuthStatusError, type GetChutesAuthStatusResponse, type GetCurrentSuiteError, type GetCurrentSuiteResponse, type GetEvaluationRunData, type GetEvaluationRunError, type GetEvaluationRunResponse, type GetLeaderboardData, type GetLeaderboardError, type GetLeaderboardResponse, type GetOwnedAgentVersionStatusData, type GetOwnedAgentVersionStatusError, type GetOwnedAgentVersionStatusResponse, type GetPendingEvaluationsData, type GetPendingEvaluationsError, type GetPendingEvaluationsResponse, type GetReaperStatsError, type GetReaperStatsResponse, type GetRunningEvaluationsError, type GetRunningEvaluationsResponse, type GetSuiteProblemsData, type GetSuiteProblemsError, type GetSuiteProblemsResponse, type GetTopAgentError, type GetTopAgentResponse, type GetValidatorScoresData, type GetValidatorScoresError, type GetValidatorScoresResponse, type GetValidatorsError, type GetValidatorsResponse, type HTTPValidationError, type HealthCheckError, type HealthCheckResponse, type HeartbeatData, type HeartbeatError, type HeartbeatRequest, type HeartbeatResponse, type HeartbeatResponse2, type InvalidAgentNameError, type InvalidArtifactTypeError, type InvalidFileError, type InvalidProblemIdError, type InvalidateEvaluationRunData, type InvalidateEvaluationRunError, type InvalidateEvaluationRunResponse, type InvalidateRunRequest, type JoinWaitlistData, type JoinWaitlistError, type JoinWaitlistResponse, type LeaderboardEntry, type LeaderboardResponse, type LeaseExpiredError, type ListAgentVersions1Data, type ListAgentVersions1Error, type ListAgentVersions1Response, type ListAgentVersionsData, type ListAgentVersionsError, type ListAgentVersionsResponse, type ListEvaluationRunsData, type ListEvaluationRunsError, type ListEvaluationRunsResponse, type ListMinerAgentsError, type ListMinerAgentsResponse, type ListMinersData, type ListMinersError, type ListMinersResponse, type LogoutData, type LogoutError, type LogoutResponse, type LogoutResponse2, type MinerAgentsResponse, type MinerNotFoundError, type MissingParameterError, type MissingScoreError, type NoActiveSuiteError, type NotRunOwnerError, type OroErrorCode, type PendingEvaluation, type PendingEvaluationSummary, type PendingEvaluationsResponse, type PresignUploadData, type PresignUploadError, type PresignUploadRequest, type PresignUploadResponse, type PresignUploadResponse2, type ProblemNotFoundError, type ProblemProgressEntry, type ProblemProgressUpdate, type ProblemPublic, type ProblemStatus, type ProgressUpdateRequest, type ProgressUpdateResponse, type RateLimitExceededError, type ReaperStatsResponse, type ReevaluateAgentVersionData, type ReevaluateAgentVersionError, type ReevaluateAgentVersionResponse, type ReevaluateRequest, type ReevaluateResponse, type ReinstateAgentVersionData, type ReinstateAgentVersionError, type ReinstateAgentVersionResponse, type ReinstateRequest, type RequestChallengeData, type RequestChallengeError, type RequestChallengeResponse, type RetryConfig, type RetryContext, type RunAlreadyCompleteError, type RunningEvaluation, type ScoreBelowThresholdError, type SessionAuthConfig, SessionAuthManager, type SessionInfo, type SessionRequest, type SessionResponse, type SetTopAgentData, type SetTopAgentError, type SetTopAgentResponse, type SetTopRequest, type SetTopResponse, type StoreChutesTokenData, type StoreChutesTokenError, type StoreChutesTokenRequest, type StoreChutesTokenResponse, type SubmitAgentData, type SubmitAgentError, type SubmitAgentResponse, type SubmitAgentResponse2, type SuiteNotFoundError, type SuitePublic, type SuiteWithProblemsResponse, type TerminalStatus, type TopAgentResponse, type UnbanMinerData, type UnbanMinerError, type UnbanMinerResponse, type UnbanValidatorData, type UnbanValidatorError, type UnbanValidatorResponse, type UpdateProgressData, type UpdateProgressError, type UpdateProgressResponse, type ValidationError, type ValidatorCurrentAgent, type ValidatorNotFoundError, type ValidatorProblemResult, type ValidatorPublic, type ValidatorScoreSummary, type ValidatorScoresResponse, type ValidatorStatus, type WaitlistSignupRequest, type WaitlistSignupResponse, type WorkItemStatus, activateSuite, banMiner, banValidator, cancelAgentVersion, claimWork, classifyError, classifyStatus, clearMinerCooldown, client, completeRun, computeDelay, configureBittensorAuth, configurePublicClient, configureSessionAuth, createRetryFetch, createSessionEndpoint, createSuite, discardAgentVersion, generateAuthHeaders, getAgentVersion, getAgentVersionProblems, getAgentVersionRuns, getAgentVersionStatus, getAgentVersionVariance, getArtifactDownloadUrl, getAuditEvents, getChutesAuthStatus, getCurrentSuite, getErrorCode, getErrorDetail, getEvaluationRun, getLeaderboard, getOwnedAgentVersionStatus, getPendingEvaluations, getReaperStats, getRunningEvaluations, getSuiteProblems, getTopAgent, getValidatorScores, getValidators, hasDetail, hasErrorCode, healthCheck, heartbeat, invalidateEvaluationRun, isTransient, isTransientError, joinWaitlist, listAgentVersions, listAgentVersions1, listEvaluationRuns, listMinerAgents, listMiners, logout, parseRetryAfter, presignUpload, reevaluateAgentVersion, reinstateAgentVersion, requestChallenge, setTopAgent, storeChutesToken, submitAgent, unbanMiner, unbanValidator, updateProgress };
package/dist/index.d.ts CHANGED
@@ -370,6 +370,23 @@ type AgentVersionPublic = {
370
370
  */
371
371
  latest_final_score?: (number | null);
372
372
  };
373
+ /**
374
+ * Per-validator score for an agent version.
375
+ */
376
+ type AgentVersionScoreEntry = {
377
+ /**
378
+ * Validator hotkey
379
+ */
380
+ validator_hotkey: string;
381
+ /**
382
+ * Score from this validator
383
+ */
384
+ score: number;
385
+ /**
386
+ * Evaluation run ID
387
+ */
388
+ run_id: string;
389
+ };
373
390
  /**
374
391
  * State of an agent version evaluation.
375
392
  */
@@ -441,6 +458,64 @@ type AgentVersionStatus = {
441
458
  [key: string]: (number);
442
459
  } | null);
443
460
  };
461
+ /**
462
+ * Score variance across validators for a single agent version.
463
+ */
464
+ type AgentVersionVariance = {
465
+ /**
466
+ * Agent version ID
467
+ */
468
+ agent_version_id: string;
469
+ /**
470
+ * Agent name
471
+ */
472
+ agent_name: string;
473
+ /**
474
+ * Miner hotkey
475
+ */
476
+ miner_hotkey: string;
477
+ /**
478
+ * Number of validators that scored this version
479
+ */
480
+ validator_count: number;
481
+ /**
482
+ * Mean score
483
+ */
484
+ avg_score: number;
485
+ /**
486
+ * Minimum score
487
+ */
488
+ min_score: number;
489
+ /**
490
+ * Maximum score
491
+ */
492
+ max_score: number;
493
+ /**
494
+ * max_score - min_score
495
+ */
496
+ spread: number;
497
+ /**
498
+ * True if spread exceeds threshold (default 10%)
499
+ */
500
+ is_high_variance: boolean;
501
+ /**
502
+ * Individual validator scores
503
+ */
504
+ per_validator: Array<AgentVersionScoreEntry>;
505
+ };
506
+ /**
507
+ * Response for agent version score variance analytics.
508
+ */
509
+ type AgentVersionVarianceResponse = {
510
+ /**
511
+ * Per-version variance data
512
+ */
513
+ agent_versions: Array<AgentVersionVariance>;
514
+ /**
515
+ * Spread threshold used for flagging
516
+ */
517
+ variance_threshold: number;
518
+ };
444
519
  /**
445
520
  * 409 - Resource is already invalidated.
446
521
  */
@@ -918,6 +993,18 @@ type EvaluationRunDetail = {
918
993
  * Validator-reported failure reason
919
994
  */
920
995
  failure_reason?: (string | null);
996
+ /**
997
+ * Whether run is included in aggregate scoring
998
+ */
999
+ is_included?: boolean;
1000
+ /**
1001
+ * When run was invalidated
1002
+ */
1003
+ invalidated_at?: (string | null);
1004
+ /**
1005
+ * Reason for invalidation
1006
+ */
1007
+ invalidation_reason?: (string | null);
921
1008
  };
922
1009
  /**
923
1010
  * Public representation of an evaluation run.
@@ -2078,6 +2165,64 @@ type ValidatorPublic = {
2078
2165
  */
2079
2166
  identity_description?: (string | null);
2080
2167
  };
2168
+ /**
2169
+ * Response for validator scoring analytics.
2170
+ */
2171
+ type ValidatorScoresResponse = {
2172
+ /**
2173
+ * Per-validator summaries
2174
+ */
2175
+ validators: Array<ValidatorScoreSummary>;
2176
+ /**
2177
+ * Global average score across all validators
2178
+ */
2179
+ global_avg_score: number;
2180
+ /**
2181
+ * Global standard deviation
2182
+ */
2183
+ global_stddev: number;
2184
+ };
2185
+ /**
2186
+ * Aggregated scoring stats for a single validator.
2187
+ */
2188
+ type ValidatorScoreSummary = {
2189
+ /**
2190
+ * Validator hotkey
2191
+ */
2192
+ validator_hotkey: string;
2193
+ /**
2194
+ * Total completed runs
2195
+ */
2196
+ total_runs: number;
2197
+ /**
2198
+ * Mean score across runs
2199
+ */
2200
+ avg_score: number;
2201
+ /**
2202
+ * Median score
2203
+ */
2204
+ median_score: number;
2205
+ /**
2206
+ * Standard deviation of scores
2207
+ */
2208
+ stddev_score: number;
2209
+ /**
2210
+ * Minimum score
2211
+ */
2212
+ min_score: number;
2213
+ /**
2214
+ * Maximum score
2215
+ */
2216
+ max_score: number;
2217
+ /**
2218
+ * Percentage deviation from the global average (negative = below)
2219
+ */
2220
+ deviation_from_global: number;
2221
+ /**
2222
+ * True if deviation exceeds 1.5 standard deviations from global mean
2223
+ */
2224
+ is_outlier: boolean;
2225
+ };
2081
2226
  /**
2082
2227
  * Status of a validator.
2083
2228
  */
@@ -2614,6 +2759,50 @@ type ListEvaluationRunsData = {
2614
2759
  };
2615
2760
  type ListEvaluationRunsResponse = (AdminEvaluationRunsResponse);
2616
2761
  type ListEvaluationRunsError = (HTTPValidationError);
2762
+ type GetValidatorScoresData = {
2763
+ query?: {
2764
+ /**
2765
+ * Only runs after this time
2766
+ */
2767
+ since?: (string | null);
2768
+ /**
2769
+ * Suite ID (defaults to active suite)
2770
+ */
2771
+ suite_id?: (number | null);
2772
+ /**
2773
+ * Only runs before this time
2774
+ */
2775
+ until?: (string | null);
2776
+ };
2777
+ };
2778
+ type GetValidatorScoresResponse = (ValidatorScoresResponse);
2779
+ type GetValidatorScoresError = (HTTPValidationError);
2780
+ type GetAgentVersionVarianceData = {
2781
+ query?: {
2782
+ /**
2783
+ * Number of agent versions
2784
+ */
2785
+ limit?: number;
2786
+ /**
2787
+ * Only versions after this time
2788
+ */
2789
+ since?: (string | null);
2790
+ /**
2791
+ * Suite ID (defaults to active suite)
2792
+ */
2793
+ suite_id?: (number | null);
2794
+ /**
2795
+ * Only versions before this time
2796
+ */
2797
+ until?: (string | null);
2798
+ /**
2799
+ * Spread threshold for flagging
2800
+ */
2801
+ variance_threshold?: number;
2802
+ };
2803
+ };
2804
+ type GetAgentVersionVarianceResponse = (AgentVersionVarianceResponse);
2805
+ type GetAgentVersionVarianceError = (HTTPValidationError);
2617
2806
 
2618
2807
  declare const client: _hey_api_client_fetch.Client<Request, Response, unknown, _hey_api_client_fetch.RequestOptions<boolean, string>>;
2619
2808
  /**
@@ -2886,6 +3075,16 @@ declare const listAgentVersions1: <ThrowOnError extends boolean = false>(options
2886
3075
  * List evaluation runs with optional filtering and pagination.
2887
3076
  */
2888
3077
  declare const listEvaluationRuns: <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<ListEvaluationRunsData, ThrowOnError>) => _hey_api_client_fetch.RequestResult<AdminEvaluationRunsResponse, HTTPValidationError, ThrowOnError>;
3078
+ /**
3079
+ * Aggregated scoring statistics per validator
3080
+ * Compute per-validator scoring statistics from completed runs.
3081
+ */
3082
+ declare const getValidatorScores: <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<GetValidatorScoresData, ThrowOnError>) => _hey_api_client_fetch.RequestResult<ValidatorScoresResponse, HTTPValidationError, ThrowOnError>;
3083
+ /**
3084
+ * Score variance across validators for recent agent versions
3085
+ * Find agent versions with high score variance across validators.
3086
+ */
3087
+ declare const getAgentVersionVariance: <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<GetAgentVersionVarianceData, ThrowOnError>) => _hey_api_client_fetch.RequestResult<AgentVersionVarianceResponse, HTTPValidationError, ThrowOnError>;
2889
3088
 
2890
3089
  /**
2891
3090
  * Auto-generated error code type.
@@ -3238,4 +3437,4 @@ declare class SessionAuthManager {
3238
3437
  */
3239
3438
  declare function configureSessionAuth(baseUrl: string, config: SessionAuthConfig): SessionAuthManager;
3240
3439
 
3241
- export { type ActivateSuiteData, type ActivateSuiteError, type ActivateSuiteResponse, type ActivateSuiteResponse2, type AdminAgentVersionEntry, type AdminAgentVersionsResponse, type AdminEvaluationRunEntry, type AdminEvaluationRunsResponse, type AdminMinerEntry, type AdminMinersResponse, type AdmissionReason, type AdmissionStatus, type AgentNotFoundError, type AgentPublic, type AgentVersionHistoryEntry, type AgentVersionNotFoundError, type AgentVersionProblemsResponse, type AgentVersionPublic, type AgentVersionState, type AgentVersionStatus, type AlreadyInvalidatedError, type ArtifactDownloadRequest, type ArtifactDownloadResponse, type ArtifactNotFoundError, type ArtifactNotReleasedError, type ArtifactReleaseState, type ArtifactType, type AtCapacityError, type AuditEventEntry, type AuditEventsResponse, type BanMinerData, type BanMinerError, type BanMinerResponse, type BanRequest, type BanResponse, type BanValidatorData, type BanValidatorError, type BanValidatorResponse, type BittensorAuthConfig, type Body_submit_agent, type CachedSession, type CancelAgentVersionData, type CancelAgentVersionError, type CancelAgentVersionResponse, type CancelRequest, type CancelResponse, type ChallengeRequest, type ChallengeResponse, type ChutesAuthStatusResponse, type ClaimWorkData, type ClaimWorkError, type ClaimWorkResponse, type ClaimWorkResponse2, type ClearMinerCooldownData, type ClearMinerCooldownError, type ClearMinerCooldownResponse, type CodeAnalysisError, type CompleteRunData, type CompleteRunError, type CompleteRunRequest, type CompleteRunResponse, type CompleteRunResponse2, type CooldownActiveError, type CreateSessionEndpointData, type CreateSessionEndpointError, type CreateSessionEndpointResponse, type CreateSuiteData, type CreateSuiteError, type CreateSuiteRequest, type CreateSuiteResponse, type CreateSuiteResponse2, type DiscardAgentVersionData, type DiscardAgentVersionError, type DiscardAgentVersionResponse, type DiscardRequest, type DiscardResponse, type ErrorCategory, type EvalRunNotFoundError, type EvaluationRunDetail, type EvaluationRunPublic, type EvaluationRunStatus, type EvaluationRunStatusPublic, type FileTooLargeError, type GetAgentVersionData, type GetAgentVersionError, type GetAgentVersionProblemsData, type GetAgentVersionProblemsError, type GetAgentVersionProblemsResponse, type GetAgentVersionResponse, type GetAgentVersionRunsData, type GetAgentVersionRunsError, type GetAgentVersionRunsResponse, type GetAgentVersionStatusData, type GetAgentVersionStatusError, type GetAgentVersionStatusResponse, type GetArtifactDownloadUrlData, type GetArtifactDownloadUrlError, type GetArtifactDownloadUrlResponse, type GetAuditEventsData, type GetAuditEventsError, type GetAuditEventsResponse, type GetChutesAuthStatusError, type GetChutesAuthStatusResponse, type GetCurrentSuiteError, type GetCurrentSuiteResponse, type GetEvaluationRunData, type GetEvaluationRunError, type GetEvaluationRunResponse, type GetLeaderboardData, type GetLeaderboardError, type GetLeaderboardResponse, type GetOwnedAgentVersionStatusData, type GetOwnedAgentVersionStatusError, type GetOwnedAgentVersionStatusResponse, type GetPendingEvaluationsData, type GetPendingEvaluationsError, type GetPendingEvaluationsResponse, type GetReaperStatsError, type GetReaperStatsResponse, type GetRunningEvaluationsError, type GetRunningEvaluationsResponse, type GetSuiteProblemsData, type GetSuiteProblemsError, type GetSuiteProblemsResponse, type GetTopAgentError, type GetTopAgentResponse, type GetValidatorsError, type GetValidatorsResponse, type HTTPValidationError, type HealthCheckError, type HealthCheckResponse, type HeartbeatData, type HeartbeatError, type HeartbeatRequest, type HeartbeatResponse, type HeartbeatResponse2, type InvalidAgentNameError, type InvalidArtifactTypeError, type InvalidFileError, type InvalidProblemIdError, type InvalidateEvaluationRunData, type InvalidateEvaluationRunError, type InvalidateEvaluationRunResponse, type InvalidateRunRequest, type JoinWaitlistData, type JoinWaitlistError, type JoinWaitlistResponse, type LeaderboardEntry, type LeaderboardResponse, type LeaseExpiredError, type ListAgentVersions1Data, type ListAgentVersions1Error, type ListAgentVersions1Response, type ListAgentVersionsData, type ListAgentVersionsError, type ListAgentVersionsResponse, type ListEvaluationRunsData, type ListEvaluationRunsError, type ListEvaluationRunsResponse, type ListMinerAgentsError, type ListMinerAgentsResponse, type ListMinersData, type ListMinersError, type ListMinersResponse, type LogoutData, type LogoutError, type LogoutResponse, type LogoutResponse2, type MinerAgentsResponse, type MinerNotFoundError, type MissingParameterError, type MissingScoreError, type NoActiveSuiteError, type NotRunOwnerError, type OroErrorCode, type PendingEvaluation, type PendingEvaluationSummary, type PendingEvaluationsResponse, type PresignUploadData, type PresignUploadError, type PresignUploadRequest, type PresignUploadResponse, type PresignUploadResponse2, type ProblemNotFoundError, type ProblemProgressEntry, type ProblemProgressUpdate, type ProblemPublic, type ProblemStatus, type ProgressUpdateRequest, type ProgressUpdateResponse, type RateLimitExceededError, type ReaperStatsResponse, type ReevaluateAgentVersionData, type ReevaluateAgentVersionError, type ReevaluateAgentVersionResponse, type ReevaluateRequest, type ReevaluateResponse, type ReinstateAgentVersionData, type ReinstateAgentVersionError, type ReinstateAgentVersionResponse, type ReinstateRequest, type RequestChallengeData, type RequestChallengeError, type RequestChallengeResponse, type RetryConfig, type RetryContext, type RunAlreadyCompleteError, type RunningEvaluation, type ScoreBelowThresholdError, type SessionAuthConfig, SessionAuthManager, type SessionInfo, type SessionRequest, type SessionResponse, type SetTopAgentData, type SetTopAgentError, type SetTopAgentResponse, type SetTopRequest, type SetTopResponse, type StoreChutesTokenData, type StoreChutesTokenError, type StoreChutesTokenRequest, type StoreChutesTokenResponse, type SubmitAgentData, type SubmitAgentError, type SubmitAgentResponse, type SubmitAgentResponse2, type SuiteNotFoundError, type SuitePublic, type SuiteWithProblemsResponse, type TerminalStatus, type TopAgentResponse, type UnbanMinerData, type UnbanMinerError, type UnbanMinerResponse, type UnbanValidatorData, type UnbanValidatorError, type UnbanValidatorResponse, type UpdateProgressData, type UpdateProgressError, type UpdateProgressResponse, type ValidationError, type ValidatorCurrentAgent, type ValidatorNotFoundError, type ValidatorProblemResult, type ValidatorPublic, type ValidatorStatus, type WaitlistSignupRequest, type WaitlistSignupResponse, type WorkItemStatus, activateSuite, banMiner, banValidator, cancelAgentVersion, claimWork, classifyError, classifyStatus, clearMinerCooldown, client, completeRun, computeDelay, configureBittensorAuth, configurePublicClient, configureSessionAuth, createRetryFetch, createSessionEndpoint, createSuite, discardAgentVersion, generateAuthHeaders, getAgentVersion, getAgentVersionProblems, getAgentVersionRuns, getAgentVersionStatus, getArtifactDownloadUrl, getAuditEvents, getChutesAuthStatus, getCurrentSuite, getErrorCode, getErrorDetail, getEvaluationRun, getLeaderboard, getOwnedAgentVersionStatus, getPendingEvaluations, getReaperStats, getRunningEvaluations, getSuiteProblems, getTopAgent, getValidators, hasDetail, hasErrorCode, healthCheck, heartbeat, invalidateEvaluationRun, isTransient, isTransientError, joinWaitlist, listAgentVersions, listAgentVersions1, listEvaluationRuns, listMinerAgents, listMiners, logout, parseRetryAfter, presignUpload, reevaluateAgentVersion, reinstateAgentVersion, requestChallenge, setTopAgent, storeChutesToken, submitAgent, unbanMiner, unbanValidator, updateProgress };
3440
+ export { type ActivateSuiteData, type ActivateSuiteError, type ActivateSuiteResponse, type ActivateSuiteResponse2, type AdminAgentVersionEntry, type AdminAgentVersionsResponse, type AdminEvaluationRunEntry, type AdminEvaluationRunsResponse, type AdminMinerEntry, type AdminMinersResponse, type AdmissionReason, type AdmissionStatus, type AgentNotFoundError, type AgentPublic, type AgentVersionHistoryEntry, type AgentVersionNotFoundError, type AgentVersionProblemsResponse, type AgentVersionPublic, type AgentVersionScoreEntry, type AgentVersionState, type AgentVersionStatus, type AgentVersionVariance, type AgentVersionVarianceResponse, type AlreadyInvalidatedError, type ArtifactDownloadRequest, type ArtifactDownloadResponse, type ArtifactNotFoundError, type ArtifactNotReleasedError, type ArtifactReleaseState, type ArtifactType, type AtCapacityError, type AuditEventEntry, type AuditEventsResponse, type BanMinerData, type BanMinerError, type BanMinerResponse, type BanRequest, type BanResponse, type BanValidatorData, type BanValidatorError, type BanValidatorResponse, type BittensorAuthConfig, type Body_submit_agent, type CachedSession, type CancelAgentVersionData, type CancelAgentVersionError, type CancelAgentVersionResponse, type CancelRequest, type CancelResponse, type ChallengeRequest, type ChallengeResponse, type ChutesAuthStatusResponse, type ClaimWorkData, type ClaimWorkError, type ClaimWorkResponse, type ClaimWorkResponse2, type ClearMinerCooldownData, type ClearMinerCooldownError, type ClearMinerCooldownResponse, type CodeAnalysisError, type CompleteRunData, type CompleteRunError, type CompleteRunRequest, type CompleteRunResponse, type CompleteRunResponse2, type CooldownActiveError, type CreateSessionEndpointData, type CreateSessionEndpointError, type CreateSessionEndpointResponse, type CreateSuiteData, type CreateSuiteError, type CreateSuiteRequest, type CreateSuiteResponse, type CreateSuiteResponse2, type DiscardAgentVersionData, type DiscardAgentVersionError, type DiscardAgentVersionResponse, type DiscardRequest, type DiscardResponse, type ErrorCategory, type EvalRunNotFoundError, type EvaluationRunDetail, type EvaluationRunPublic, type EvaluationRunStatus, type EvaluationRunStatusPublic, type FileTooLargeError, type GetAgentVersionData, type GetAgentVersionError, type GetAgentVersionProblemsData, type GetAgentVersionProblemsError, type GetAgentVersionProblemsResponse, type GetAgentVersionResponse, type GetAgentVersionRunsData, type GetAgentVersionRunsError, type GetAgentVersionRunsResponse, type GetAgentVersionStatusData, type GetAgentVersionStatusError, type GetAgentVersionStatusResponse, type GetAgentVersionVarianceData, type GetAgentVersionVarianceError, type GetAgentVersionVarianceResponse, type GetArtifactDownloadUrlData, type GetArtifactDownloadUrlError, type GetArtifactDownloadUrlResponse, type GetAuditEventsData, type GetAuditEventsError, type GetAuditEventsResponse, type GetChutesAuthStatusError, type GetChutesAuthStatusResponse, type GetCurrentSuiteError, type GetCurrentSuiteResponse, type GetEvaluationRunData, type GetEvaluationRunError, type GetEvaluationRunResponse, type GetLeaderboardData, type GetLeaderboardError, type GetLeaderboardResponse, type GetOwnedAgentVersionStatusData, type GetOwnedAgentVersionStatusError, type GetOwnedAgentVersionStatusResponse, type GetPendingEvaluationsData, type GetPendingEvaluationsError, type GetPendingEvaluationsResponse, type GetReaperStatsError, type GetReaperStatsResponse, type GetRunningEvaluationsError, type GetRunningEvaluationsResponse, type GetSuiteProblemsData, type GetSuiteProblemsError, type GetSuiteProblemsResponse, type GetTopAgentError, type GetTopAgentResponse, type GetValidatorScoresData, type GetValidatorScoresError, type GetValidatorScoresResponse, type GetValidatorsError, type GetValidatorsResponse, type HTTPValidationError, type HealthCheckError, type HealthCheckResponse, type HeartbeatData, type HeartbeatError, type HeartbeatRequest, type HeartbeatResponse, type HeartbeatResponse2, type InvalidAgentNameError, type InvalidArtifactTypeError, type InvalidFileError, type InvalidProblemIdError, type InvalidateEvaluationRunData, type InvalidateEvaluationRunError, type InvalidateEvaluationRunResponse, type InvalidateRunRequest, type JoinWaitlistData, type JoinWaitlistError, type JoinWaitlistResponse, type LeaderboardEntry, type LeaderboardResponse, type LeaseExpiredError, type ListAgentVersions1Data, type ListAgentVersions1Error, type ListAgentVersions1Response, type ListAgentVersionsData, type ListAgentVersionsError, type ListAgentVersionsResponse, type ListEvaluationRunsData, type ListEvaluationRunsError, type ListEvaluationRunsResponse, type ListMinerAgentsError, type ListMinerAgentsResponse, type ListMinersData, type ListMinersError, type ListMinersResponse, type LogoutData, type LogoutError, type LogoutResponse, type LogoutResponse2, type MinerAgentsResponse, type MinerNotFoundError, type MissingParameterError, type MissingScoreError, type NoActiveSuiteError, type NotRunOwnerError, type OroErrorCode, type PendingEvaluation, type PendingEvaluationSummary, type PendingEvaluationsResponse, type PresignUploadData, type PresignUploadError, type PresignUploadRequest, type PresignUploadResponse, type PresignUploadResponse2, type ProblemNotFoundError, type ProblemProgressEntry, type ProblemProgressUpdate, type ProblemPublic, type ProblemStatus, type ProgressUpdateRequest, type ProgressUpdateResponse, type RateLimitExceededError, type ReaperStatsResponse, type ReevaluateAgentVersionData, type ReevaluateAgentVersionError, type ReevaluateAgentVersionResponse, type ReevaluateRequest, type ReevaluateResponse, type ReinstateAgentVersionData, type ReinstateAgentVersionError, type ReinstateAgentVersionResponse, type ReinstateRequest, type RequestChallengeData, type RequestChallengeError, type RequestChallengeResponse, type RetryConfig, type RetryContext, type RunAlreadyCompleteError, type RunningEvaluation, type ScoreBelowThresholdError, type SessionAuthConfig, SessionAuthManager, type SessionInfo, type SessionRequest, type SessionResponse, type SetTopAgentData, type SetTopAgentError, type SetTopAgentResponse, type SetTopRequest, type SetTopResponse, type StoreChutesTokenData, type StoreChutesTokenError, type StoreChutesTokenRequest, type StoreChutesTokenResponse, type SubmitAgentData, type SubmitAgentError, type SubmitAgentResponse, type SubmitAgentResponse2, type SuiteNotFoundError, type SuitePublic, type SuiteWithProblemsResponse, type TerminalStatus, type TopAgentResponse, type UnbanMinerData, type UnbanMinerError, type UnbanMinerResponse, type UnbanValidatorData, type UnbanValidatorError, type UnbanValidatorResponse, type UpdateProgressData, type UpdateProgressError, type UpdateProgressResponse, type ValidationError, type ValidatorCurrentAgent, type ValidatorNotFoundError, type ValidatorProblemResult, type ValidatorPublic, type ValidatorScoreSummary, type ValidatorScoresResponse, type ValidatorStatus, type WaitlistSignupRequest, type WaitlistSignupResponse, type WorkItemStatus, activateSuite, banMiner, banValidator, cancelAgentVersion, claimWork, classifyError, classifyStatus, clearMinerCooldown, client, completeRun, computeDelay, configureBittensorAuth, configurePublicClient, configureSessionAuth, createRetryFetch, createSessionEndpoint, createSuite, discardAgentVersion, generateAuthHeaders, getAgentVersion, getAgentVersionProblems, getAgentVersionRuns, getAgentVersionStatus, getAgentVersionVariance, getArtifactDownloadUrl, getAuditEvents, getChutesAuthStatus, getCurrentSuite, getErrorCode, getErrorDetail, getEvaluationRun, getLeaderboard, getOwnedAgentVersionStatus, getPendingEvaluations, getReaperStats, getRunningEvaluations, getSuiteProblems, getTopAgent, getValidatorScores, getValidators, hasDetail, hasErrorCode, healthCheck, heartbeat, invalidateEvaluationRun, isTransient, isTransientError, joinWaitlist, listAgentVersions, listAgentVersions1, listEvaluationRuns, listMinerAgents, listMiners, logout, parseRetryAfter, presignUpload, reevaluateAgentVersion, reinstateAgentVersion, requestChallenge, setTopAgent, storeChutesToken, submitAgent, unbanMiner, unbanValidator, updateProgress };
package/dist/index.js CHANGED
@@ -54,6 +54,7 @@ __export(index_exports, {
54
54
  getAgentVersionProblems: () => getAgentVersionProblems,
55
55
  getAgentVersionRuns: () => getAgentVersionRuns,
56
56
  getAgentVersionStatus: () => getAgentVersionStatus,
57
+ getAgentVersionVariance: () => getAgentVersionVariance,
57
58
  getArtifactDownloadUrl: () => getArtifactDownloadUrl,
58
59
  getAuditEvents: () => getAuditEvents,
59
60
  getChutesAuthStatus: () => getChutesAuthStatus,
@@ -68,6 +69,7 @@ __export(index_exports, {
68
69
  getRunningEvaluations: () => getRunningEvaluations,
69
70
  getSuiteProblems: () => getSuiteProblems,
70
71
  getTopAgent: () => getTopAgent,
72
+ getValidatorScores: () => getValidatorScores,
71
73
  getValidators: () => getValidators,
72
74
  hasDetail: () => hasDetail,
73
75
  hasErrorCode: () => hasErrorCode,
@@ -387,6 +389,18 @@ var listEvaluationRuns = (options) => {
387
389
  url: "/v1/admin/evaluation-runs"
388
390
  });
389
391
  };
392
+ var getValidatorScores = (options) => {
393
+ return (options?.client ?? client).get({
394
+ ...options,
395
+ url: "/v1/admin/analytics/validator-scores"
396
+ });
397
+ };
398
+ var getAgentVersionVariance = (options) => {
399
+ return (options?.client ?? client).get({
400
+ ...options,
401
+ url: "/v1/admin/analytics/agent-version-variance"
402
+ });
403
+ };
390
404
 
391
405
  // src/errors.ts
392
406
  function classifyStatus(status) {
@@ -773,6 +787,7 @@ function configureSessionAuth(baseUrl, config) {
773
787
  getAgentVersionProblems,
774
788
  getAgentVersionRuns,
775
789
  getAgentVersionStatus,
790
+ getAgentVersionVariance,
776
791
  getArtifactDownloadUrl,
777
792
  getAuditEvents,
778
793
  getChutesAuthStatus,
@@ -787,6 +802,7 @@ function configureSessionAuth(baseUrl, config) {
787
802
  getRunningEvaluations,
788
803
  getSuiteProblems,
789
804
  getTopAgent,
805
+ getValidatorScores,
790
806
  getValidators,
791
807
  hasDetail,
792
808
  hasErrorCode,
package/dist/index.mjs CHANGED
@@ -288,6 +288,18 @@ var listEvaluationRuns = (options) => {
288
288
  url: "/v1/admin/evaluation-runs"
289
289
  });
290
290
  };
291
+ var getValidatorScores = (options) => {
292
+ return (options?.client ?? client).get({
293
+ ...options,
294
+ url: "/v1/admin/analytics/validator-scores"
295
+ });
296
+ };
297
+ var getAgentVersionVariance = (options) => {
298
+ return (options?.client ?? client).get({
299
+ ...options,
300
+ url: "/v1/admin/analytics/agent-version-variance"
301
+ });
302
+ };
291
303
 
292
304
  // src/errors.ts
293
305
  function classifyStatus(status) {
@@ -673,6 +685,7 @@ export {
673
685
  getAgentVersionProblems,
674
686
  getAgentVersionRuns,
675
687
  getAgentVersionStatus,
688
+ getAgentVersionVariance,
676
689
  getArtifactDownloadUrl,
677
690
  getAuditEvents,
678
691
  getChutesAuthStatus,
@@ -687,6 +700,7 @@ export {
687
700
  getRunningEvaluations,
688
701
  getSuiteProblems,
689
702
  getTopAgent,
703
+ getValidatorScores,
690
704
  getValidators,
691
705
  hasDetail,
692
706
  hasErrorCode,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oro-ai/sdk",
3
- "version": "1.0.4",
3
+ "version": "1.0.6",
4
4
  "description": "Official TypeScript SDK for the ORO Bittensor Subnet API",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
@@ -1,7 +1,7 @@
1
1
  // This file is auto-generated by @hey-api/openapi-ts
2
2
 
3
3
  import { createClient, createConfig, type OptionsLegacyParser, formDataBodySerializer } from '@hey-api/client-fetch';
4
- import type { HealthCheckError, HealthCheckResponse, GetCurrentSuiteError, GetCurrentSuiteResponse, GetSuiteProblemsData, GetSuiteProblemsError, GetSuiteProblemsResponse, GetLeaderboardData, GetLeaderboardError, GetLeaderboardResponse, GetTopAgentError, GetTopAgentResponse, GetAgentVersionStatusData, GetAgentVersionStatusError, GetAgentVersionStatusResponse, GetAgentVersionRunsData, GetAgentVersionRunsError, GetAgentVersionRunsResponse, GetAgentVersionProblemsData, GetAgentVersionProblemsError, GetAgentVersionProblemsResponse, GetAgentVersionData, GetAgentVersionError, GetAgentVersionResponse, GetArtifactDownloadUrlData, GetArtifactDownloadUrlError, GetArtifactDownloadUrlResponse, GetEvaluationRunData, GetEvaluationRunError, GetEvaluationRunResponse, GetValidatorsError, GetValidatorsResponse, GetRunningEvaluationsError, GetRunningEvaluationsResponse, GetPendingEvaluationsData, GetPendingEvaluationsError, GetPendingEvaluationsResponse, JoinWaitlistData, JoinWaitlistError, JoinWaitlistResponse, RequestChallengeData, RequestChallengeError, RequestChallengeResponse, CreateSessionEndpointData, CreateSessionEndpointError, CreateSessionEndpointResponse, LogoutData, LogoutError, LogoutResponse2, SubmitAgentData, SubmitAgentError, SubmitAgentResponse2, StoreChutesTokenData, StoreChutesTokenError, StoreChutesTokenResponse, GetChutesAuthStatusError, GetChutesAuthStatusResponse, ListMinerAgentsError, ListMinerAgentsResponse, ListAgentVersionsData, ListAgentVersionsError, ListAgentVersionsResponse, GetOwnedAgentVersionStatusData, GetOwnedAgentVersionStatusError, GetOwnedAgentVersionStatusResponse, ClaimWorkData, ClaimWorkError, ClaimWorkResponse2, HeartbeatData, HeartbeatError, HeartbeatResponse2, UpdateProgressData, UpdateProgressError, UpdateProgressResponse, PresignUploadData, PresignUploadError, PresignUploadResponse2, CompleteRunData, CompleteRunError, CompleteRunResponse2, BanMinerData, BanMinerError, BanMinerResponse, UnbanMinerData, UnbanMinerError, UnbanMinerResponse, BanValidatorData, BanValidatorError, BanValidatorResponse, UnbanValidatorData, UnbanValidatorError, UnbanValidatorResponse, DiscardAgentVersionData, DiscardAgentVersionError, DiscardAgentVersionResponse, ReinstateAgentVersionData, ReinstateAgentVersionError, ReinstateAgentVersionResponse, SetTopAgentData, SetTopAgentError, SetTopAgentResponse, InvalidateEvaluationRunData, InvalidateEvaluationRunError, InvalidateEvaluationRunResponse, ReevaluateAgentVersionData, ReevaluateAgentVersionError, ReevaluateAgentVersionResponse, CancelAgentVersionData, CancelAgentVersionError, CancelAgentVersionResponse, CreateSuiteData, CreateSuiteError, CreateSuiteResponse2, ActivateSuiteData, ActivateSuiteError, ActivateSuiteResponse2, GetAuditEventsData, GetAuditEventsError, GetAuditEventsResponse, GetReaperStatsError, GetReaperStatsResponse, ClearMinerCooldownData, ClearMinerCooldownError, ClearMinerCooldownResponse, ListMinersData, ListMinersError, ListMinersResponse, ListAgentVersions1Data, ListAgentVersions1Error, ListAgentVersions1Response, ListEvaluationRunsData, ListEvaluationRunsError, ListEvaluationRunsResponse } from './types.gen';
4
+ import type { HealthCheckError, HealthCheckResponse, GetCurrentSuiteError, GetCurrentSuiteResponse, GetSuiteProblemsData, GetSuiteProblemsError, GetSuiteProblemsResponse, GetLeaderboardData, GetLeaderboardError, GetLeaderboardResponse, GetTopAgentError, GetTopAgentResponse, GetAgentVersionStatusData, GetAgentVersionStatusError, GetAgentVersionStatusResponse, GetAgentVersionRunsData, GetAgentVersionRunsError, GetAgentVersionRunsResponse, GetAgentVersionProblemsData, GetAgentVersionProblemsError, GetAgentVersionProblemsResponse, GetAgentVersionData, GetAgentVersionError, GetAgentVersionResponse, GetArtifactDownloadUrlData, GetArtifactDownloadUrlError, GetArtifactDownloadUrlResponse, GetEvaluationRunData, GetEvaluationRunError, GetEvaluationRunResponse, GetValidatorsError, GetValidatorsResponse, GetRunningEvaluationsError, GetRunningEvaluationsResponse, GetPendingEvaluationsData, GetPendingEvaluationsError, GetPendingEvaluationsResponse, JoinWaitlistData, JoinWaitlistError, JoinWaitlistResponse, RequestChallengeData, RequestChallengeError, RequestChallengeResponse, CreateSessionEndpointData, CreateSessionEndpointError, CreateSessionEndpointResponse, LogoutData, LogoutError, LogoutResponse2, SubmitAgentData, SubmitAgentError, SubmitAgentResponse2, StoreChutesTokenData, StoreChutesTokenError, StoreChutesTokenResponse, GetChutesAuthStatusError, GetChutesAuthStatusResponse, ListMinerAgentsError, ListMinerAgentsResponse, ListAgentVersionsData, ListAgentVersionsError, ListAgentVersionsResponse, GetOwnedAgentVersionStatusData, GetOwnedAgentVersionStatusError, GetOwnedAgentVersionStatusResponse, ClaimWorkData, ClaimWorkError, ClaimWorkResponse2, HeartbeatData, HeartbeatError, HeartbeatResponse2, UpdateProgressData, UpdateProgressError, UpdateProgressResponse, PresignUploadData, PresignUploadError, PresignUploadResponse2, CompleteRunData, CompleteRunError, CompleteRunResponse2, BanMinerData, BanMinerError, BanMinerResponse, UnbanMinerData, UnbanMinerError, UnbanMinerResponse, BanValidatorData, BanValidatorError, BanValidatorResponse, UnbanValidatorData, UnbanValidatorError, UnbanValidatorResponse, DiscardAgentVersionData, DiscardAgentVersionError, DiscardAgentVersionResponse, ReinstateAgentVersionData, ReinstateAgentVersionError, ReinstateAgentVersionResponse, SetTopAgentData, SetTopAgentError, SetTopAgentResponse, InvalidateEvaluationRunData, InvalidateEvaluationRunError, InvalidateEvaluationRunResponse, ReevaluateAgentVersionData, ReevaluateAgentVersionError, ReevaluateAgentVersionResponse, CancelAgentVersionData, CancelAgentVersionError, CancelAgentVersionResponse, CreateSuiteData, CreateSuiteError, CreateSuiteResponse2, ActivateSuiteData, ActivateSuiteError, ActivateSuiteResponse2, GetAuditEventsData, GetAuditEventsError, GetAuditEventsResponse, GetReaperStatsError, GetReaperStatsResponse, ClearMinerCooldownData, ClearMinerCooldownError, ClearMinerCooldownResponse, ListMinersData, ListMinersError, ListMinersResponse, ListAgentVersions1Data, ListAgentVersions1Error, ListAgentVersions1Response, ListEvaluationRunsData, ListEvaluationRunsError, ListEvaluationRunsResponse, GetValidatorScoresData, GetValidatorScoresError, GetValidatorScoresResponse, GetAgentVersionVarianceData, GetAgentVersionVarianceError, GetAgentVersionVarianceResponse } from './types.gen';
5
5
 
6
6
  export const client = createClient(createConfig());
7
7
 
@@ -560,4 +560,26 @@ export const listEvaluationRuns = <ThrowOnError extends boolean = false>(options
560
560
  ...options,
561
561
  url: '/v1/admin/evaluation-runs'
562
562
  });
563
+ };
564
+
565
+ /**
566
+ * Aggregated scoring statistics per validator
567
+ * Compute per-validator scoring statistics from completed runs.
568
+ */
569
+ export const getValidatorScores = <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<GetValidatorScoresData, ThrowOnError>) => {
570
+ return (options?.client ?? client).get<GetValidatorScoresResponse, GetValidatorScoresError, ThrowOnError>({
571
+ ...options,
572
+ url: '/v1/admin/analytics/validator-scores'
573
+ });
574
+ };
575
+
576
+ /**
577
+ * Score variance across validators for recent agent versions
578
+ * Find agent versions with high score variance across validators.
579
+ */
580
+ export const getAgentVersionVariance = <ThrowOnError extends boolean = false>(options?: OptionsLegacyParser<GetAgentVersionVarianceData, ThrowOnError>) => {
581
+ return (options?.client ?? client).get<GetAgentVersionVarianceResponse, GetAgentVersionVarianceError, ThrowOnError>({
582
+ ...options,
583
+ url: '/v1/admin/analytics/agent-version-variance'
584
+ });
563
585
  };
@@ -384,6 +384,24 @@ export type AgentVersionPublic = {
384
384
  latest_final_score?: (number | null);
385
385
  };
386
386
 
387
+ /**
388
+ * Per-validator score for an agent version.
389
+ */
390
+ export type AgentVersionScoreEntry = {
391
+ /**
392
+ * Validator hotkey
393
+ */
394
+ validator_hotkey: string;
395
+ /**
396
+ * Score from this validator
397
+ */
398
+ score: number;
399
+ /**
400
+ * Evaluation run ID
401
+ */
402
+ run_id: string;
403
+ };
404
+
387
405
  /**
388
406
  * State of an agent version evaluation.
389
407
  */
@@ -457,6 +475,66 @@ export type AgentVersionStatus = {
457
475
  } | null);
458
476
  };
459
477
 
478
+ /**
479
+ * Score variance across validators for a single agent version.
480
+ */
481
+ export type AgentVersionVariance = {
482
+ /**
483
+ * Agent version ID
484
+ */
485
+ agent_version_id: string;
486
+ /**
487
+ * Agent name
488
+ */
489
+ agent_name: string;
490
+ /**
491
+ * Miner hotkey
492
+ */
493
+ miner_hotkey: string;
494
+ /**
495
+ * Number of validators that scored this version
496
+ */
497
+ validator_count: number;
498
+ /**
499
+ * Mean score
500
+ */
501
+ avg_score: number;
502
+ /**
503
+ * Minimum score
504
+ */
505
+ min_score: number;
506
+ /**
507
+ * Maximum score
508
+ */
509
+ max_score: number;
510
+ /**
511
+ * max_score - min_score
512
+ */
513
+ spread: number;
514
+ /**
515
+ * True if spread exceeds threshold (default 10%)
516
+ */
517
+ is_high_variance: boolean;
518
+ /**
519
+ * Individual validator scores
520
+ */
521
+ per_validator: Array<AgentVersionScoreEntry>;
522
+ };
523
+
524
+ /**
525
+ * Response for agent version score variance analytics.
526
+ */
527
+ export type AgentVersionVarianceResponse = {
528
+ /**
529
+ * Per-version variance data
530
+ */
531
+ agent_versions: Array<AgentVersionVariance>;
532
+ /**
533
+ * Spread threshold used for flagging
534
+ */
535
+ variance_threshold: number;
536
+ };
537
+
460
538
  /**
461
539
  * 409 - Resource is already invalidated.
462
540
  */
@@ -962,6 +1040,18 @@ export type EvaluationRunDetail = {
962
1040
  * Validator-reported failure reason
963
1041
  */
964
1042
  failure_reason?: (string | null);
1043
+ /**
1044
+ * Whether run is included in aggregate scoring
1045
+ */
1046
+ is_included?: boolean;
1047
+ /**
1048
+ * When run was invalidated
1049
+ */
1050
+ invalidated_at?: (string | null);
1051
+ /**
1052
+ * Reason for invalidation
1053
+ */
1054
+ invalidation_reason?: (string | null);
965
1055
  };
966
1056
 
967
1057
  /**
@@ -2181,6 +2271,66 @@ export type ValidatorPublic = {
2181
2271
  identity_description?: (string | null);
2182
2272
  };
2183
2273
 
2274
+ /**
2275
+ * Response for validator scoring analytics.
2276
+ */
2277
+ export type ValidatorScoresResponse = {
2278
+ /**
2279
+ * Per-validator summaries
2280
+ */
2281
+ validators: Array<ValidatorScoreSummary>;
2282
+ /**
2283
+ * Global average score across all validators
2284
+ */
2285
+ global_avg_score: number;
2286
+ /**
2287
+ * Global standard deviation
2288
+ */
2289
+ global_stddev: number;
2290
+ };
2291
+
2292
+ /**
2293
+ * Aggregated scoring stats for a single validator.
2294
+ */
2295
+ export type ValidatorScoreSummary = {
2296
+ /**
2297
+ * Validator hotkey
2298
+ */
2299
+ validator_hotkey: string;
2300
+ /**
2301
+ * Total completed runs
2302
+ */
2303
+ total_runs: number;
2304
+ /**
2305
+ * Mean score across runs
2306
+ */
2307
+ avg_score: number;
2308
+ /**
2309
+ * Median score
2310
+ */
2311
+ median_score: number;
2312
+ /**
2313
+ * Standard deviation of scores
2314
+ */
2315
+ stddev_score: number;
2316
+ /**
2317
+ * Minimum score
2318
+ */
2319
+ min_score: number;
2320
+ /**
2321
+ * Maximum score
2322
+ */
2323
+ max_score: number;
2324
+ /**
2325
+ * Percentage deviation from the global average (negative = below)
2326
+ */
2327
+ deviation_from_global: number;
2328
+ /**
2329
+ * True if deviation exceeds 1.5 standard deviations from global mean
2330
+ */
2331
+ is_outlier: boolean;
2332
+ };
2333
+
2184
2334
  /**
2185
2335
  * Status of a validator.
2186
2336
  */
@@ -2852,4 +3002,54 @@ export type ListEvaluationRunsData = {
2852
3002
 
2853
3003
  export type ListEvaluationRunsResponse = (AdminEvaluationRunsResponse);
2854
3004
 
2855
- export type ListEvaluationRunsError = (HTTPValidationError);
3005
+ export type ListEvaluationRunsError = (HTTPValidationError);
3006
+
3007
+ export type GetValidatorScoresData = {
3008
+ query?: {
3009
+ /**
3010
+ * Only runs after this time
3011
+ */
3012
+ since?: (string | null);
3013
+ /**
3014
+ * Suite ID (defaults to active suite)
3015
+ */
3016
+ suite_id?: (number | null);
3017
+ /**
3018
+ * Only runs before this time
3019
+ */
3020
+ until?: (string | null);
3021
+ };
3022
+ };
3023
+
3024
+ export type GetValidatorScoresResponse = (ValidatorScoresResponse);
3025
+
3026
+ export type GetValidatorScoresError = (HTTPValidationError);
3027
+
3028
+ export type GetAgentVersionVarianceData = {
3029
+ query?: {
3030
+ /**
3031
+ * Number of agent versions
3032
+ */
3033
+ limit?: number;
3034
+ /**
3035
+ * Only versions after this time
3036
+ */
3037
+ since?: (string | null);
3038
+ /**
3039
+ * Suite ID (defaults to active suite)
3040
+ */
3041
+ suite_id?: (number | null);
3042
+ /**
3043
+ * Only versions before this time
3044
+ */
3045
+ until?: (string | null);
3046
+ /**
3047
+ * Spread threshold for flagging
3048
+ */
3049
+ variance_threshold?: number;
3050
+ };
3051
+ };
3052
+
3053
+ export type GetAgentVersionVarianceResponse = (AgentVersionVarianceResponse);
3054
+
3055
+ export type GetAgentVersionVarianceError = (HTTPValidationError);