@evalops/sdk-ts 0.1.101 → 0.1.103

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,12 +52,216 @@ export type LLMRubricAssertion = Message<"fermata.v1.LLMRubricAssertion"> & {
52
52
  * @generated from field: optional int32 min_judge_validation_count = 10;
53
53
  */
54
54
  minJudgeValidationCount?: number | undefined;
55
+ /**
56
+ * @generated from field: string rubric_version = 11;
57
+ */
58
+ rubricVersion: string;
59
+ /**
60
+ * @generated from field: string calibration_cohort = 12;
61
+ */
62
+ calibrationCohort: string;
63
+ /**
64
+ * @generated from field: bool advisory_only = 13;
65
+ */
66
+ advisoryOnly: boolean;
55
67
  };
56
68
  /**
57
69
  * Describes the message fermata.v1.LLMRubricAssertion.
58
70
  * Use `create(LLMRubricAssertionSchema)` to create a new message.
59
71
  */
60
72
  export declare const LLMRubricAssertionSchema: GenMessage<LLMRubricAssertion>;
73
+ /**
74
+ * @generated from message fermata.v1.LLMPairwiseRubricAssertion
75
+ */
76
+ export type LLMPairwiseRubricAssertion = Message<"fermata.v1.LLMPairwiseRubricAssertion"> & {
77
+ /**
78
+ * @generated from field: string judge_id = 1;
79
+ */
80
+ judgeId: string;
81
+ /**
82
+ * @generated from field: string verifier_judge_id = 2;
83
+ */
84
+ verifierJudgeId: string;
85
+ /**
86
+ * @generated from field: string rubric = 3;
87
+ */
88
+ rubric: string;
89
+ /**
90
+ * @generated from field: string baseline_label = 4;
91
+ */
92
+ baselineLabel: string;
93
+ /**
94
+ * @generated from field: string candidate_label = 5;
95
+ */
96
+ candidateLabel: string;
97
+ /**
98
+ * @generated from field: optional double min_score = 6;
99
+ */
100
+ minScore?: number | undefined;
101
+ /**
102
+ * @generated from field: int32 repeat = 7;
103
+ */
104
+ repeat: number;
105
+ /**
106
+ * @generated from field: int32 quorum = 8;
107
+ */
108
+ quorum: number;
109
+ /**
110
+ * @generated from field: bool record_judge_validation = 9;
111
+ */
112
+ recordJudgeValidation: boolean;
113
+ /**
114
+ * @generated from field: bool require_calibrated_judge = 10;
115
+ */
116
+ requireCalibratedJudge: boolean;
117
+ /**
118
+ * @generated from field: optional double min_judge_validation_accuracy = 11;
119
+ */
120
+ minJudgeValidationAccuracy?: number | undefined;
121
+ /**
122
+ * @generated from field: optional int32 min_judge_validation_count = 12;
123
+ */
124
+ minJudgeValidationCount?: number | undefined;
125
+ /**
126
+ * @generated from field: string rubric_version = 13;
127
+ */
128
+ rubricVersion: string;
129
+ /**
130
+ * @generated from field: string calibration_cohort = 14;
131
+ */
132
+ calibrationCohort: string;
133
+ /**
134
+ * @generated from field: bool advisory_only = 15;
135
+ */
136
+ advisoryOnly: boolean;
137
+ };
138
+ /**
139
+ * Describes the message fermata.v1.LLMPairwiseRubricAssertion.
140
+ * Use `create(LLMPairwiseRubricAssertionSchema)` to create a new message.
141
+ */
142
+ export declare const LLMPairwiseRubricAssertionSchema: GenMessage<LLMPairwiseRubricAssertion>;
143
+ /**
144
+ * @generated from message fermata.v1.AgentTrajectoryStatusExpectation
145
+ */
146
+ export type AgentTrajectoryStatusExpectation = Message<"fermata.v1.AgentTrajectoryStatusExpectation"> & {
147
+ /**
148
+ * @generated from field: string id = 1;
149
+ */
150
+ id: string;
151
+ /**
152
+ * @generated from field: string status = 2;
153
+ */
154
+ status: string;
155
+ };
156
+ /**
157
+ * Describes the message fermata.v1.AgentTrajectoryStatusExpectation.
158
+ * Use `create(AgentTrajectoryStatusExpectationSchema)` to create a new message.
159
+ */
160
+ export declare const AgentTrajectoryStatusExpectationSchema: GenMessage<AgentTrajectoryStatusExpectation>;
161
+ /**
162
+ * @generated from message fermata.v1.AgentTrajectoryStateExpectation
163
+ */
164
+ export type AgentTrajectoryStateExpectation = Message<"fermata.v1.AgentTrajectoryStateExpectation"> & {
165
+ /**
166
+ * @generated from field: string path = 1;
167
+ */
168
+ path: string;
169
+ /**
170
+ * @generated from field: string value_json = 2;
171
+ */
172
+ valueJson: string;
173
+ };
174
+ /**
175
+ * Describes the message fermata.v1.AgentTrajectoryStateExpectation.
176
+ * Use `create(AgentTrajectoryStateExpectationSchema)` to create a new message.
177
+ */
178
+ export declare const AgentTrajectoryStateExpectationSchema: GenMessage<AgentTrajectoryStateExpectation>;
179
+ /**
180
+ * @generated from message fermata.v1.AgentTrajectoryAssertion
181
+ */
182
+ export type AgentTrajectoryAssertion = Message<"fermata.v1.AgentTrajectoryAssertion"> & {
183
+ /**
184
+ * @generated from field: repeated string required_tools = 1;
185
+ */
186
+ requiredTools: string[];
187
+ /**
188
+ * @generated from field: repeated string forbidden_tools = 2;
189
+ */
190
+ forbiddenTools: string[];
191
+ /**
192
+ * @generated from field: repeated string required_events = 3;
193
+ */
194
+ requiredEvents: string[];
195
+ /**
196
+ * @generated from field: repeated string forbidden_events = 4;
197
+ */
198
+ forbiddenEvents: string[];
199
+ /**
200
+ * @generated from field: repeated fermata.v1.AgentTrajectoryStatusExpectation required_assertion_statuses = 5;
201
+ */
202
+ requiredAssertionStatuses: AgentTrajectoryStatusExpectation[];
203
+ /**
204
+ * @generated from field: repeated fermata.v1.AgentTrajectoryStateExpectation required_state_writes = 6;
205
+ */
206
+ requiredStateWrites: AgentTrajectoryStateExpectation[];
207
+ /**
208
+ * @generated from field: repeated fermata.v1.AgentTrajectoryStateExpectation forbidden_state_writes = 7;
209
+ */
210
+ forbiddenStateWrites: AgentTrajectoryStateExpectation[];
211
+ /**
212
+ * @generated from field: optional int32 max_events = 8;
213
+ */
214
+ maxEvents?: number | undefined;
215
+ /**
216
+ * @generated from field: optional int32 max_tool_calls = 9;
217
+ */
218
+ maxToolCalls?: number | undefined;
219
+ /**
220
+ * @generated from field: optional int32 max_replay_deltas = 10;
221
+ */
222
+ maxReplayDeltas?: number | undefined;
223
+ /**
224
+ * @generated from field: optional int32 max_score_failures = 11;
225
+ */
226
+ maxScoreFailures?: number | undefined;
227
+ /**
228
+ * @generated from field: optional int32 max_score_warnings = 12;
229
+ */
230
+ maxScoreWarnings?: number | undefined;
231
+ /**
232
+ * @generated from field: optional int64 max_latency_ms = 13;
233
+ */
234
+ maxLatencyMs?: bigint | undefined;
235
+ /**
236
+ * @generated from field: optional int64 max_cost_micros = 14;
237
+ */
238
+ maxCostMicros?: bigint | undefined;
239
+ /**
240
+ * @generated from field: optional int32 max_retries = 15;
241
+ */
242
+ maxRetries?: number | undefined;
243
+ /**
244
+ * @generated from field: bool require_idempotent_replay = 16;
245
+ */
246
+ requireIdempotentReplay: boolean;
247
+ /**
248
+ * @generated from field: bool forbid_duplicate_external_actions = 17;
249
+ */
250
+ forbidDuplicateExternalActions: boolean;
251
+ /**
252
+ * @generated from field: repeated string forbidden_external_actions = 18;
253
+ */
254
+ forbiddenExternalActions: string[];
255
+ /**
256
+ * @generated from field: repeated string required_trace_join_keys = 19;
257
+ */
258
+ requiredTraceJoinKeys: string[];
259
+ };
260
+ /**
261
+ * Describes the message fermata.v1.AgentTrajectoryAssertion.
262
+ * Use `create(AgentTrajectoryAssertionSchema)` to create a new message.
263
+ */
264
+ export declare const AgentTrajectoryAssertionSchema: GenMessage<AgentTrajectoryAssertion>;
61
265
  /**
62
266
  * @generated from message fermata.v1.TestSuite
63
267
  */
@@ -170,6 +374,14 @@ export type Assertion = Message<"fermata.v1.Assertion"> & {
170
374
  * @generated from field: fermata.v1.LLMRubricAssertion llm_rubric = 7;
171
375
  */
172
376
  llmRubric?: LLMRubricAssertion | undefined;
377
+ /**
378
+ * @generated from field: fermata.v1.LLMPairwiseRubricAssertion llm_pairwise_rubric = 8;
379
+ */
380
+ llmPairwiseRubric?: LLMPairwiseRubricAssertion | undefined;
381
+ /**
382
+ * @generated from field: fermata.v1.AgentTrajectoryAssertion agent_trajectory = 9;
383
+ */
384
+ agentTrajectory?: AgentTrajectoryAssertion | undefined;
173
385
  };
174
386
  /**
175
387
  * Describes the message fermata.v1.Assertion.
@@ -1310,6 +1522,240 @@ export type ListTestSuitesResponse = Message<"fermata.v1.ListTestSuitesResponse"
1310
1522
  * Use `create(ListTestSuitesResponseSchema)` to create a new message.
1311
1523
  */
1312
1524
  export declare const ListTestSuitesResponseSchema: GenMessage<ListTestSuitesResponse>;
1525
+ /**
1526
+ * @generated from message fermata.v1.EvaluationPackJudgeContract
1527
+ */
1528
+ export type EvaluationPackJudgeContract = Message<"fermata.v1.EvaluationPackJudgeContract"> & {
1529
+ /**
1530
+ * @generated from field: string judge_id = 1;
1531
+ */
1532
+ judgeId: string;
1533
+ /**
1534
+ * @generated from field: string model = 2;
1535
+ */
1536
+ model: string;
1537
+ /**
1538
+ * @generated from field: string rubric_version = 3;
1539
+ */
1540
+ rubricVersion: string;
1541
+ /**
1542
+ * @generated from field: string calibration_cohort = 4;
1543
+ */
1544
+ calibrationCohort: string;
1545
+ /**
1546
+ * @generated from field: optional double max_disagreement_rate = 5;
1547
+ */
1548
+ maxDisagreementRate?: number | undefined;
1549
+ /**
1550
+ * @generated from field: optional double min_validation_accuracy = 6;
1551
+ */
1552
+ minValidationAccuracy?: number | undefined;
1553
+ /**
1554
+ * @generated from field: optional int32 min_validation_count = 7;
1555
+ */
1556
+ minValidationCount?: number | undefined;
1557
+ };
1558
+ /**
1559
+ * Describes the message fermata.v1.EvaluationPackJudgeContract.
1560
+ * Use `create(EvaluationPackJudgeContractSchema)` to create a new message.
1561
+ */
1562
+ export declare const EvaluationPackJudgeContractSchema: GenMessage<EvaluationPackJudgeContract>;
1563
+ /**
1564
+ * @generated from message fermata.v1.ProductionTracePromotionPolicy
1565
+ */
1566
+ export type ProductionTracePromotionPolicy = Message<"fermata.v1.ProductionTracePromotionPolicy"> & {
1567
+ /**
1568
+ * @generated from field: bool require_quality_annotations = 1;
1569
+ */
1570
+ requireQualityAnnotations: boolean;
1571
+ /**
1572
+ * @generated from field: bool failed_annotations_only = 2;
1573
+ */
1574
+ failedAnnotationsOnly: boolean;
1575
+ /**
1576
+ * @generated from field: int32 max_cases = 3;
1577
+ */
1578
+ maxCases: number;
1579
+ /**
1580
+ * @generated from field: repeated string required_tags = 4;
1581
+ */
1582
+ requiredTags: string[];
1583
+ /**
1584
+ * @generated from field: repeated string excluded_tags = 5;
1585
+ */
1586
+ excludedTags: string[];
1587
+ };
1588
+ /**
1589
+ * Describes the message fermata.v1.ProductionTracePromotionPolicy.
1590
+ * Use `create(ProductionTracePromotionPolicySchema)` to create a new message.
1591
+ */
1592
+ export declare const ProductionTracePromotionPolicySchema: GenMessage<ProductionTracePromotionPolicy>;
1593
+ /**
1594
+ * @generated from message fermata.v1.EvaluationPack
1595
+ */
1596
+ export type EvaluationPack = Message<"fermata.v1.EvaluationPack"> & {
1597
+ /**
1598
+ * @generated from field: string id = 1;
1599
+ */
1600
+ id: string;
1601
+ /**
1602
+ * @generated from field: string service = 2;
1603
+ */
1604
+ service: string;
1605
+ /**
1606
+ * @generated from field: string name = 3;
1607
+ */
1608
+ name: string;
1609
+ /**
1610
+ * @generated from field: string description = 4;
1611
+ */
1612
+ description: string;
1613
+ /**
1614
+ * @generated from field: string version = 5;
1615
+ */
1616
+ version: string;
1617
+ /**
1618
+ * @generated from field: repeated fermata.v1.EvaluationPackJudgeContract judge_contracts = 6;
1619
+ */
1620
+ judgeContracts: EvaluationPackJudgeContract[];
1621
+ /**
1622
+ * @generated from field: fermata.v1.ProductionTracePromotionPolicy trace_promotion_policy = 7;
1623
+ */
1624
+ tracePromotionPolicy?: ProductionTracePromotionPolicy | undefined;
1625
+ /**
1626
+ * @generated from field: fermata.v1.QualityGateConfig quality_gate = 8;
1627
+ */
1628
+ qualityGate?: QualityGateConfig | undefined;
1629
+ /**
1630
+ * @generated from field: repeated string source_suite_ids = 9;
1631
+ */
1632
+ sourceSuiteIds: string[];
1633
+ /**
1634
+ * @generated from field: repeated string trace_ids = 10;
1635
+ */
1636
+ traceIds: string[];
1637
+ /**
1638
+ * @generated from field: google.protobuf.Struct metadata = 11;
1639
+ */
1640
+ metadata?: JsonObject | undefined;
1641
+ /**
1642
+ * @generated from field: google.protobuf.Timestamp updated_at = 12;
1643
+ */
1644
+ updatedAt?: Timestamp | undefined;
1645
+ };
1646
+ /**
1647
+ * Describes the message fermata.v1.EvaluationPack.
1648
+ * Use `create(EvaluationPackSchema)` to create a new message.
1649
+ */
1650
+ export declare const EvaluationPackSchema: GenMessage<EvaluationPack>;
1651
+ /**
1652
+ * @generated from message fermata.v1.ListEvaluationPacksRequest
1653
+ */
1654
+ export type ListEvaluationPacksRequest = Message<"fermata.v1.ListEvaluationPacksRequest"> & {
1655
+ /**
1656
+ * @generated from field: string organization_id = 1;
1657
+ */
1658
+ organizationId: string;
1659
+ /**
1660
+ * @generated from field: string workspace_id = 2;
1661
+ */
1662
+ workspaceId: string;
1663
+ /**
1664
+ * @generated from field: string service = 3;
1665
+ */
1666
+ service: string;
1667
+ };
1668
+ /**
1669
+ * Describes the message fermata.v1.ListEvaluationPacksRequest.
1670
+ * Use `create(ListEvaluationPacksRequestSchema)` to create a new message.
1671
+ */
1672
+ export declare const ListEvaluationPacksRequestSchema: GenMessage<ListEvaluationPacksRequest>;
1673
+ /**
1674
+ * @generated from message fermata.v1.ListEvaluationPacksResponse
1675
+ */
1676
+ export type ListEvaluationPacksResponse = Message<"fermata.v1.ListEvaluationPacksResponse"> & {
1677
+ /**
1678
+ * @generated from field: repeated fermata.v1.EvaluationPack packs = 1;
1679
+ */
1680
+ packs: EvaluationPack[];
1681
+ /**
1682
+ * @generated from field: int32 total_count = 2;
1683
+ */
1684
+ totalCount: number;
1685
+ };
1686
+ /**
1687
+ * Describes the message fermata.v1.ListEvaluationPacksResponse.
1688
+ * Use `create(ListEvaluationPacksResponseSchema)` to create a new message.
1689
+ */
1690
+ export declare const ListEvaluationPacksResponseSchema: GenMessage<ListEvaluationPacksResponse>;
1691
+ /**
1692
+ * @generated from message fermata.v1.BuildEvaluationPackRequest
1693
+ */
1694
+ export type BuildEvaluationPackRequest = Message<"fermata.v1.BuildEvaluationPackRequest"> & {
1695
+ /**
1696
+ * @generated from field: string organization_id = 1;
1697
+ */
1698
+ organizationId: string;
1699
+ /**
1700
+ * @generated from field: string workspace_id = 2;
1701
+ */
1702
+ workspaceId: string;
1703
+ /**
1704
+ * @generated from field: string pack_id = 3;
1705
+ */
1706
+ packId: string;
1707
+ /**
1708
+ * @generated from field: string suite_name = 4;
1709
+ */
1710
+ suiteName: string;
1711
+ /**
1712
+ * @generated from field: string created_by = 5;
1713
+ */
1714
+ createdBy: string;
1715
+ /**
1716
+ * @generated from field: repeated string source_suite_ids = 6;
1717
+ */
1718
+ sourceSuiteIds: string[];
1719
+ /**
1720
+ * @generated from field: repeated string trace_ids = 7;
1721
+ */
1722
+ traceIds: string[];
1723
+ /**
1724
+ * @generated from field: int32 limit = 8;
1725
+ */
1726
+ limit: number;
1727
+ };
1728
+ /**
1729
+ * Describes the message fermata.v1.BuildEvaluationPackRequest.
1730
+ * Use `create(BuildEvaluationPackRequestSchema)` to create a new message.
1731
+ */
1732
+ export declare const BuildEvaluationPackRequestSchema: GenMessage<BuildEvaluationPackRequest>;
1733
+ /**
1734
+ * @generated from message fermata.v1.BuildEvaluationPackResponse
1735
+ */
1736
+ export type BuildEvaluationPackResponse = Message<"fermata.v1.BuildEvaluationPackResponse"> & {
1737
+ /**
1738
+ * @generated from field: fermata.v1.EvaluationPack pack = 1;
1739
+ */
1740
+ pack?: EvaluationPack | undefined;
1741
+ /**
1742
+ * @generated from field: fermata.v1.TestSuite suite = 2;
1743
+ */
1744
+ suite?: TestSuite | undefined;
1745
+ /**
1746
+ * @generated from field: fermata.v1.QualityGateConfig quality_gate = 3;
1747
+ */
1748
+ qualityGate?: QualityGateConfig | undefined;
1749
+ /**
1750
+ * @generated from field: fermata.v1.ProductionTracePromotionPolicy trace_promotion_policy = 4;
1751
+ */
1752
+ tracePromotionPolicy?: ProductionTracePromotionPolicy | undefined;
1753
+ };
1754
+ /**
1755
+ * Describes the message fermata.v1.BuildEvaluationPackResponse.
1756
+ * Use `create(BuildEvaluationPackResponseSchema)` to create a new message.
1757
+ */
1758
+ export declare const BuildEvaluationPackResponseSchema: GenMessage<BuildEvaluationPackResponse>;
1313
1759
  /**
1314
1760
  * @generated from message fermata.v1.CreateScenarioRequest
1315
1761
  */
@@ -3992,6 +4438,38 @@ export type ListScenarioRunsRequest = Message<"fermata.v1.ListScenarioRunsReques
3992
4438
  * @generated from field: int32 offset = 5;
3993
4439
  */
3994
4440
  offset: number;
4441
+ /**
4442
+ * @generated from field: fermata.v1.ScenarioRunStatus status = 6;
4443
+ */
4444
+ status: ScenarioRunStatus;
4445
+ /**
4446
+ * @generated from field: string source = 7;
4447
+ */
4448
+ source: string;
4449
+ /**
4450
+ * @generated from field: string evaluation_id = 8;
4451
+ */
4452
+ evaluationId: string;
4453
+ /**
4454
+ * @generated from field: string suite_id = 9;
4455
+ */
4456
+ suiteId: string;
4457
+ /**
4458
+ * @generated from field: string test_case_id = 10;
4459
+ */
4460
+ testCaseId: string;
4461
+ /**
4462
+ * @generated from field: string candidate_id = 11;
4463
+ */
4464
+ candidateId: string;
4465
+ /**
4466
+ * @generated from field: string candidate_label = 12;
4467
+ */
4468
+ candidateLabel: string;
4469
+ /**
4470
+ * @generated from field: string model = 13;
4471
+ */
4472
+ model: string;
3995
4473
  };
3996
4474
  /**
3997
4475
  * Describes the message fermata.v1.ListScenarioRunsRequest.
@@ -4706,6 +5184,10 @@ export type EvaluationComparison = Message<"fermata.v1.EvaluationComparison"> &
4706
5184
  * @generated from field: google.protobuf.Timestamp updated_at = 19;
4707
5185
  */
4708
5186
  updatedAt?: Timestamp | undefined;
5187
+ /**
5188
+ * @generated from field: google.protobuf.Struct metadata = 20;
5189
+ */
5190
+ metadata?: JsonObject | undefined;
4709
5191
  };
4710
5192
  /**
4711
5193
  * Describes the message fermata.v1.EvaluationComparison.
@@ -6150,6 +6632,18 @@ export type ProductionTraceRecord = Message<"fermata.v1.ProductionTraceRecord">
6150
6632
  * @generated from field: optional double provider_cost = 17;
6151
6633
  */
6152
6634
  providerCost?: number | undefined;
6635
+ /**
6636
+ * @generated from field: repeated string must_contain = 18;
6637
+ */
6638
+ mustContain: string[];
6639
+ /**
6640
+ * @generated from field: repeated string must_not_contain = 19;
6641
+ */
6642
+ mustNotContain: string[];
6643
+ /**
6644
+ * @generated from field: optional double min_similarity = 20;
6645
+ */
6646
+ minSimilarity?: number | undefined;
6153
6647
  };
6154
6648
  /**
6155
6649
  * Describes the message fermata.v1.ProductionTraceRecord.
@@ -6268,6 +6762,18 @@ export type BuildProductionTraceRegressionSuiteRequest = Message<"fermata.v1.Bui
6268
6762
  * @generated from field: repeated fermata.v1.ProductionTraceRecord traces = 6;
6269
6763
  */
6270
6764
  traces: ProductionTraceRecord[];
6765
+ /**
6766
+ * @generated from field: repeated string trace_ids = 7;
6767
+ */
6768
+ traceIds: string[];
6769
+ /**
6770
+ * @generated from field: bool require_quality_annotations = 8;
6771
+ */
6772
+ requireQualityAnnotations: boolean;
6773
+ /**
6774
+ * @generated from field: bool failed_annotations_only = 9;
6775
+ */
6776
+ failedAnnotationsOnly: boolean;
6271
6777
  };
6272
6778
  /**
6273
6779
  * Describes the message fermata.v1.BuildProductionTraceRegressionSuiteRequest.
@@ -6558,6 +7064,26 @@ export type CompareProductionTraceReplayRequest = Message<"fermata.v1.ComparePro
6558
7064
  * @generated from field: optional int32 max_diff_lines = 5;
6559
7065
  */
6560
7066
  maxDiffLines?: number | undefined;
7067
+ /**
7068
+ * @generated from field: bool persist_comparison_artifact = 6;
7069
+ */
7070
+ persistComparisonArtifact: boolean;
7071
+ /**
7072
+ * @generated from field: string artifact_name = 7;
7073
+ */
7074
+ artifactName: string;
7075
+ /**
7076
+ * @generated from field: string baseline_run_id = 8;
7077
+ */
7078
+ baselineRunId: string;
7079
+ /**
7080
+ * @generated from field: repeated string comparison_run_ids = 9;
7081
+ */
7082
+ comparisonRunIds: string[];
7083
+ /**
7084
+ * @generated from field: string created_by = 10;
7085
+ */
7086
+ createdBy: string;
6561
7087
  };
6562
7088
  /**
6563
7089
  * Describes the message fermata.v1.CompareProductionTraceReplayRequest.
@@ -6580,6 +7106,14 @@ export type CompareProductionTraceReplayResponse = Message<"fermata.v1.ComparePr
6580
7106
  * @generated from field: repeated fermata.v1.ProductionTraceReplayRegression regressions = 3;
6581
7107
  */
6582
7108
  regressions: ProductionTraceReplayRegression[];
7109
+ /**
7110
+ * @generated from field: string comparison_artifact_id = 4;
7111
+ */
7112
+ comparisonArtifactId: string;
7113
+ /**
7114
+ * @generated from field: fermata.v1.EvaluationComparison comparison_artifact = 5;
7115
+ */
7116
+ comparisonArtifact?: EvaluationComparison | undefined;
6583
7117
  };
6584
7118
  /**
6585
7119
  * Describes the message fermata.v1.CompareProductionTraceReplayResponse.
@@ -7316,6 +7850,110 @@ export type JudgeCalibrationCandidate = Message<"fermata.v1.JudgeCalibrationCand
7316
7850
  * Use `create(JudgeCalibrationCandidateSchema)` to create a new message.
7317
7851
  */
7318
7852
  export declare const JudgeCalibrationCandidateSchema: GenMessage<JudgeCalibrationCandidate>;
7853
+ /**
7854
+ * @generated from message fermata.v1.JudgeValidationSourceMetrics
7855
+ */
7856
+ export type JudgeValidationSourceMetrics = Message<"fermata.v1.JudgeValidationSourceMetrics"> & {
7857
+ /**
7858
+ * @generated from field: string source = 1;
7859
+ */
7860
+ source: string;
7861
+ /**
7862
+ * @generated from field: fermata.v1.JudgeValidationAccuracyMetrics accuracy = 2;
7863
+ */
7864
+ accuracy?: JudgeValidationAccuracyMetrics | undefined;
7865
+ /**
7866
+ * @generated from field: fermata.v1.JudgeValidationBiasMetrics bias = 3;
7867
+ */
7868
+ bias?: JudgeValidationBiasMetrics | undefined;
7869
+ /**
7870
+ * @generated from field: google.protobuf.Timestamp latest_validation = 4;
7871
+ */
7872
+ latestValidation?: Timestamp | undefined;
7873
+ };
7874
+ /**
7875
+ * Describes the message fermata.v1.JudgeValidationSourceMetrics.
7876
+ * Use `create(JudgeValidationSourceMetricsSchema)` to create a new message.
7877
+ */
7878
+ export declare const JudgeValidationSourceMetricsSchema: GenMessage<JudgeValidationSourceMetrics>;
7879
+ /**
7880
+ * @generated from message fermata.v1.JudgeCalibrationReport
7881
+ */
7882
+ export type JudgeCalibrationReport = Message<"fermata.v1.JudgeCalibrationReport"> & {
7883
+ /**
7884
+ * @generated from field: string organization_id = 1;
7885
+ */
7886
+ organizationId: string;
7887
+ /**
7888
+ * @generated from field: string judge_id = 2;
7889
+ */
7890
+ judgeId: string;
7891
+ /**
7892
+ * @generated from field: fermata.v1.JudgeCalibrationStatus status = 3;
7893
+ */
7894
+ status: JudgeCalibrationStatus;
7895
+ /**
7896
+ * @generated from field: bool passed = 4;
7897
+ */
7898
+ passed: boolean;
7899
+ /**
7900
+ * @generated from field: fermata.v1.JudgeValidationAccuracyMetrics accuracy = 5;
7901
+ */
7902
+ accuracy?: JudgeValidationAccuracyMetrics | undefined;
7903
+ /**
7904
+ * @generated from field: fermata.v1.JudgeValidationConfusionMatrix confusion_matrix = 6;
7905
+ */
7906
+ confusionMatrix?: JudgeValidationConfusionMatrix | undefined;
7907
+ /**
7908
+ * @generated from field: fermata.v1.JudgeValidationBiasMetrics bias = 7;
7909
+ */
7910
+ bias?: JudgeValidationBiasMetrics | undefined;
7911
+ /**
7912
+ * @generated from field: int32 min_validations = 8;
7913
+ */
7914
+ minValidations: number;
7915
+ /**
7916
+ * @generated from field: double min_accuracy = 9;
7917
+ */
7918
+ minAccuracy: number;
7919
+ /**
7920
+ * @generated from field: optional double max_absolute_score_drift = 10;
7921
+ */
7922
+ maxAbsoluteScoreDrift?: number | undefined;
7923
+ /**
7924
+ * @generated from field: int32 total_validated_records = 11;
7925
+ */
7926
+ totalValidatedRecords: number;
7927
+ /**
7928
+ * @generated from field: int32 scored_validation_count = 12;
7929
+ */
7930
+ scoredValidationCount: number;
7931
+ /**
7932
+ * @generated from field: int32 unscored_validation_count = 13;
7933
+ */
7934
+ unscoredValidationCount: number;
7935
+ /**
7936
+ * @generated from field: repeated string blocking_reasons = 14;
7937
+ */
7938
+ blockingReasons: string[];
7939
+ /**
7940
+ * @generated from field: repeated fermata.v1.JudgeValidationSourceMetrics source_metrics = 15;
7941
+ */
7942
+ sourceMetrics: JudgeValidationSourceMetrics[];
7943
+ /**
7944
+ * @generated from field: google.protobuf.Timestamp latest_validation = 16;
7945
+ */
7946
+ latestValidation?: Timestamp | undefined;
7947
+ /**
7948
+ * @generated from field: string recommended_action = 17;
7949
+ */
7950
+ recommendedAction: string;
7951
+ };
7952
+ /**
7953
+ * Describes the message fermata.v1.JudgeCalibrationReport.
7954
+ * Use `create(JudgeCalibrationReportSchema)` to create a new message.
7955
+ */
7956
+ export declare const JudgeCalibrationReportSchema: GenMessage<JudgeCalibrationReport>;
7319
7957
  /**
7320
7958
  * @generated from message fermata.v1.GetJudgeValidationAccuracyRequest
7321
7959
  */
@@ -7412,6 +8050,54 @@ export type GetJudgeValidationBiasResponse = Message<"fermata.v1.GetJudgeValidat
7412
8050
  * Use `create(GetJudgeValidationBiasResponseSchema)` to create a new message.
7413
8051
  */
7414
8052
  export declare const GetJudgeValidationBiasResponseSchema: GenMessage<GetJudgeValidationBiasResponse>;
8053
+ /**
8054
+ * @generated from message fermata.v1.GetJudgeCalibrationReportRequest
8055
+ */
8056
+ export type GetJudgeCalibrationReportRequest = Message<"fermata.v1.GetJudgeCalibrationReportRequest"> & {
8057
+ /**
8058
+ * @generated from field: string organization_id = 1;
8059
+ */
8060
+ organizationId: string;
8061
+ /**
8062
+ * @generated from field: string judge_id = 2;
8063
+ */
8064
+ judgeId: string;
8065
+ /**
8066
+ * @generated from field: int32 min_validations = 3;
8067
+ */
8068
+ minValidations: number;
8069
+ /**
8070
+ * @generated from field: optional double min_accuracy = 4;
8071
+ */
8072
+ minAccuracy?: number | undefined;
8073
+ /**
8074
+ * @generated from field: optional double max_absolute_score_drift = 5;
8075
+ */
8076
+ maxAbsoluteScoreDrift?: number | undefined;
8077
+ /**
8078
+ * @generated from field: bool include_source_breakdown = 6;
8079
+ */
8080
+ includeSourceBreakdown: boolean;
8081
+ };
8082
+ /**
8083
+ * Describes the message fermata.v1.GetJudgeCalibrationReportRequest.
8084
+ * Use `create(GetJudgeCalibrationReportRequestSchema)` to create a new message.
8085
+ */
8086
+ export declare const GetJudgeCalibrationReportRequestSchema: GenMessage<GetJudgeCalibrationReportRequest>;
8087
+ /**
8088
+ * @generated from message fermata.v1.GetJudgeCalibrationReportResponse
8089
+ */
8090
+ export type GetJudgeCalibrationReportResponse = Message<"fermata.v1.GetJudgeCalibrationReportResponse"> & {
8091
+ /**
8092
+ * @generated from field: fermata.v1.JudgeCalibrationReport report = 1;
8093
+ */
8094
+ report?: JudgeCalibrationReport | undefined;
8095
+ };
8096
+ /**
8097
+ * Describes the message fermata.v1.GetJudgeCalibrationReportResponse.
8098
+ * Use `create(GetJudgeCalibrationReportResponseSchema)` to create a new message.
8099
+ */
8100
+ export declare const GetJudgeCalibrationReportResponseSchema: GenMessage<GetJudgeCalibrationReportResponse>;
7415
8101
  /**
7416
8102
  * @generated from message fermata.v1.RecalibrateJudgeValidationRequest
7417
8103
  */
@@ -7664,6 +8350,22 @@ export type QualityGateConfig = Message<"fermata.v1.QualityGateConfig"> & {
7664
8350
  * @generated from field: optional bool block_on_judges_needing_calibration = 13;
7665
8351
  */
7666
8352
  blockOnJudgesNeedingCalibration?: boolean | undefined;
8353
+ /**
8354
+ * @generated from field: repeated string required_judge_models = 14;
8355
+ */
8356
+ requiredJudgeModels: string[];
8357
+ /**
8358
+ * @generated from field: repeated string required_rubric_versions = 15;
8359
+ */
8360
+ requiredRubricVersions: string[];
8361
+ /**
8362
+ * @generated from field: repeated string required_calibration_cohorts = 16;
8363
+ */
8364
+ requiredCalibrationCohorts: string[];
8365
+ /**
8366
+ * @generated from field: optional double max_judge_disagreement_rate = 17;
8367
+ */
8368
+ maxJudgeDisagreementRate?: number | undefined;
7667
8369
  };
7668
8370
  /**
7669
8371
  * Describes the message fermata.v1.QualityGateConfig.
@@ -8673,7 +9375,15 @@ export declare enum AssertionKind {
8673
9375
  /**
8674
9376
  * @generated from enum value: ASSERTION_KIND_LLM_RUBRIC = 23;
8675
9377
  */
8676
- LLM_RUBRIC = 23
9378
+ LLM_RUBRIC = 23,
9379
+ /**
9380
+ * @generated from enum value: ASSERTION_KIND_LLM_PAIRWISE_RUBRIC = 24;
9381
+ */
9382
+ LLM_PAIRWISE_RUBRIC = 24,
9383
+ /**
9384
+ * @generated from enum value: ASSERTION_KIND_AGENT_TRAJECTORY = 25;
9385
+ */
9386
+ AGENT_TRAJECTORY = 25
8677
9387
  }
8678
9388
  /**
8679
9389
  * Describes the enum fermata.v1.AssertionKind.
@@ -9026,6 +9736,35 @@ export declare enum AgentApprovalDecision {
9026
9736
  * Describes the enum fermata.v1.AgentApprovalDecision.
9027
9737
  */
9028
9738
  export declare const AgentApprovalDecisionSchema: GenEnum<AgentApprovalDecision>;
9739
+ /**
9740
+ * @generated from enum fermata.v1.JudgeCalibrationStatus
9741
+ */
9742
+ export declare enum JudgeCalibrationStatus {
9743
+ /**
9744
+ * @generated from enum value: JUDGE_CALIBRATION_STATUS_UNSPECIFIED = 0;
9745
+ */
9746
+ UNSPECIFIED = 0,
9747
+ /**
9748
+ * @generated from enum value: JUDGE_CALIBRATION_STATUS_READY = 1;
9749
+ */
9750
+ READY = 1,
9751
+ /**
9752
+ * @generated from enum value: JUDGE_CALIBRATION_STATUS_NEEDS_LABELS = 2;
9753
+ */
9754
+ NEEDS_LABELS = 2,
9755
+ /**
9756
+ * @generated from enum value: JUDGE_CALIBRATION_STATUS_NEEDS_ACCURACY = 3;
9757
+ */
9758
+ NEEDS_ACCURACY = 3,
9759
+ /**
9760
+ * @generated from enum value: JUDGE_CALIBRATION_STATUS_NEEDS_BIAS_REVIEW = 4;
9761
+ */
9762
+ NEEDS_BIAS_REVIEW = 4
9763
+ }
9764
+ /**
9765
+ * Describes the enum fermata.v1.JudgeCalibrationStatus.
9766
+ */
9767
+ export declare const JudgeCalibrationStatusSchema: GenEnum<JudgeCalibrationStatus>;
9029
9768
  /**
9030
9769
  * FermataService is the Go control-plane surface for migrated Fermata routes.
9031
9770
  *
@@ -9056,6 +9795,22 @@ export declare const FermataService: GenService<{
9056
9795
  input: typeof ListTestSuitesRequestSchema;
9057
9796
  output: typeof ListTestSuitesResponseSchema;
9058
9797
  };
9798
+ /**
9799
+ * @generated from rpc fermata.v1.FermataService.ListEvaluationPacks
9800
+ */
9801
+ listEvaluationPacks: {
9802
+ methodKind: "unary";
9803
+ input: typeof ListEvaluationPacksRequestSchema;
9804
+ output: typeof ListEvaluationPacksResponseSchema;
9805
+ };
9806
+ /**
9807
+ * @generated from rpc fermata.v1.FermataService.BuildEvaluationPack
9808
+ */
9809
+ buildEvaluationPack: {
9810
+ methodKind: "unary";
9811
+ input: typeof BuildEvaluationPackRequestSchema;
9812
+ output: typeof BuildEvaluationPackResponseSchema;
9813
+ };
9059
9814
  /**
9060
9815
  * @generated from rpc fermata.v1.FermataService.CreateScenario
9061
9816
  */
@@ -9640,6 +10395,14 @@ export declare const FermataService: GenService<{
9640
10395
  input: typeof GetJudgeValidationBiasRequestSchema;
9641
10396
  output: typeof GetJudgeValidationBiasResponseSchema;
9642
10397
  };
10398
+ /**
10399
+ * @generated from rpc fermata.v1.FermataService.GetJudgeCalibrationReport
10400
+ */
10401
+ getJudgeCalibrationReport: {
10402
+ methodKind: "unary";
10403
+ input: typeof GetJudgeCalibrationReportRequestSchema;
10404
+ output: typeof GetJudgeCalibrationReportResponseSchema;
10405
+ };
9643
10406
  /**
9644
10407
  * @generated from rpc fermata.v1.FermataService.RecalibrateJudgeValidation
9645
10408
  */