@evalops/sdk-ts 0.1.100 → 0.1.102

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,260 @@ import type { JsonObject, Message } from "@bufbuild/protobuf";
8
8
  * Describes the file fermata/v1/fermata.proto.
9
9
  */
10
10
  export declare const file_fermata_v1_fermata: GenFile;
11
+ /**
12
+ * @generated from message fermata.v1.LLMRubricAssertion
13
+ */
14
+ export type LLMRubricAssertion = Message<"fermata.v1.LLMRubricAssertion"> & {
15
+ /**
16
+ * @generated from field: string judge_id = 1;
17
+ */
18
+ judgeId: string;
19
+ /**
20
+ * @generated from field: string verifier_judge_id = 2;
21
+ */
22
+ verifierJudgeId: string;
23
+ /**
24
+ * @generated from field: string rubric = 3;
25
+ */
26
+ rubric: string;
27
+ /**
28
+ * @generated from field: optional double min_score = 4;
29
+ */
30
+ minScore?: number | undefined;
31
+ /**
32
+ * @generated from field: int32 repeat = 5;
33
+ */
34
+ repeat: number;
35
+ /**
36
+ * @generated from field: int32 quorum = 6;
37
+ */
38
+ quorum: number;
39
+ /**
40
+ * @generated from field: bool record_judge_validation = 7;
41
+ */
42
+ recordJudgeValidation: boolean;
43
+ /**
44
+ * @generated from field: bool require_calibrated_judge = 8;
45
+ */
46
+ requireCalibratedJudge: boolean;
47
+ /**
48
+ * @generated from field: optional double min_judge_validation_accuracy = 9;
49
+ */
50
+ minJudgeValidationAccuracy?: number | undefined;
51
+ /**
52
+ * @generated from field: optional int32 min_judge_validation_count = 10;
53
+ */
54
+ minJudgeValidationCount?: number | undefined;
55
+ /**
56
+ * @generated from field: string rubric_version = 11;
57
+ */
58
+ rubricVersion: string;
59
+ /**
60
+ * @generated from field: string calibration_cohort = 12;
61
+ */
62
+ calibrationCohort: string;
63
+ /**
64
+ * @generated from field: bool advisory_only = 13;
65
+ */
66
+ advisoryOnly: boolean;
67
+ };
68
+ /**
69
+ * Describes the message fermata.v1.LLMRubricAssertion.
70
+ * Use `create(LLMRubricAssertionSchema)` to create a new message.
71
+ */
72
+ export declare const LLMRubricAssertionSchema: GenMessage<LLMRubricAssertion>;
73
+ /**
74
+ * @generated from message fermata.v1.LLMPairwiseRubricAssertion
75
+ */
76
+ export type LLMPairwiseRubricAssertion = Message<"fermata.v1.LLMPairwiseRubricAssertion"> & {
77
+ /**
78
+ * @generated from field: string judge_id = 1;
79
+ */
80
+ judgeId: string;
81
+ /**
82
+ * @generated from field: string verifier_judge_id = 2;
83
+ */
84
+ verifierJudgeId: string;
85
+ /**
86
+ * @generated from field: string rubric = 3;
87
+ */
88
+ rubric: string;
89
+ /**
90
+ * @generated from field: string baseline_label = 4;
91
+ */
92
+ baselineLabel: string;
93
+ /**
94
+ * @generated from field: string candidate_label = 5;
95
+ */
96
+ candidateLabel: string;
97
+ /**
98
+ * @generated from field: optional double min_score = 6;
99
+ */
100
+ minScore?: number | undefined;
101
+ /**
102
+ * @generated from field: int32 repeat = 7;
103
+ */
104
+ repeat: number;
105
+ /**
106
+ * @generated from field: int32 quorum = 8;
107
+ */
108
+ quorum: number;
109
+ /**
110
+ * @generated from field: bool record_judge_validation = 9;
111
+ */
112
+ recordJudgeValidation: boolean;
113
+ /**
114
+ * @generated from field: bool require_calibrated_judge = 10;
115
+ */
116
+ requireCalibratedJudge: boolean;
117
+ /**
118
+ * @generated from field: optional double min_judge_validation_accuracy = 11;
119
+ */
120
+ minJudgeValidationAccuracy?: number | undefined;
121
+ /**
122
+ * @generated from field: optional int32 min_judge_validation_count = 12;
123
+ */
124
+ minJudgeValidationCount?: number | undefined;
125
+ /**
126
+ * @generated from field: string rubric_version = 13;
127
+ */
128
+ rubricVersion: string;
129
+ /**
130
+ * @generated from field: string calibration_cohort = 14;
131
+ */
132
+ calibrationCohort: string;
133
+ /**
134
+ * @generated from field: bool advisory_only = 15;
135
+ */
136
+ advisoryOnly: boolean;
137
+ };
138
+ /**
139
+ * Describes the message fermata.v1.LLMPairwiseRubricAssertion.
140
+ * Use `create(LLMPairwiseRubricAssertionSchema)` to create a new message.
141
+ */
142
+ export declare const LLMPairwiseRubricAssertionSchema: GenMessage<LLMPairwiseRubricAssertion>;
143
+ /**
144
+ * @generated from message fermata.v1.AgentTrajectoryStatusExpectation
145
+ */
146
+ export type AgentTrajectoryStatusExpectation = Message<"fermata.v1.AgentTrajectoryStatusExpectation"> & {
147
+ /**
148
+ * @generated from field: string id = 1;
149
+ */
150
+ id: string;
151
+ /**
152
+ * @generated from field: string status = 2;
153
+ */
154
+ status: string;
155
+ };
156
+ /**
157
+ * Describes the message fermata.v1.AgentTrajectoryStatusExpectation.
158
+ * Use `create(AgentTrajectoryStatusExpectationSchema)` to create a new message.
159
+ */
160
+ export declare const AgentTrajectoryStatusExpectationSchema: GenMessage<AgentTrajectoryStatusExpectation>;
161
+ /**
162
+ * @generated from message fermata.v1.AgentTrajectoryStateExpectation
163
+ */
164
+ export type AgentTrajectoryStateExpectation = Message<"fermata.v1.AgentTrajectoryStateExpectation"> & {
165
+ /**
166
+ * @generated from field: string path = 1;
167
+ */
168
+ path: string;
169
+ /**
170
+ * @generated from field: string value_json = 2;
171
+ */
172
+ valueJson: string;
173
+ };
174
+ /**
175
+ * Describes the message fermata.v1.AgentTrajectoryStateExpectation.
176
+ * Use `create(AgentTrajectoryStateExpectationSchema)` to create a new message.
177
+ */
178
+ export declare const AgentTrajectoryStateExpectationSchema: GenMessage<AgentTrajectoryStateExpectation>;
179
+ /**
180
+ * @generated from message fermata.v1.AgentTrajectoryAssertion
181
+ */
182
+ export type AgentTrajectoryAssertion = Message<"fermata.v1.AgentTrajectoryAssertion"> & {
183
+ /**
184
+ * @generated from field: repeated string required_tools = 1;
185
+ */
186
+ requiredTools: string[];
187
+ /**
188
+ * @generated from field: repeated string forbidden_tools = 2;
189
+ */
190
+ forbiddenTools: string[];
191
+ /**
192
+ * @generated from field: repeated string required_events = 3;
193
+ */
194
+ requiredEvents: string[];
195
+ /**
196
+ * @generated from field: repeated string forbidden_events = 4;
197
+ */
198
+ forbiddenEvents: string[];
199
+ /**
200
+ * @generated from field: repeated fermata.v1.AgentTrajectoryStatusExpectation required_assertion_statuses = 5;
201
+ */
202
+ requiredAssertionStatuses: AgentTrajectoryStatusExpectation[];
203
+ /**
204
+ * @generated from field: repeated fermata.v1.AgentTrajectoryStateExpectation required_state_writes = 6;
205
+ */
206
+ requiredStateWrites: AgentTrajectoryStateExpectation[];
207
+ /**
208
+ * @generated from field: repeated fermata.v1.AgentTrajectoryStateExpectation forbidden_state_writes = 7;
209
+ */
210
+ forbiddenStateWrites: AgentTrajectoryStateExpectation[];
211
+ /**
212
+ * @generated from field: optional int32 max_events = 8;
213
+ */
214
+ maxEvents?: number | undefined;
215
+ /**
216
+ * @generated from field: optional int32 max_tool_calls = 9;
217
+ */
218
+ maxToolCalls?: number | undefined;
219
+ /**
220
+ * @generated from field: optional int32 max_replay_deltas = 10;
221
+ */
222
+ maxReplayDeltas?: number | undefined;
223
+ /**
224
+ * @generated from field: optional int32 max_score_failures = 11;
225
+ */
226
+ maxScoreFailures?: number | undefined;
227
+ /**
228
+ * @generated from field: optional int32 max_score_warnings = 12;
229
+ */
230
+ maxScoreWarnings?: number | undefined;
231
+ /**
232
+ * @generated from field: optional int64 max_latency_ms = 13;
233
+ */
234
+ maxLatencyMs?: bigint | undefined;
235
+ /**
236
+ * @generated from field: optional int64 max_cost_micros = 14;
237
+ */
238
+ maxCostMicros?: bigint | undefined;
239
+ /**
240
+ * @generated from field: optional int32 max_retries = 15;
241
+ */
242
+ maxRetries?: number | undefined;
243
+ /**
244
+ * @generated from field: bool require_idempotent_replay = 16;
245
+ */
246
+ requireIdempotentReplay: boolean;
247
+ /**
248
+ * @generated from field: bool forbid_duplicate_external_actions = 17;
249
+ */
250
+ forbidDuplicateExternalActions: boolean;
251
+ /**
252
+ * @generated from field: repeated string forbidden_external_actions = 18;
253
+ */
254
+ forbiddenExternalActions: string[];
255
+ /**
256
+ * @generated from field: repeated string required_trace_join_keys = 19;
257
+ */
258
+ requiredTraceJoinKeys: string[];
259
+ };
260
+ /**
261
+ * Describes the message fermata.v1.AgentTrajectoryAssertion.
262
+ * Use `create(AgentTrajectoryAssertionSchema)` to create a new message.
263
+ */
264
+ export declare const AgentTrajectoryAssertionSchema: GenMessage<AgentTrajectoryAssertion>;
11
265
  /**
12
266
  * @generated from message fermata.v1.TestSuite
13
267
  */
@@ -116,6 +370,18 @@ export type Assertion = Message<"fermata.v1.Assertion"> & {
116
370
  * @generated from field: google.protobuf.Struct metadata = 6;
117
371
  */
118
372
  metadata?: JsonObject | undefined;
373
+ /**
374
+ * @generated from field: fermata.v1.LLMRubricAssertion llm_rubric = 7;
375
+ */
376
+ llmRubric?: LLMRubricAssertion | undefined;
377
+ /**
378
+ * @generated from field: fermata.v1.LLMPairwiseRubricAssertion llm_pairwise_rubric = 8;
379
+ */
380
+ llmPairwiseRubric?: LLMPairwiseRubricAssertion | undefined;
381
+ /**
382
+ * @generated from field: fermata.v1.AgentTrajectoryAssertion agent_trajectory = 9;
383
+ */
384
+ agentTrajectory?: AgentTrajectoryAssertion | undefined;
119
385
  };
120
386
  /**
121
387
  * Describes the message fermata.v1.Assertion.
@@ -1256,6 +1522,240 @@ export type ListTestSuitesResponse = Message<"fermata.v1.ListTestSuitesResponse"
1256
1522
  * Use `create(ListTestSuitesResponseSchema)` to create a new message.
1257
1523
  */
1258
1524
  export declare const ListTestSuitesResponseSchema: GenMessage<ListTestSuitesResponse>;
1525
+ /**
1526
+ * @generated from message fermata.v1.EvaluationPackJudgeContract
1527
+ */
1528
+ export type EvaluationPackJudgeContract = Message<"fermata.v1.EvaluationPackJudgeContract"> & {
1529
+ /**
1530
+ * @generated from field: string judge_id = 1;
1531
+ */
1532
+ judgeId: string;
1533
+ /**
1534
+ * @generated from field: string model = 2;
1535
+ */
1536
+ model: string;
1537
+ /**
1538
+ * @generated from field: string rubric_version = 3;
1539
+ */
1540
+ rubricVersion: string;
1541
+ /**
1542
+ * @generated from field: string calibration_cohort = 4;
1543
+ */
1544
+ calibrationCohort: string;
1545
+ /**
1546
+ * @generated from field: optional double max_disagreement_rate = 5;
1547
+ */
1548
+ maxDisagreementRate?: number | undefined;
1549
+ /**
1550
+ * @generated from field: optional double min_validation_accuracy = 6;
1551
+ */
1552
+ minValidationAccuracy?: number | undefined;
1553
+ /**
1554
+ * @generated from field: optional int32 min_validation_count = 7;
1555
+ */
1556
+ minValidationCount?: number | undefined;
1557
+ };
1558
+ /**
1559
+ * Describes the message fermata.v1.EvaluationPackJudgeContract.
1560
+ * Use `create(EvaluationPackJudgeContractSchema)` to create a new message.
1561
+ */
1562
+ export declare const EvaluationPackJudgeContractSchema: GenMessage<EvaluationPackJudgeContract>;
1563
+ /**
1564
+ * @generated from message fermata.v1.ProductionTracePromotionPolicy
1565
+ */
1566
+ export type ProductionTracePromotionPolicy = Message<"fermata.v1.ProductionTracePromotionPolicy"> & {
1567
+ /**
1568
+ * @generated from field: bool require_quality_annotations = 1;
1569
+ */
1570
+ requireQualityAnnotations: boolean;
1571
+ /**
1572
+ * @generated from field: bool failed_annotations_only = 2;
1573
+ */
1574
+ failedAnnotationsOnly: boolean;
1575
+ /**
1576
+ * @generated from field: int32 max_cases = 3;
1577
+ */
1578
+ maxCases: number;
1579
+ /**
1580
+ * @generated from field: repeated string required_tags = 4;
1581
+ */
1582
+ requiredTags: string[];
1583
+ /**
1584
+ * @generated from field: repeated string excluded_tags = 5;
1585
+ */
1586
+ excludedTags: string[];
1587
+ };
1588
+ /**
1589
+ * Describes the message fermata.v1.ProductionTracePromotionPolicy.
1590
+ * Use `create(ProductionTracePromotionPolicySchema)` to create a new message.
1591
+ */
1592
+ export declare const ProductionTracePromotionPolicySchema: GenMessage<ProductionTracePromotionPolicy>;
1593
+ /**
1594
+ * @generated from message fermata.v1.EvaluationPack
1595
+ */
1596
+ export type EvaluationPack = Message<"fermata.v1.EvaluationPack"> & {
1597
+ /**
1598
+ * @generated from field: string id = 1;
1599
+ */
1600
+ id: string;
1601
+ /**
1602
+ * @generated from field: string service = 2;
1603
+ */
1604
+ service: string;
1605
+ /**
1606
+ * @generated from field: string name = 3;
1607
+ */
1608
+ name: string;
1609
+ /**
1610
+ * @generated from field: string description = 4;
1611
+ */
1612
+ description: string;
1613
+ /**
1614
+ * @generated from field: string version = 5;
1615
+ */
1616
+ version: string;
1617
+ /**
1618
+ * @generated from field: repeated fermata.v1.EvaluationPackJudgeContract judge_contracts = 6;
1619
+ */
1620
+ judgeContracts: EvaluationPackJudgeContract[];
1621
+ /**
1622
+ * @generated from field: fermata.v1.ProductionTracePromotionPolicy trace_promotion_policy = 7;
1623
+ */
1624
+ tracePromotionPolicy?: ProductionTracePromotionPolicy | undefined;
1625
+ /**
1626
+ * @generated from field: fermata.v1.QualityGateConfig quality_gate = 8;
1627
+ */
1628
+ qualityGate?: QualityGateConfig | undefined;
1629
+ /**
1630
+ * @generated from field: repeated string source_suite_ids = 9;
1631
+ */
1632
+ sourceSuiteIds: string[];
1633
+ /**
1634
+ * @generated from field: repeated string trace_ids = 10;
1635
+ */
1636
+ traceIds: string[];
1637
+ /**
1638
+ * @generated from field: google.protobuf.Struct metadata = 11;
1639
+ */
1640
+ metadata?: JsonObject | undefined;
1641
+ /**
1642
+ * @generated from field: google.protobuf.Timestamp updated_at = 12;
1643
+ */
1644
+ updatedAt?: Timestamp | undefined;
1645
+ };
1646
+ /**
1647
+ * Describes the message fermata.v1.EvaluationPack.
1648
+ * Use `create(EvaluationPackSchema)` to create a new message.
1649
+ */
1650
+ export declare const EvaluationPackSchema: GenMessage<EvaluationPack>;
1651
+ /**
1652
+ * @generated from message fermata.v1.ListEvaluationPacksRequest
1653
+ */
1654
+ export type ListEvaluationPacksRequest = Message<"fermata.v1.ListEvaluationPacksRequest"> & {
1655
+ /**
1656
+ * @generated from field: string organization_id = 1;
1657
+ */
1658
+ organizationId: string;
1659
+ /**
1660
+ * @generated from field: string workspace_id = 2;
1661
+ */
1662
+ workspaceId: string;
1663
+ /**
1664
+ * @generated from field: string service = 3;
1665
+ */
1666
+ service: string;
1667
+ };
1668
+ /**
1669
+ * Describes the message fermata.v1.ListEvaluationPacksRequest.
1670
+ * Use `create(ListEvaluationPacksRequestSchema)` to create a new message.
1671
+ */
1672
+ export declare const ListEvaluationPacksRequestSchema: GenMessage<ListEvaluationPacksRequest>;
1673
+ /**
1674
+ * @generated from message fermata.v1.ListEvaluationPacksResponse
1675
+ */
1676
+ export type ListEvaluationPacksResponse = Message<"fermata.v1.ListEvaluationPacksResponse"> & {
1677
+ /**
1678
+ * @generated from field: repeated fermata.v1.EvaluationPack packs = 1;
1679
+ */
1680
+ packs: EvaluationPack[];
1681
+ /**
1682
+ * @generated from field: int32 total_count = 2;
1683
+ */
1684
+ totalCount: number;
1685
+ };
1686
+ /**
1687
+ * Describes the message fermata.v1.ListEvaluationPacksResponse.
1688
+ * Use `create(ListEvaluationPacksResponseSchema)` to create a new message.
1689
+ */
1690
+ export declare const ListEvaluationPacksResponseSchema: GenMessage<ListEvaluationPacksResponse>;
1691
+ /**
1692
+ * @generated from message fermata.v1.BuildEvaluationPackRequest
1693
+ */
1694
+ export type BuildEvaluationPackRequest = Message<"fermata.v1.BuildEvaluationPackRequest"> & {
1695
+ /**
1696
+ * @generated from field: string organization_id = 1;
1697
+ */
1698
+ organizationId: string;
1699
+ /**
1700
+ * @generated from field: string workspace_id = 2;
1701
+ */
1702
+ workspaceId: string;
1703
+ /**
1704
+ * @generated from field: string pack_id = 3;
1705
+ */
1706
+ packId: string;
1707
+ /**
1708
+ * @generated from field: string suite_name = 4;
1709
+ */
1710
+ suiteName: string;
1711
+ /**
1712
+ * @generated from field: string created_by = 5;
1713
+ */
1714
+ createdBy: string;
1715
+ /**
1716
+ * @generated from field: repeated string source_suite_ids = 6;
1717
+ */
1718
+ sourceSuiteIds: string[];
1719
+ /**
1720
+ * @generated from field: repeated string trace_ids = 7;
1721
+ */
1722
+ traceIds: string[];
1723
+ /**
1724
+ * @generated from field: int32 limit = 8;
1725
+ */
1726
+ limit: number;
1727
+ };
1728
+ /**
1729
+ * Describes the message fermata.v1.BuildEvaluationPackRequest.
1730
+ * Use `create(BuildEvaluationPackRequestSchema)` to create a new message.
1731
+ */
1732
+ export declare const BuildEvaluationPackRequestSchema: GenMessage<BuildEvaluationPackRequest>;
1733
+ /**
1734
+ * @generated from message fermata.v1.BuildEvaluationPackResponse
1735
+ */
1736
+ export type BuildEvaluationPackResponse = Message<"fermata.v1.BuildEvaluationPackResponse"> & {
1737
+ /**
1738
+ * @generated from field: fermata.v1.EvaluationPack pack = 1;
1739
+ */
1740
+ pack?: EvaluationPack | undefined;
1741
+ /**
1742
+ * @generated from field: fermata.v1.TestSuite suite = 2;
1743
+ */
1744
+ suite?: TestSuite | undefined;
1745
+ /**
1746
+ * @generated from field: fermata.v1.QualityGateConfig quality_gate = 3;
1747
+ */
1748
+ qualityGate?: QualityGateConfig | undefined;
1749
+ /**
1750
+ * @generated from field: fermata.v1.ProductionTracePromotionPolicy trace_promotion_policy = 4;
1751
+ */
1752
+ tracePromotionPolicy?: ProductionTracePromotionPolicy | undefined;
1753
+ };
1754
+ /**
1755
+ * Describes the message fermata.v1.BuildEvaluationPackResponse.
1756
+ * Use `create(BuildEvaluationPackResponseSchema)` to create a new message.
1757
+ */
1758
+ export declare const BuildEvaluationPackResponseSchema: GenMessage<BuildEvaluationPackResponse>;
1259
1759
  /**
1260
1760
  * @generated from message fermata.v1.CreateScenarioRequest
1261
1761
  */
@@ -3938,6 +4438,38 @@ export type ListScenarioRunsRequest = Message<"fermata.v1.ListScenarioRunsReques
3938
4438
  * @generated from field: int32 offset = 5;
3939
4439
  */
3940
4440
  offset: number;
4441
+ /**
4442
+ * @generated from field: fermata.v1.ScenarioRunStatus status = 6;
4443
+ */
4444
+ status: ScenarioRunStatus;
4445
+ /**
4446
+ * @generated from field: string source = 7;
4447
+ */
4448
+ source: string;
4449
+ /**
4450
+ * @generated from field: string evaluation_id = 8;
4451
+ */
4452
+ evaluationId: string;
4453
+ /**
4454
+ * @generated from field: string suite_id = 9;
4455
+ */
4456
+ suiteId: string;
4457
+ /**
4458
+ * @generated from field: string test_case_id = 10;
4459
+ */
4460
+ testCaseId: string;
4461
+ /**
4462
+ * @generated from field: string candidate_id = 11;
4463
+ */
4464
+ candidateId: string;
4465
+ /**
4466
+ * @generated from field: string candidate_label = 12;
4467
+ */
4468
+ candidateLabel: string;
4469
+ /**
4470
+ * @generated from field: string model = 13;
4471
+ */
4472
+ model: string;
3941
4473
  };
3942
4474
  /**
3943
4475
  * Describes the message fermata.v1.ListScenarioRunsRequest.
@@ -4652,6 +5184,10 @@ export type EvaluationComparison = Message<"fermata.v1.EvaluationComparison"> &
4652
5184
  * @generated from field: google.protobuf.Timestamp updated_at = 19;
4653
5185
  */
4654
5186
  updatedAt?: Timestamp | undefined;
5187
+ /**
5188
+ * @generated from field: google.protobuf.Struct metadata = 20;
5189
+ */
5190
+ metadata?: JsonObject | undefined;
4655
5191
  };
4656
5192
  /**
4657
5193
  * Describes the message fermata.v1.EvaluationComparison.
@@ -6096,6 +6632,18 @@ export type ProductionTraceRecord = Message<"fermata.v1.ProductionTraceRecord">
6096
6632
  * @generated from field: optional double provider_cost = 17;
6097
6633
  */
6098
6634
  providerCost?: number | undefined;
6635
+ /**
6636
+ * @generated from field: repeated string must_contain = 18;
6637
+ */
6638
+ mustContain: string[];
6639
+ /**
6640
+ * @generated from field: repeated string must_not_contain = 19;
6641
+ */
6642
+ mustNotContain: string[];
6643
+ /**
6644
+ * @generated from field: optional double min_similarity = 20;
6645
+ */
6646
+ minSimilarity?: number | undefined;
6099
6647
  };
6100
6648
  /**
6101
6649
  * Describes the message fermata.v1.ProductionTraceRecord.
@@ -6214,6 +6762,18 @@ export type BuildProductionTraceRegressionSuiteRequest = Message<"fermata.v1.Bui
6214
6762
  * @generated from field: repeated fermata.v1.ProductionTraceRecord traces = 6;
6215
6763
  */
6216
6764
  traces: ProductionTraceRecord[];
6765
+ /**
6766
+ * @generated from field: repeated string trace_ids = 7;
6767
+ */
6768
+ traceIds: string[];
6769
+ /**
6770
+ * @generated from field: bool require_quality_annotations = 8;
6771
+ */
6772
+ requireQualityAnnotations: boolean;
6773
+ /**
6774
+ * @generated from field: bool failed_annotations_only = 9;
6775
+ */
6776
+ failedAnnotationsOnly: boolean;
6217
6777
  };
6218
6778
  /**
6219
6779
  * Describes the message fermata.v1.BuildProductionTraceRegressionSuiteRequest.
@@ -6504,6 +7064,26 @@ export type CompareProductionTraceReplayRequest = Message<"fermata.v1.ComparePro
6504
7064
  * @generated from field: optional int32 max_diff_lines = 5;
6505
7065
  */
6506
7066
  maxDiffLines?: number | undefined;
7067
+ /**
7068
+ * @generated from field: bool persist_comparison_artifact = 6;
7069
+ */
7070
+ persistComparisonArtifact: boolean;
7071
+ /**
7072
+ * @generated from field: string artifact_name = 7;
7073
+ */
7074
+ artifactName: string;
7075
+ /**
7076
+ * @generated from field: string baseline_run_id = 8;
7077
+ */
7078
+ baselineRunId: string;
7079
+ /**
7080
+ * @generated from field: repeated string comparison_run_ids = 9;
7081
+ */
7082
+ comparisonRunIds: string[];
7083
+ /**
7084
+ * @generated from field: string created_by = 10;
7085
+ */
7086
+ createdBy: string;
6507
7087
  };
6508
7088
  /**
6509
7089
  * Describes the message fermata.v1.CompareProductionTraceReplayRequest.
@@ -6526,6 +7106,14 @@ export type CompareProductionTraceReplayResponse = Message<"fermata.v1.ComparePr
6526
7106
  * @generated from field: repeated fermata.v1.ProductionTraceReplayRegression regressions = 3;
6527
7107
  */
6528
7108
  regressions: ProductionTraceReplayRegression[];
7109
+ /**
7110
+ * @generated from field: string comparison_artifact_id = 4;
7111
+ */
7112
+ comparisonArtifactId: string;
7113
+ /**
7114
+ * @generated from field: fermata.v1.EvaluationComparison comparison_artifact = 5;
7115
+ */
7116
+ comparisonArtifact?: EvaluationComparison | undefined;
6529
7117
  };
6530
7118
  /**
6531
7119
  * Describes the message fermata.v1.CompareProductionTraceReplayResponse.
@@ -7262,6 +7850,110 @@ export type JudgeCalibrationCandidate = Message<"fermata.v1.JudgeCalibrationCand
7262
7850
  * Use `create(JudgeCalibrationCandidateSchema)` to create a new message.
7263
7851
  */
7264
7852
  export declare const JudgeCalibrationCandidateSchema: GenMessage<JudgeCalibrationCandidate>;
7853
+ /**
7854
+ * @generated from message fermata.v1.JudgeValidationSourceMetrics
7855
+ */
7856
+ export type JudgeValidationSourceMetrics = Message<"fermata.v1.JudgeValidationSourceMetrics"> & {
7857
+ /**
7858
+ * @generated from field: string source = 1;
7859
+ */
7860
+ source: string;
7861
+ /**
7862
+ * @generated from field: fermata.v1.JudgeValidationAccuracyMetrics accuracy = 2;
7863
+ */
7864
+ accuracy?: JudgeValidationAccuracyMetrics | undefined;
7865
+ /**
7866
+ * @generated from field: fermata.v1.JudgeValidationBiasMetrics bias = 3;
7867
+ */
7868
+ bias?: JudgeValidationBiasMetrics | undefined;
7869
+ /**
7870
+ * @generated from field: google.protobuf.Timestamp latest_validation = 4;
7871
+ */
7872
+ latestValidation?: Timestamp | undefined;
7873
+ };
7874
+ /**
7875
+ * Describes the message fermata.v1.JudgeValidationSourceMetrics.
7876
+ * Use `create(JudgeValidationSourceMetricsSchema)` to create a new message.
7877
+ */
7878
+ export declare const JudgeValidationSourceMetricsSchema: GenMessage<JudgeValidationSourceMetrics>;
7879
+ /**
7880
+ * @generated from message fermata.v1.JudgeCalibrationReport
7881
+ */
7882
+ export type JudgeCalibrationReport = Message<"fermata.v1.JudgeCalibrationReport"> & {
7883
+ /**
7884
+ * @generated from field: string organization_id = 1;
7885
+ */
7886
+ organizationId: string;
7887
+ /**
7888
+ * @generated from field: string judge_id = 2;
7889
+ */
7890
+ judgeId: string;
7891
+ /**
7892
+ * @generated from field: fermata.v1.JudgeCalibrationStatus status = 3;
7893
+ */
7894
+ status: JudgeCalibrationStatus;
7895
+ /**
7896
+ * @generated from field: bool passed = 4;
7897
+ */
7898
+ passed: boolean;
7899
+ /**
7900
+ * @generated from field: fermata.v1.JudgeValidationAccuracyMetrics accuracy = 5;
7901
+ */
7902
+ accuracy?: JudgeValidationAccuracyMetrics | undefined;
7903
+ /**
7904
+ * @generated from field: fermata.v1.JudgeValidationConfusionMatrix confusion_matrix = 6;
7905
+ */
7906
+ confusionMatrix?: JudgeValidationConfusionMatrix | undefined;
7907
+ /**
7908
+ * @generated from field: fermata.v1.JudgeValidationBiasMetrics bias = 7;
7909
+ */
7910
+ bias?: JudgeValidationBiasMetrics | undefined;
7911
+ /**
7912
+ * @generated from field: int32 min_validations = 8;
7913
+ */
7914
+ minValidations: number;
7915
+ /**
7916
+ * @generated from field: double min_accuracy = 9;
7917
+ */
7918
+ minAccuracy: number;
7919
+ /**
7920
+ * @generated from field: optional double max_absolute_score_drift = 10;
7921
+ */
7922
+ maxAbsoluteScoreDrift?: number | undefined;
7923
+ /**
7924
+ * @generated from field: int32 total_validated_records = 11;
7925
+ */
7926
+ totalValidatedRecords: number;
7927
+ /**
7928
+ * @generated from field: int32 scored_validation_count = 12;
7929
+ */
7930
+ scoredValidationCount: number;
7931
+ /**
7932
+ * @generated from field: int32 unscored_validation_count = 13;
7933
+ */
7934
+ unscoredValidationCount: number;
7935
+ /**
7936
+ * @generated from field: repeated string blocking_reasons = 14;
7937
+ */
7938
+ blockingReasons: string[];
7939
+ /**
7940
+ * @generated from field: repeated fermata.v1.JudgeValidationSourceMetrics source_metrics = 15;
7941
+ */
7942
+ sourceMetrics: JudgeValidationSourceMetrics[];
7943
+ /**
7944
+ * @generated from field: google.protobuf.Timestamp latest_validation = 16;
7945
+ */
7946
+ latestValidation?: Timestamp | undefined;
7947
+ /**
7948
+ * @generated from field: string recommended_action = 17;
7949
+ */
7950
+ recommendedAction: string;
7951
+ };
7952
+ /**
7953
+ * Describes the message fermata.v1.JudgeCalibrationReport.
7954
+ * Use `create(JudgeCalibrationReportSchema)` to create a new message.
7955
+ */
7956
+ export declare const JudgeCalibrationReportSchema: GenMessage<JudgeCalibrationReport>;
7265
7957
  /**
7266
7958
  * @generated from message fermata.v1.GetJudgeValidationAccuracyRequest
7267
7959
  */
@@ -7358,6 +8050,54 @@ export type GetJudgeValidationBiasResponse = Message<"fermata.v1.GetJudgeValidat
7358
8050
  * Use `create(GetJudgeValidationBiasResponseSchema)` to create a new message.
7359
8051
  */
7360
8052
  export declare const GetJudgeValidationBiasResponseSchema: GenMessage<GetJudgeValidationBiasResponse>;
8053
+ /**
8054
+ * @generated from message fermata.v1.GetJudgeCalibrationReportRequest
8055
+ */
8056
+ export type GetJudgeCalibrationReportRequest = Message<"fermata.v1.GetJudgeCalibrationReportRequest"> & {
8057
+ /**
8058
+ * @generated from field: string organization_id = 1;
8059
+ */
8060
+ organizationId: string;
8061
+ /**
8062
+ * @generated from field: string judge_id = 2;
8063
+ */
8064
+ judgeId: string;
8065
+ /**
8066
+ * @generated from field: int32 min_validations = 3;
8067
+ */
8068
+ minValidations: number;
8069
+ /**
8070
+ * @generated from field: optional double min_accuracy = 4;
8071
+ */
8072
+ minAccuracy?: number | undefined;
8073
+ /**
8074
+ * @generated from field: optional double max_absolute_score_drift = 5;
8075
+ */
8076
+ maxAbsoluteScoreDrift?: number | undefined;
8077
+ /**
8078
+ * @generated from field: bool include_source_breakdown = 6;
8079
+ */
8080
+ includeSourceBreakdown: boolean;
8081
+ };
8082
+ /**
8083
+ * Describes the message fermata.v1.GetJudgeCalibrationReportRequest.
8084
+ * Use `create(GetJudgeCalibrationReportRequestSchema)` to create a new message.
8085
+ */
8086
+ export declare const GetJudgeCalibrationReportRequestSchema: GenMessage<GetJudgeCalibrationReportRequest>;
8087
+ /**
8088
+ * @generated from message fermata.v1.GetJudgeCalibrationReportResponse
8089
+ */
8090
+ export type GetJudgeCalibrationReportResponse = Message<"fermata.v1.GetJudgeCalibrationReportResponse"> & {
8091
+ /**
8092
+ * @generated from field: fermata.v1.JudgeCalibrationReport report = 1;
8093
+ */
8094
+ report?: JudgeCalibrationReport | undefined;
8095
+ };
8096
+ /**
8097
+ * Describes the message fermata.v1.GetJudgeCalibrationReportResponse.
8098
+ * Use `create(GetJudgeCalibrationReportResponseSchema)` to create a new message.
8099
+ */
8100
+ export declare const GetJudgeCalibrationReportResponseSchema: GenMessage<GetJudgeCalibrationReportResponse>;
7361
8101
  /**
7362
8102
  * @generated from message fermata.v1.RecalibrateJudgeValidationRequest
7363
8103
  */
@@ -7610,6 +8350,22 @@ export type QualityGateConfig = Message<"fermata.v1.QualityGateConfig"> & {
7610
8350
  * @generated from field: optional bool block_on_judges_needing_calibration = 13;
7611
8351
  */
7612
8352
  blockOnJudgesNeedingCalibration?: boolean | undefined;
8353
+ /**
8354
+ * @generated from field: repeated string required_judge_models = 14;
8355
+ */
8356
+ requiredJudgeModels: string[];
8357
+ /**
8358
+ * @generated from field: repeated string required_rubric_versions = 15;
8359
+ */
8360
+ requiredRubricVersions: string[];
8361
+ /**
8362
+ * @generated from field: repeated string required_calibration_cohorts = 16;
8363
+ */
8364
+ requiredCalibrationCohorts: string[];
8365
+ /**
8366
+ * @generated from field: optional double max_judge_disagreement_rate = 17;
8367
+ */
8368
+ maxJudgeDisagreementRate?: number | undefined;
7613
8369
  };
7614
8370
  /**
7615
8371
  * Describes the message fermata.v1.QualityGateConfig.
@@ -8615,7 +9371,19 @@ export declare enum AssertionKind {
8615
9371
  /**
8616
9372
  * @generated from enum value: ASSERTION_KIND_TOOL_CALL_F1 = 22;
8617
9373
  */
8618
- TOOL_CALL_F1 = 22
9374
+ TOOL_CALL_F1 = 22,
9375
+ /**
9376
+ * @generated from enum value: ASSERTION_KIND_LLM_RUBRIC = 23;
9377
+ */
9378
+ LLM_RUBRIC = 23,
9379
+ /**
9380
+ * @generated from enum value: ASSERTION_KIND_LLM_PAIRWISE_RUBRIC = 24;
9381
+ */
9382
+ LLM_PAIRWISE_RUBRIC = 24,
9383
+ /**
9384
+ * @generated from enum value: ASSERTION_KIND_AGENT_TRAJECTORY = 25;
9385
+ */
9386
+ AGENT_TRAJECTORY = 25
8619
9387
  }
8620
9388
  /**
8621
9389
  * Describes the enum fermata.v1.AssertionKind.
@@ -8968,6 +9736,35 @@ export declare enum AgentApprovalDecision {
8968
9736
  * Describes the enum fermata.v1.AgentApprovalDecision.
8969
9737
  */
8970
9738
  export declare const AgentApprovalDecisionSchema: GenEnum<AgentApprovalDecision>;
9739
+ /**
9740
+ * @generated from enum fermata.v1.JudgeCalibrationStatus
9741
+ */
9742
+ export declare enum JudgeCalibrationStatus {
9743
+ /**
9744
+ * @generated from enum value: JUDGE_CALIBRATION_STATUS_UNSPECIFIED = 0;
9745
+ */
9746
+ UNSPECIFIED = 0,
9747
+ /**
9748
+ * @generated from enum value: JUDGE_CALIBRATION_STATUS_READY = 1;
9749
+ */
9750
+ READY = 1,
9751
+ /**
9752
+ * @generated from enum value: JUDGE_CALIBRATION_STATUS_NEEDS_LABELS = 2;
9753
+ */
9754
+ NEEDS_LABELS = 2,
9755
+ /**
9756
+ * @generated from enum value: JUDGE_CALIBRATION_STATUS_NEEDS_ACCURACY = 3;
9757
+ */
9758
+ NEEDS_ACCURACY = 3,
9759
+ /**
9760
+ * @generated from enum value: JUDGE_CALIBRATION_STATUS_NEEDS_BIAS_REVIEW = 4;
9761
+ */
9762
+ NEEDS_BIAS_REVIEW = 4
9763
+ }
9764
+ /**
9765
+ * Describes the enum fermata.v1.JudgeCalibrationStatus.
9766
+ */
9767
+ export declare const JudgeCalibrationStatusSchema: GenEnum<JudgeCalibrationStatus>;
8971
9768
  /**
8972
9769
  * FermataService is the Go control-plane surface for migrated Fermata routes.
8973
9770
  *
@@ -8998,6 +9795,22 @@ export declare const FermataService: GenService<{
8998
9795
  input: typeof ListTestSuitesRequestSchema;
8999
9796
  output: typeof ListTestSuitesResponseSchema;
9000
9797
  };
9798
+ /**
9799
+ * @generated from rpc fermata.v1.FermataService.ListEvaluationPacks
9800
+ */
9801
+ listEvaluationPacks: {
9802
+ methodKind: "unary";
9803
+ input: typeof ListEvaluationPacksRequestSchema;
9804
+ output: typeof ListEvaluationPacksResponseSchema;
9805
+ };
9806
+ /**
9807
+ * @generated from rpc fermata.v1.FermataService.BuildEvaluationPack
9808
+ */
9809
+ buildEvaluationPack: {
9810
+ methodKind: "unary";
9811
+ input: typeof BuildEvaluationPackRequestSchema;
9812
+ output: typeof BuildEvaluationPackResponseSchema;
9813
+ };
9001
9814
  /**
9002
9815
  * @generated from rpc fermata.v1.FermataService.CreateScenario
9003
9816
  */
@@ -9582,6 +10395,14 @@ export declare const FermataService: GenService<{
9582
10395
  input: typeof GetJudgeValidationBiasRequestSchema;
9583
10396
  output: typeof GetJudgeValidationBiasResponseSchema;
9584
10397
  };
10398
+ /**
10399
+ * @generated from rpc fermata.v1.FermataService.GetJudgeCalibrationReport
10400
+ */
10401
+ getJudgeCalibrationReport: {
10402
+ methodKind: "unary";
10403
+ input: typeof GetJudgeCalibrationReportRequestSchema;
10404
+ output: typeof GetJudgeCalibrationReportResponseSchema;
10405
+ };
9585
10406
  /**
9586
10407
  * @generated from rpc fermata.v1.FermataService.RecalibrateJudgeValidation
9587
10408
  */