@thinkhive/sdk 4.2.2 → 4.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -89,28 +89,38 @@ exports.conversationEval = {
89
89
  */
90
90
  function aggregateWorst(turnResults) {
91
91
  if (turnResults.length === 0)
92
- return { passed: false, score: 0 };
93
- const worstScore = Math.min(...turnResults.map(t => t.score));
94
- const passed = turnResults.every(t => t.passed);
92
+ return typeof turnResults[0] === 'number' ? 0 : { passed: false, score: 0 };
93
+ // Simple number array: return the minimum
94
+ if (typeof turnResults[0] === 'number') {
95
+ return Math.min(...turnResults);
96
+ }
97
+ const results = turnResults;
98
+ const worstScore = Math.min(...results.map(t => t.score));
99
+ const passed = results.every(t => t.passed);
95
100
  return { passed, score: worstScore };
96
101
  }
97
102
  /**
98
103
  * Calculate average aggregation
99
104
  *
100
- * @param turnResults - Array of turn evaluation results
101
- * @returns Aggregated result using average logic
105
+ * Accepts TurnEvaluation[] or simple number[]:
106
+ * - aggregateAverage(turnResults) — returns {passed, score}
107
+ * - aggregateAverage([0.6, 0.8]) — returns 0.7
102
108
  *
103
- * @example
104
- * ```typescript
105
- * const result = aggregateAverage(turnResults);
106
- * ```
109
+ * @param turnResults - Array of turn evaluation results or numbers
110
+ * @returns Aggregated result
107
111
  */
108
112
  function aggregateAverage(turnResults) {
109
113
  if (turnResults.length === 0)
110
- return { passed: false, score: 0 };
111
- const avgScore = turnResults.reduce((sum, t) => sum + t.score, 0) / turnResults.length;
112
- const passedCount = turnResults.filter(t => t.passed).length;
113
- const passed = passedCount > turnResults.length / 2;
114
+ return typeof turnResults[0] === 'number' ? 0 : { passed: false, score: 0 };
115
+ // Simple number array: return the mean
116
+ if (typeof turnResults[0] === 'number') {
117
+ const scores = turnResults;
118
+ return scores.reduce((sum, s) => sum + s, 0) / scores.length;
119
+ }
120
+ const results = turnResults;
121
+ const avgScore = results.reduce((sum, t) => sum + t.score, 0) / results.length;
122
+ const passedCount = results.filter(t => t.passed).length;
123
+ const passed = passedCount > results.length / 2;
114
124
  return { passed, score: avgScore };
115
125
  }
116
126
  /**
@@ -187,13 +197,16 @@ function aggregateMajority(turnResults) {
187
197
  * @returns Aggregation function
188
198
  */
189
199
  function getAggregator(method) {
200
+ // Wrap overloaded functions to match the expected signature
201
+ const wrapWorst = (t) => aggregateWorst(t);
202
+ const wrapAverage = (t) => aggregateAverage(t);
190
203
  switch (method) {
191
- case 'worst': return aggregateWorst;
192
- case 'average': return aggregateAverage;
204
+ case 'worst': return wrapWorst;
205
+ case 'average': return wrapAverage;
193
206
  case 'weighted': return aggregateWeighted;
194
207
  case 'final_turn': return aggregateFinalTurn;
195
208
  case 'majority': return aggregateMajority;
196
- default: return aggregateAverage;
209
+ default: return wrapAverage;
197
210
  }
198
211
  }
199
212
  /**
@@ -232,4 +245,4 @@ function analyzeConversationTrend(result) {
232
245
  direction = 'stable';
233
246
  return { direction, firstHalfAvg, secondHalfAvg };
234
247
  }
235
- //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"conversation-eval.js","sourceRoot":"","sources":["../../src/api/conversation-eval.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AA4IH,wCAOC;AAaD,4CAQC;AAcD,8CAmBC;AAcD,gDAKC;AAcD,8CAQC;AAQD,sCAWC;AASD,kDAKC;AAQD,4DAyBC;AAlTD,2CAAoD;AAuDpD,+EAA+E;AAC/E,+BAA+B;AAC/B,+EAA+E;AAE/E;;GAEG;AACU,QAAA,gBAAgB,GAAG;IAC9B;;;;;;;;OAQG;IACH,KAAK,CAAC,gBAAgB,CAAC,SAAiB;QACtC,OAAO,IAAA,2BAAkB,EACvB,uCAAuC,SAAS,EAAE,EAClD,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CAAC,QAAQ,CAAC,OAAoC;QACjD,OAAO,IAAA,2BAAkB,EAAyB,6BAA6B,EAAE;YAC/E,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,qBAAqB;QACzB,OAAO,IAAA,2BAAkB,EACvB,wCAAwC,EACxC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;;;;;;;;;;GAWG;AACH,SAAgB,cAAc,CAAC,WAA6B;IAC1D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAEhD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC;AACvC,CAAC;AAED;;;;;;;;;;GAUG;AACH,SAAgB,gBAAgB,CAAC,WAA6B;IAC5D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC;IACvF,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC7D,MAAM,MAAM,GAAG,WAAW,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;IAEpD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,iBAAiB,CAAC,WAA6B;IAC7D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,+BAA+B;IAC/B,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,WAAW,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAClC,MAAM,MAAM,GAAG,KAAK,GAAG,CAAC,CAAC;QACzB,WAAW,IAAI,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC;QACnC,eAAe,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC;QAClD,WAAW,IAAI,MAAM,CAAC;IACxB,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,WAAW,GAAG,WAAW,CAAC;IAC3C,MAAM,MAAM,GAAG,CAAC,eAAe,GAAG,WAAW,CAAC,GAAG,GAAG,CAAC;IAErD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,kBAAkB,CAAC,WAA6B;IAC9D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,SAAS,GAAG,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACtD,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC,KAAK,EAAE,CAAC;AAC9D,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,iBAAiB,CAAC,WAA6B;IAC7D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC7D,MAAM,MAAM,GAAG,WAAW,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;IACpD,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC;IAEvF,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrC,CAAC;AAED;;;;;GAKG;AACH,SAAgB,aAAa,CAC3B,MAAuB;IAEvB,QAAQ,MAAM,EAAE,CAAC;QACf,KAAK,OAAO,CAAC,CAAC,OAAO,cAAc,CAAC;QACpC,KAAK,SAAS,CAAC,CAAC,OAAO,gBAAgB,CAAC;QACxC,KAAK,UAAU,CAAC,CAAC,OAAO,iBAAiB,CAAC;QAC1C,KAAK,YAAY,CAAC,CAAC,OAAO,kBAAkB,CAAC;QAC7C,KAAK,UAAU,CAAC,CAAC,OAAO,iBAAiB,CAAC;QAC1C,OAAO,CAAC,CAAC,OAAO,gBAAgB,CAAC;IACnC,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,mBAAmB,CACjC,MAA8B,EAC9B,cAAc,GAAG,EAAE;IAEnB,OAAO,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,KAAK,GAAG,cAAc,CAAC,CAAC;AAC/E,CAAC;AAED;;;;;GAKG;AACH,SAAgB,wBAAwB,CAAC,MAA8B;IAKrE,MAAM,KAAK,GAAG,MAAM,CAAC,WAAW,CAAC;IACjC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,CAAC;IACpE,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC9C,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IAC3C,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAEzC,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC;IACvF,MAAM,aAAa,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;IAE1F,MAAM,IAAI,GAAG,aAAa,GAAG,YAAY,CAAC;IAC1C,IAAI,SAA+C,CAAC;IAEpD,IAAI,IAAI,GAAG,CAAC;QAAE,SAAS,GAAG,WAAW,CAAC;SACjC,IAAI,IAAI,GAAG,CAAC,CAAC;QAAE,SAAS,GAAG,WAAW,CAAC;;QACvC,SAAS,GAAG,QAAQ,CAAC;IAE1B,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,aAAa,EAAE,CAAC;AACpD,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Conversation Evaluation API\n *\n * API for multi-turn conversation evaluation\n */\n\nimport { apiRequestWithData } from '../core/client';\n\n// ============================================================================\n// TYPES\n// ============================================================================\n\nexport type AggregateMethod = 'worst' | 'average' | 'weighted' | 'final_turn' | 'majority';\n\nexport interface SessionTrace {\n  id: string;\n  sessionId: string;\n  turnNumber: number;\n  userMessage: string;\n  agentResponse: string;\n  timestamp: string;\n  metadata?: Record<string, unknown>;\n}\n\nexport interface TurnEvaluation {\n  traceId: string;\n  turnNumber: number;\n  passed: boolean;\n  score: number;\n  reasoning: string;\n}\n\nexport interface ConversationEvalResult {\n  sessionId: string;\n  criterionId: string;\n  turnCount: number;\n  turnResults: TurnEvaluation[];\n  aggregatePassed: boolean;\n  aggregateScore: number;\n  aggregateMethod: AggregateMethod;\n  reasoning: string;\n  metadata?: Record<string, unknown>;\n}\n\nexport interface EvaluateConversationOptions {\n  sessionId: string;\n  criterionId: string;\n  options?: {\n    aggregateMethod?: AggregateMethod;\n    minTurns?: number;\n    maxTurns?: number;\n  };\n}\n\nexport interface AggregationMethodInfo {\n  id: AggregateMethod;\n  name: string;\n  description: string;\n  useCase: string;\n}\n\n// ============================================================================\n// CONVERSATION EVAL API CLIENT\n// ============================================================================\n\n/**\n * Conversation Evaluation API client for multi-turn evaluation\n */\nexport const conversationEval = {\n  /**\n   * Get traces for a conversation session\n   *\n   * @example\n   * ```typescript\n   * const traces = await conversationEval.getSessionTraces('session_123');\n   * console.log(`Conversation has ${traces.length} turns`);\n   * ```\n   */\n  async getSessionTraces(sessionId: string): Promise<SessionTrace[]> {\n    return apiRequestWithData<SessionTrace[]>(\n      `/conversation-eval/traces?sessionId=${sessionId}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Run conversation-level evaluation\n   *\n   * @example\n   * ```typescript\n   * const result = await conversationEval.evaluate({\n   *   sessionId: 'session_123',\n   *   criterionId: 'criterion_456',\n   *   options: {\n   *     aggregateMethod: 'average',\n   *     minTurns: 2,\n   *   },\n   * });\n   * console.log(`Conversation score: ${result.aggregateScore}`);\n   * ```\n   */\n  async evaluate(options: EvaluateConversationOptions): Promise<ConversationEvalResult> {\n    return apiRequestWithData<ConversationEvalResult>('/conversation-eval/evaluate', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Get available aggregation methods with descriptions\n   *\n   * @example\n   * ```typescript\n   * const methods = await conversationEval.getAggregationMethods();\n   * for (const method of methods) {\n   *   console.log(`${method.name}: ${method.description}`);\n   * }\n   * ```\n   */\n  async getAggregationMethods(): Promise<AggregationMethodInfo[]> {\n    return apiRequestWithData<AggregationMethodInfo[]>(\n      '/conversation-eval/aggregation-methods',\n      { apiVersion: 'none' }\n    );\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Calculate worst-turn aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using worst turn logic\n *\n * @example\n * ```typescript\n * const result = aggregateWorst(turnResults);\n * // Fails if any turn fails\n * ```\n */\nexport function aggregateWorst(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const worstScore = Math.min(...turnResults.map(t => t.score));\n  const passed = turnResults.every(t => t.passed);\n\n  return { passed, score: worstScore };\n}\n\n/**\n * Calculate average aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using average logic\n *\n * @example\n * ```typescript\n * const result = aggregateAverage(turnResults);\n * ```\n */\nexport function aggregateAverage(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const avgScore = turnResults.reduce((sum, t) => sum + t.score, 0) / turnResults.length;\n  const passedCount = turnResults.filter(t => t.passed).length;\n  const passed = passedCount > turnResults.length / 2;\n\n  return { passed, score: avgScore };\n}\n\n/**\n * Calculate weighted average aggregation (later turns weighted more)\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using weighted average logic\n *\n * @example\n * ```typescript\n * const result = aggregateWeighted(turnResults);\n * // Later turns have higher weight\n * ```\n */\nexport function aggregateWeighted(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  // Linear weights: 1, 2, 3, ...\n  let weightedSum = 0;\n  let weightTotal = 0;\n  let weightedPassSum = 0;\n\n  turnResults.forEach((turn, index) => {\n    const weight = index + 1;\n    weightedSum += turn.score * weight;\n    weightedPassSum += (turn.passed ? 1 : 0) * weight;\n    weightTotal += weight;\n  });\n\n  const avgScore = weightedSum / weightTotal;\n  const passed = (weightedPassSum / weightTotal) > 0.5;\n\n  return { passed, score: avgScore };\n}\n\n/**\n * Calculate final-turn aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using only the final turn\n *\n * @example\n * ```typescript\n * const result = aggregateFinalTurn(turnResults);\n * // Only final turn matters\n * ```\n */\nexport function aggregateFinalTurn(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const finalTurn = turnResults[turnResults.length - 1];\n  return { passed: finalTurn.passed, score: finalTurn.score };\n}\n\n/**\n * Calculate majority vote aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using majority vote logic\n *\n * @example\n * ```typescript\n * const result = aggregateMajority(turnResults);\n * // Passes if majority of turns pass\n * ```\n */\nexport function aggregateMajority(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const passedCount = turnResults.filter(t => t.passed).length;\n  const passed = passedCount > turnResults.length / 2;\n  const avgScore = turnResults.reduce((sum, t) => sum + t.score, 0) / turnResults.length;\n\n  return { passed, score: avgScore };\n}\n\n/**\n * Get appropriate aggregation function for a method\n *\n * @param method - Aggregation method name\n * @returns Aggregation function\n */\nexport function getAggregator(\n  method: AggregateMethod\n): (turnResults: TurnEvaluation[]) => { passed: boolean; score: number } {\n  switch (method) {\n    case 'worst': return aggregateWorst;\n    case 'average': return aggregateAverage;\n    case 'weighted': return aggregateWeighted;\n    case 'final_turn': return aggregateFinalTurn;\n    case 'majority': return aggregateMajority;\n    default: return aggregateAverage;\n  }\n}\n\n/**\n * Find problematic turns in a conversation\n *\n * @param result - Conversation evaluation result\n * @param scoreThreshold - Minimum acceptable score (default 70)\n * @returns Array of problematic turn results\n */\nexport function getProblematicTurns(\n  result: ConversationEvalResult,\n  scoreThreshold = 70\n): TurnEvaluation[] {\n  return result.turnResults.filter(t => !t.passed || t.score < scoreThreshold);\n}\n\n/**\n * Calculate conversation quality trend\n *\n * @param result - Conversation evaluation result\n * @returns Trend analysis\n */\nexport function analyzeConversationTrend(result: ConversationEvalResult): {\n  direction: 'improving' | 'declining' | 'stable';\n  firstHalfAvg: number;\n  secondHalfAvg: number;\n} {\n  const turns = result.turnResults;\n  if (turns.length < 2) {\n    return { direction: 'stable', firstHalfAvg: 0, secondHalfAvg: 0 };\n  }\n\n  const midpoint = Math.floor(turns.length / 2);\n  const firstHalf = turns.slice(0, midpoint);\n  const secondHalf = turns.slice(midpoint);\n\n  const firstHalfAvg = firstHalf.reduce((sum, t) => sum + t.score, 0) / firstHalf.length;\n  const secondHalfAvg = secondHalf.reduce((sum, t) => sum + t.score, 0) / secondHalf.length;\n\n  const diff = secondHalfAvg - firstHalfAvg;\n  let direction: 'improving' | 'declining' | 'stable';\n\n  if (diff > 5) direction = 'improving';\n  else if (diff < -5) direction = 'declining';\n  else direction = 'stable';\n\n  return { direction, firstHalfAvg, secondHalfAvg };\n}\n"]}
248
+ //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"conversation-eval.js","sourceRoot":"","sources":["../../src/api/conversation-eval.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AA4IH,wCAaC;AAYD,4CAeC;AAcD,8CAmBC;AAcD,gDAKC;AAcD,8CAQC;AAQD,sCAeC;AASD,kDAKC;AAQD,4DAyBC;AAlUD,2CAAoD;AAuDpD,+EAA+E;AAC/E,+BAA+B;AAC/B,+EAA+E;AAE/E;;GAEG;AACU,QAAA,gBAAgB,GAAG;IAC9B;;;;;;;;OAQG;IACH,KAAK,CAAC,gBAAgB,CAAC,SAAiB;QACtC,OAAO,IAAA,2BAAkB,EACvB,uCAAuC,SAAS,EAAE,EAClD,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CAAC,QAAQ,CAAC,OAAoC;QACjD,OAAO,IAAA,2BAAkB,EAAyB,6BAA6B,EAAE;YAC/E,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,qBAAqB;QACzB,OAAO,IAAA,2BAAkB,EACvB,wCAAwC,EACxC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;;;;;;;;;;GAWG;AACH,SAAgB,cAAc,CAAC,WAAwC;IACrE,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,OAAO,WAAW,CAAC,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAE1G,0CAA0C;IAC1C,IAAI,OAAO,WAAW,CAAC,CAAC,CAAC,KAAK,QAAQ,EAAE,CAAC;QACvC,OAAO,IAAI,CAAC,GAAG,CAAC,GAAI,WAAwB,CAAC,CAAC;IAChD,CAAC;IAED,MAAM,OAAO,GAAG,WAA+B,CAAC;IAChD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAE5C,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC;AACvC,CAAC;AAED;;;;;;;;;GASG;AACH,SAAgB,gBAAgB,CAAC,WAAwC;IACvE,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,OAAO,WAAW,CAAC,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAE1G,uCAAuC;IACvC,IAAI,OAAO,WAAW,CAAC,CAAC,CAAC,KAAK,QAAQ,EAAE,CAAC;QACvC,MAAM,MAAM,GAAG,WAAuB,CAAC;QACvC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IAC/D,CAAC;IAED,MAAM,OAAO,GAAG,WAA+B,CAAC;IAChD,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IAC/E,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IACzD,MAAM,MAAM,GAAG,WAAW,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;IAEhD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,iBAAiB,CAAC,WAA6B;IAC7D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,+BAA+B;IAC/B,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,WAAW,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAClC,MAAM,MAAM,GAAG,KAAK,GAAG,CAAC,CAAC;QACzB,WAAW,IAAI,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC;QACnC,eAAe,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC;QAClD,WAAW,IAAI,MAAM,CAAC;IACxB,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,WAAW,GAAG,WAAW,CAAC;IAC3C,MAAM,MAAM,GAAG,CAAC,eAAe,GAAG,WAAW,CAAC,GAAG,GAAG,CAAC;IAErD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,kBAAkB,CAAC,WAA6B;IAC9D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,SAAS,GAAG,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACtD,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC,KAAK,EAAE,CAAC;AAC9D,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,iBAAiB,CAAC,WAA6B;IAC7D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC7D,MAAM,MAAM,GAAG,WAAW,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;IACpD,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC;IAEvF,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrC,CAAC;AAED;;;;;GAKG;AACH,SAAgB,aAAa,CAC3B,MAAuB;IAEvB,4DAA4D;IAC5D,MAAM,SAAS,GAAG,CAAC,CAAmB,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAuC,CAAC;IACnG,MAAM,WAAW,GAAG,CAAC,CAAmB,EAAE,EAAE,CAAC,gBAAgB,CAAC,CAAC,CAAuC,CAAC;IAEvG,QAAQ,MAAM,EAAE,CAAC;QACf,KAAK,OAAO,CAAC,CAAC,OAAO,SAAS,CAAC;QAC/B,KAAK,SAAS,CAAC,CAAC,OAAO,WAAW,CAAC;QACnC,KAAK,UAAU,CAAC,CAAC,OAAO,iBAAiB,CAAC;QAC1C,KAAK,YAAY,CAAC,CAAC,OAAO,kBAAkB,CAAC;QAC7C,KAAK,UAAU,CAAC,CAAC,OAAO,iBAAiB,CAAC;QAC1C,OAAO,CAAC,CAAC,OAAO,WAAW,CAAC;IAC9B,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,mBAAmB,CACjC,MAA8B,EAC9B,cAAc,GAAG,EAAE;IAEnB,OAAO,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,KAAK,GAAG,cAAc,CAAC,CAAC;AAC/E,CAAC;AAED;;;;;GAKG;AACH,SAAgB,wBAAwB,CAAC,MAA8B;IAKrE,MAAM,KAAK,GAAG,MAAM,CAAC,WAAW,CAAC;IACjC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,CAAC;IACpE,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC9C,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IAC3C,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAEzC,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC;IACvF,MAAM,aAAa,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;IAE1F,MAAM,IAAI,GAAG,aAAa,GAAG,YAAY,CAAC;IAC1C,IAAI,SAA+C,CAAC;IAEpD,IAAI,IAAI,GAAG,CAAC;QAAE,SAAS,GAAG,WAAW,CAAC;SACjC,IAAI,IAAI,GAAG,CAAC,CAAC;QAAE,SAAS,GAAG,WAAW,CAAC;;QACvC,SAAS,GAAG,QAAQ,CAAC;IAE1B,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,aAAa,EAAE,CAAC;AACpD,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Conversation Evaluation API\n *\n * API for multi-turn conversation evaluation\n */\n\nimport { apiRequestWithData } from '../core/client';\n\n// ============================================================================\n// TYPES\n// ============================================================================\n\nexport type AggregateMethod = 'worst' | 'average' | 'weighted' | 'final_turn' | 'majority';\n\nexport interface SessionTrace {\n  id: string;\n  sessionId: string;\n  turnNumber: number;\n  userMessage: string;\n  agentResponse: string;\n  timestamp: string;\n  metadata?: Record<string, unknown>;\n}\n\nexport interface TurnEvaluation {\n  traceId: string;\n  turnNumber: number;\n  passed: boolean;\n  score: number;\n  reasoning: string;\n}\n\nexport interface ConversationEvalResult {\n  sessionId: string;\n  criterionId: string;\n  turnCount: number;\n  turnResults: TurnEvaluation[];\n  aggregatePassed: boolean;\n  aggregateScore: number;\n  aggregateMethod: AggregateMethod;\n  reasoning: string;\n  metadata?: Record<string, unknown>;\n}\n\nexport interface EvaluateConversationOptions {\n  sessionId: string;\n  criterionId: string;\n  options?: {\n    aggregateMethod?: AggregateMethod;\n    minTurns?: number;\n    maxTurns?: number;\n  };\n}\n\nexport interface AggregationMethodInfo {\n  id: AggregateMethod;\n  name: string;\n  description: string;\n  useCase: string;\n}\n\n// ============================================================================\n// CONVERSATION EVAL API CLIENT\n// ============================================================================\n\n/**\n * Conversation Evaluation API client for multi-turn evaluation\n */\nexport const conversationEval = {\n  /**\n   * Get traces for a conversation session\n   *\n   * @example\n   * ```typescript\n   * const traces = await conversationEval.getSessionTraces('session_123');\n   * console.log(`Conversation has ${traces.length} turns`);\n   * ```\n   */\n  async getSessionTraces(sessionId: string): Promise<SessionTrace[]> {\n    return apiRequestWithData<SessionTrace[]>(\n      `/conversation-eval/traces?sessionId=${sessionId}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Run conversation-level evaluation\n   *\n   * @example\n   * ```typescript\n   * const result = await conversationEval.evaluate({\n   *   sessionId: 'session_123',\n   *   criterionId: 'criterion_456',\n   *   options: {\n   *     aggregateMethod: 'average',\n   *     minTurns: 2,\n   *   },\n   * });\n   * console.log(`Conversation score: ${result.aggregateScore}`);\n   * ```\n   */\n  async evaluate(options: EvaluateConversationOptions): Promise<ConversationEvalResult> {\n    return apiRequestWithData<ConversationEvalResult>('/conversation-eval/evaluate', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Get available aggregation methods with descriptions\n   *\n   * @example\n   * ```typescript\n   * const methods = await conversationEval.getAggregationMethods();\n   * for (const method of methods) {\n   *   console.log(`${method.name}: ${method.description}`);\n   * }\n   * ```\n   */\n  async getAggregationMethods(): Promise<AggregationMethodInfo[]> {\n    return apiRequestWithData<AggregationMethodInfo[]>(\n      '/conversation-eval/aggregation-methods',\n      { apiVersion: 'none' }\n    );\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Calculate worst-turn aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using worst turn logic\n *\n * @example\n * ```typescript\n * const result = aggregateWorst(turnResults);\n * // Fails if any turn fails\n * ```\n */\nexport function aggregateWorst(turnResults: TurnEvaluation[] | number[]): number | { passed: boolean; score: number } {\n  if (turnResults.length === 0) return typeof turnResults[0] === 'number' ? 0 : { passed: false, score: 0 };\n\n  // Simple number array: return the minimum\n  if (typeof turnResults[0] === 'number') {\n    return Math.min(...(turnResults as number[]));\n  }\n\n  const results = turnResults as TurnEvaluation[];\n  const worstScore = Math.min(...results.map(t => t.score));\n  const passed = results.every(t => t.passed);\n\n  return { passed, score: worstScore };\n}\n\n/**\n * Calculate average aggregation\n *\n * Accepts TurnEvaluation[] or simple number[]:\n * - aggregateAverage(turnResults)  — returns {passed, score}\n * - aggregateAverage([0.6, 0.8])   — returns 0.7\n *\n * @param turnResults - Array of turn evaluation results or numbers\n * @returns Aggregated result\n */\nexport function aggregateAverage(turnResults: TurnEvaluation[] | number[]): number | { passed: boolean; score: number } {\n  if (turnResults.length === 0) return typeof turnResults[0] === 'number' ? 0 : { passed: false, score: 0 };\n\n  // Simple number array: return the mean\n  if (typeof turnResults[0] === 'number') {\n    const scores = turnResults as number[];\n    return scores.reduce((sum, s) => sum + s, 0) / scores.length;\n  }\n\n  const results = turnResults as TurnEvaluation[];\n  const avgScore = results.reduce((sum, t) => sum + t.score, 0) / results.length;\n  const passedCount = results.filter(t => t.passed).length;\n  const passed = passedCount > results.length / 2;\n\n  return { passed, score: avgScore };\n}\n\n/**\n * Calculate weighted average aggregation (later turns weighted more)\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using weighted average logic\n *\n * @example\n * ```typescript\n * const result = aggregateWeighted(turnResults);\n * // Later turns have higher weight\n * ```\n */\nexport function aggregateWeighted(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  // Linear weights: 1, 2, 3, ...\n  let weightedSum = 0;\n  let weightTotal = 0;\n  let weightedPassSum = 0;\n\n  turnResults.forEach((turn, index) => {\n    const weight = index + 1;\n    weightedSum += turn.score * weight;\n    weightedPassSum += (turn.passed ? 1 : 0) * weight;\n    weightTotal += weight;\n  });\n\n  const avgScore = weightedSum / weightTotal;\n  const passed = (weightedPassSum / weightTotal) > 0.5;\n\n  return { passed, score: avgScore };\n}\n\n/**\n * Calculate final-turn aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using only the final turn\n *\n * @example\n * ```typescript\n * const result = aggregateFinalTurn(turnResults);\n * // Only final turn matters\n * ```\n */\nexport function aggregateFinalTurn(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const finalTurn = turnResults[turnResults.length - 1];\n  return { passed: finalTurn.passed, score: finalTurn.score };\n}\n\n/**\n * Calculate majority vote aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using majority vote logic\n *\n * @example\n * ```typescript\n * const result = aggregateMajority(turnResults);\n * // Passes if majority of turns pass\n * ```\n */\nexport function aggregateMajority(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const passedCount = turnResults.filter(t => t.passed).length;\n  const passed = passedCount > turnResults.length / 2;\n  const avgScore = turnResults.reduce((sum, t) => sum + t.score, 0) / turnResults.length;\n\n  return { passed, score: avgScore };\n}\n\n/**\n * Get appropriate aggregation function for a method\n *\n * @param method - Aggregation method name\n * @returns Aggregation function\n */\nexport function getAggregator(\n  method: AggregateMethod\n): (turnResults: TurnEvaluation[]) => { passed: boolean; score: number } {\n  // Wrap overloaded functions to match the expected signature\n  const wrapWorst = (t: TurnEvaluation[]) => aggregateWorst(t) as { passed: boolean; score: number };\n  const wrapAverage = (t: TurnEvaluation[]) => aggregateAverage(t) as { passed: boolean; score: number };\n\n  switch (method) {\n    case 'worst': return wrapWorst;\n    case 'average': return wrapAverage;\n    case 'weighted': return aggregateWeighted;\n    case 'final_turn': return aggregateFinalTurn;\n    case 'majority': return aggregateMajority;\n    default: return wrapAverage;\n  }\n}\n\n/**\n * Find problematic turns in a conversation\n *\n * @param result - Conversation evaluation result\n * @param scoreThreshold - Minimum acceptable score (default 70)\n * @returns Array of problematic turn results\n */\nexport function getProblematicTurns(\n  result: ConversationEvalResult,\n  scoreThreshold = 70\n): TurnEvaluation[] {\n  return result.turnResults.filter(t => !t.passed || t.score < scoreThreshold);\n}\n\n/**\n * Calculate conversation quality trend\n *\n * @param result - Conversation evaluation result\n * @returns Trend analysis\n */\nexport function analyzeConversationTrend(result: ConversationEvalResult): {\n  direction: 'improving' | 'declining' | 'stable';\n  firstHalfAvg: number;\n  secondHalfAvg: number;\n} {\n  const turns = result.turnResults;\n  if (turns.length < 2) {\n    return { direction: 'stable', firstHalfAvg: 0, secondHalfAvg: 0 };\n  }\n\n  const midpoint = Math.floor(turns.length / 2);\n  const firstHalf = turns.slice(0, midpoint);\n  const secondHalf = turns.slice(midpoint);\n\n  const firstHalfAvg = firstHalf.reduce((sum, t) => sum + t.score, 0) / firstHalf.length;\n  const secondHalfAvg = secondHalf.reduce((sum, t) => sum + t.score, 0) / secondHalf.length;\n\n  const diff = secondHalfAvg - firstHalfAvg;\n  let direction: 'improving' | 'declining' | 'stable';\n\n  if (diff > 5) direction = 'improving';\n  else if (diff < -5) direction = 'declining';\n  else direction = 'stable';\n\n  return { direction, firstHalfAvg, secondHalfAvg };\n}\n"]}
@@ -113,63 +113,68 @@ export declare const deterministicGraders: {
113
113
  */
114
114
  getTemplates(): Promise<RuleTemplate[]>;
115
115
  };
116
+ /**
117
+ * Named rule configuration with type, name, and config
118
+ */
119
+ export interface NamedRuleConfig {
120
+ type: RuleType;
121
+ name: string;
122
+ config: Record<string, unknown>;
123
+ }
116
124
  /**
117
125
  * Create a regex rule configuration
118
126
  *
127
+ * @param name - Rule name for identification
128
+ * @param field - Field to check ('output', 'input', etc.)
119
129
  * @param pattern - Regular expression pattern
120
130
  * @param flags - Regex flags (default: 'gi')
121
- * @returns Rule configuration object
131
+ * @returns Named rule configuration object
122
132
  *
123
133
  * @example
124
134
  * ```typescript
125
- * const config = createRegexRule('\\b(error|fail)\\b', 'gi');
135
+ * const rule = createRegexRule('email_check', 'output', '\\w+@\\w+\\.\\w+');
126
136
  * ```
127
137
  */
128
- export declare function createRegexRule(pattern: string, flags?: string): {
129
- pattern: string;
130
- flags: string;
131
- };
138
+ export declare function createRegexRule(name: string, field: string, pattern: string, flags?: string): NamedRuleConfig;
132
139
  /**
133
140
  * Create a contains rule configuration
134
141
  *
142
+ * @param name - Rule name for identification
143
+ * @param field - Field to check ('output', 'input', etc.)
135
144
  * @param values - Strings to check for
136
145
  * @param caseSensitive - Whether comparison is case-sensitive
137
- * @returns Rule configuration object
146
+ * @returns Named rule configuration object
138
147
  *
139
148
  * @example
140
149
  * ```typescript
141
- * const config = createContainsRule(['hello', 'hi', 'hey'], false);
150
+ * const rule = createContainsRule('greeting', 'output', ['hello', 'hi', 'hey']);
142
151
  * ```
143
152
  */
144
- export declare function createContainsRule(values: string[], caseSensitive?: boolean): {
145
- values: string[];
146
- caseSensitive: boolean;
147
- };
153
+ export declare function createContainsRule(name: string, field: string, values: string[], caseSensitive?: boolean): NamedRuleConfig;
148
154
  /**
149
155
  * Create a length rule configuration
150
156
  *
157
+ * @param name - Rule name for identification
151
158
  * @param min - Minimum length (optional)
152
159
  * @param max - Maximum length (optional)
153
- * @returns Rule configuration object
160
+ * @returns Named rule configuration object
154
161
  *
155
162
  * @example
156
163
  * ```typescript
157
- * const config = createLengthRule(50, 1000);
164
+ * const rule = createLengthRule('response_length', 50, 1000);
158
165
  * ```
159
166
  */
160
- export declare function createLengthRule(min?: number, max?: number): {
161
- min?: number;
162
- max?: number;
163
- };
167
+ export declare function createLengthRule(name: string, min?: number, max?: number): NamedRuleConfig;
164
168
  /**
165
169
  * Create a JSON schema rule configuration
166
170
  *
171
+ * @param name - Rule name for identification
167
172
  * @param schema - JSON Schema object
168
- * @returns Rule configuration object
173
+ * @returns Named rule configuration object
169
174
  *
170
175
  * @example
171
176
  * ```typescript
172
- * const config = createJsonSchemaRule({
177
+ * const rule = createJsonSchemaRule('response_format', {
173
178
  * type: 'object',
174
179
  * required: ['name', 'email'],
175
180
  * properties: {
@@ -179,27 +184,31 @@ export declare function createLengthRule(min?: number, max?: number): {
179
184
  * });
180
185
  * ```
181
186
  */
182
- export declare function createJsonSchemaRule(schema: Record<string, unknown>): {
183
- schema: Record<string, unknown>;
184
- };
187
+ export declare function createJsonSchemaRule(name: string, schema: Record<string, unknown>): NamedRuleConfig;
185
188
  /**
186
189
  * Check if all rule results passed
187
190
  *
188
- * @param results - Array of rule results
191
+ * @param results - Array of rule results (or any objects with a `passed` boolean)
189
192
  * @returns Whether all rules passed
190
193
  */
191
- export declare function allRulesPassed(results: RuleResult[]): boolean;
194
+ export declare function allRulesPassed(results: Array<{
195
+ passed: boolean;
196
+ }>): boolean;
192
197
  /**
193
198
  * Get failed rules from results
194
199
  *
195
- * @param results - Array of rule results
200
+ * @param results - Array of rule results (or any objects with a `passed` boolean)
196
201
  * @returns Array of failed rule results
197
202
  */
198
- export declare function getFailedRules(results: RuleResult[]): RuleResult[];
203
+ export declare function getFailedRules<T extends {
204
+ passed: boolean;
205
+ }>(results: T[]): T[];
199
206
  /**
200
207
  * Calculate average score from rule results
201
208
  *
202
209
  * @param results - Array of rule results
203
210
  * @returns Average score (0-100)
204
211
  */
205
- export declare function calculateAverageScore(results: RuleResult[]): number;
212
+ export declare function calculateAverageScore(results: Array<{
213
+ score: number;
214
+ }>): number;
@@ -89,63 +89,66 @@ exports.deterministicGraders = {
89
89
  return (0, client_1.apiRequestWithData)('/deterministic-graders/templates', { apiVersion: 'none' });
90
90
  },
91
91
  };
92
- // ============================================================================
93
- // HELPER FUNCTIONS
94
- // ============================================================================
95
92
  /**
96
93
  * Create a regex rule configuration
97
94
  *
95
+ * @param name - Rule name for identification
96
+ * @param field - Field to check ('output', 'input', etc.)
98
97
  * @param pattern - Regular expression pattern
99
98
  * @param flags - Regex flags (default: 'gi')
100
- * @returns Rule configuration object
99
+ * @returns Named rule configuration object
101
100
  *
102
101
  * @example
103
102
  * ```typescript
104
- * const config = createRegexRule('\\b(error|fail)\\b', 'gi');
103
+ * const rule = createRegexRule('email_check', 'output', '\\w+@\\w+\\.\\w+');
105
104
  * ```
106
105
  */
107
- function createRegexRule(pattern, flags = 'gi') {
108
- return { pattern, flags };
106
+ function createRegexRule(name, field, pattern, flags = 'gi') {
107
+ return { type: 'regex', name, config: { field, pattern, flags } };
109
108
  }
110
109
  /**
111
110
  * Create a contains rule configuration
112
111
  *
112
+ * @param name - Rule name for identification
113
+ * @param field - Field to check ('output', 'input', etc.)
113
114
  * @param values - Strings to check for
114
115
  * @param caseSensitive - Whether comparison is case-sensitive
115
- * @returns Rule configuration object
116
+ * @returns Named rule configuration object
116
117
  *
117
118
  * @example
118
119
  * ```typescript
119
- * const config = createContainsRule(['hello', 'hi', 'hey'], false);
120
+ * const rule = createContainsRule('greeting', 'output', ['hello', 'hi', 'hey']);
120
121
  * ```
121
122
  */
122
- function createContainsRule(values, caseSensitive = false) {
123
- return { values, caseSensitive };
123
+ function createContainsRule(name, field, values, caseSensitive = false) {
124
+ return { type: 'contains', name, config: { field, values, caseSensitive } };
124
125
  }
125
126
  /**
126
127
  * Create a length rule configuration
127
128
  *
129
+ * @param name - Rule name for identification
128
130
  * @param min - Minimum length (optional)
129
131
  * @param max - Maximum length (optional)
130
- * @returns Rule configuration object
132
+ * @returns Named rule configuration object
131
133
  *
132
134
  * @example
133
135
  * ```typescript
134
- * const config = createLengthRule(50, 1000);
136
+ * const rule = createLengthRule('response_length', 50, 1000);
135
137
  * ```
136
138
  */
137
- function createLengthRule(min, max) {
138
- return { min, max };
139
+ function createLengthRule(name, min, max) {
140
+ return { type: 'length', name, config: { min, max } };
139
141
  }
140
142
  /**
141
143
  * Create a JSON schema rule configuration
142
144
  *
145
+ * @param name - Rule name for identification
143
146
  * @param schema - JSON Schema object
144
- * @returns Rule configuration object
147
+ * @returns Named rule configuration object
145
148
  *
146
149
  * @example
147
150
  * ```typescript
148
- * const config = createJsonSchemaRule({
151
+ * const rule = createJsonSchemaRule('response_format', {
149
152
  * type: 'object',
150
153
  * required: ['name', 'email'],
151
154
  * properties: {
@@ -155,13 +158,13 @@ function createLengthRule(min, max) {
155
158
  * });
156
159
  * ```
157
160
  */
158
- function createJsonSchemaRule(schema) {
159
- return { schema };
161
+ function createJsonSchemaRule(name, schema) {
162
+ return { type: 'json_schema', name, config: { schema } };
160
163
  }
161
164
  /**
162
165
  * Check if all rule results passed
163
166
  *
164
- * @param results - Array of rule results
167
+ * @param results - Array of rule results (or any objects with a `passed` boolean)
165
168
  * @returns Whether all rules passed
166
169
  */
167
170
  function allRulesPassed(results) {
@@ -170,7 +173,7 @@ function allRulesPassed(results) {
170
173
  /**
171
174
  * Get failed rules from results
172
175
  *
173
- * @param results - Array of rule results
176
+ * @param results - Array of rule results (or any objects with a `passed` boolean)
174
177
  * @returns Array of failed rule results
175
178
  */
176
179
  function getFailedRules(results) {
@@ -188,4 +191,4 @@ function calculateAverageScore(results) {
188
191
  const sum = results.reduce((acc, r) => acc + r.score, 0);
189
192
  return sum / results.length;
190
193
  }
191
- //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"deterministic-graders.js","sourceRoot":"","sources":["../../src/api/deterministic-graders.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAsLH,0CAEC;AAcD,gDAKC;AAcD,4CAEC;AAoBD,oDAEC;AAQD,wCAEC;AAQD,wCAEC;AAQD,sDAIC;AA/QD,2CAAoD;AA8EpD,+EAA+E;AAC/E,mCAAmC;AACnC,+EAA+E;AAE/E;;GAEG;AACU,QAAA,oBAAoB,GAAG;IAClC;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,QAAQ,CAAC,OAAwB;QACrC,OAAO,IAAA,2BAAkB,EAA0B,iCAAiC,EAAE;YACpF,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,KAAK,CAAC,YAAY,CAAC,OAA4B;QAC7C,OAAO,IAAA,2BAAkB,EAAqB,sCAAsC,EAAE;YACpF,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,YAAY;QAChB,OAAO,IAAA,2BAAkB,EACvB,mCAAmC,EACnC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,YAAY;QAChB,OAAO,IAAA,2BAAkB,EACvB,kCAAkC,EAClC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;;;;;;;;;;GAWG;AACH,SAAgB,eAAe,CAAC,OAAe,EAAE,KAAK,GAAG,IAAI;IAC3D,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;AAC5B,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,kBAAkB,CAChC,MAAgB,EAChB,aAAa,GAAG,KAAK;IAErB,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;AACnC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,gBAAgB,CAAC,GAAY,EAAE,GAAY;IACzD,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;AACtB,CAAC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,SAAgB,oBAAoB,CAAC,MAA+B;IAClE,OAAO,EAAE,MAAM,EAAE,CAAC;AACpB,CAAC;AAED;;;;;GAKG;AACH,SAAgB,cAAc,CAAC,OAAqB;IAClD,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;AACtC,CAAC;AAED;;;;;GAKG;AACH,SAAgB,cAAc,CAAC,OAAqB;IAClD,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;AACxC,CAAC;AAED;;;;;GAKG;AACH,SAAgB,qBAAqB,CAAC,OAAqB;IACzD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACzD,OAAO,GAAG,GAAG,OAAO,CAAC,MAAM,CAAC;AAC9B,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Deterministic Graders API\n *\n * API for running deterministic (code-based) evaluations\n */\n\nimport { apiRequestWithData } from '../core/client';\n\n// ============================================================================\n// TYPES\n// ============================================================================\n\nexport type RuleType =\n  | 'regex'\n  | 'contains'\n  | 'not_contains'\n  | 'json_valid'\n  | 'json_schema'\n  | 'length'\n  | 'pii_check'\n  | 'sentiment'\n  | 'latency'\n  | 'token_count';\n\nexport interface DeterministicEvalResult {\n  passed: boolean;\n  score: number;\n  reasoning: string;\n  ruleResults?: RuleResult[];\n  metadata?: Record<string, unknown>;\n}\n\nexport interface RuleResult {\n  ruleId: string;\n  ruleName: string;\n  ruleType: RuleType;\n  passed: boolean;\n  score: number;\n  details?: string;\n}\n\nexport interface EvaluateOptions {\n  traceId: string;\n  criterionId: string;\n}\n\nexport interface BulkEvaluateOptions {\n  evaluations: Array<{\n    traceId: string;\n    criterionId: string;\n  }>;\n}\n\nexport interface BulkEvaluateResult {\n  results: Array<{\n    traceId: string;\n    criterionId: string;\n    passed: boolean;\n    score: number;\n    error?: string;\n  }>;\n  summary: {\n    total: number;\n    passed: number;\n    failed: number;\n    passRate: number;\n  };\n}\n\nexport interface RuleTypeInfo {\n  id: RuleType;\n  name: string;\n  description: string;\n  configFields: string[];\n}\n\nexport interface RuleTemplate {\n  id: string;\n  name: string;\n  description: string;\n  ruleType: RuleType;\n  config: Record<string, unknown>;\n}\n\n// ============================================================================\n// DETERMINISTIC GRADERS API CLIENT\n// ============================================================================\n\n/**\n * Deterministic Graders API client for code-based evaluations\n */\nexport const deterministicGraders = {\n  /**\n   * Run deterministic evaluation on a single trace\n   *\n   * @example\n   * ```typescript\n   * const result = await deterministicGraders.evaluate({\n   *   traceId: 'trace_123',\n   *   criterionId: 'criterion_456',\n   * });\n   * console.log(`Passed: ${result.passed}, Score: ${result.score}`);\n   * ```\n   */\n  async evaluate(options: EvaluateOptions): Promise<DeterministicEvalResult> {\n    return apiRequestWithData<DeterministicEvalResult>('/deterministic-graders/evaluate', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Run deterministic evaluations on multiple traces\n   *\n   * @example\n   * ```typescript\n   * const { results, summary } = await deterministicGraders.bulkEvaluate({\n   *   evaluations: [\n   *     { traceId: 'trace_1', criterionId: 'criterion_456' },\n   *     { traceId: 'trace_2', criterionId: 'criterion_456' },\n   *     { traceId: 'trace_3', criterionId: 'criterion_456' },\n   *   ],\n   * });\n   * console.log(`Pass rate: ${summary.passRate * 100}%`);\n   * ```\n   */\n  async bulkEvaluate(options: BulkEvaluateOptions): Promise<BulkEvaluateResult> {\n    return apiRequestWithData<BulkEvaluateResult>('/deterministic-graders/bulk-evaluate', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Get available rule types with descriptions\n   *\n   * @example\n   * ```typescript\n   * const ruleTypes = await deterministicGraders.getRuleTypes();\n   * for (const type of ruleTypes) {\n   *   console.log(`${type.name}: ${type.description}`);\n   * }\n   * ```\n   */\n  async getRuleTypes(): Promise<RuleTypeInfo[]> {\n    return apiRequestWithData<RuleTypeInfo[]>(\n      '/deterministic-graders/rule-types',\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Get rule templates\n   *\n   * @example\n   * ```typescript\n   * const templates = await deterministicGraders.getTemplates();\n   * const noPiiTemplate = templates.find(t => t.id === 'no_pii');\n   * ```\n   */\n  async getTemplates(): Promise<RuleTemplate[]> {\n    return apiRequestWithData<RuleTemplate[]>(\n      '/deterministic-graders/templates',\n      { apiVersion: 'none' }\n    );\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Create a regex rule configuration\n *\n * @param pattern - Regular expression pattern\n * @param flags - Regex flags (default: 'gi')\n * @returns Rule configuration object\n *\n * @example\n * ```typescript\n * const config = createRegexRule('\\\\b(error|fail)\\\\b', 'gi');\n * ```\n */\nexport function createRegexRule(pattern: string, flags = 'gi'): { pattern: string; flags: string } {\n  return { pattern, flags };\n}\n\n/**\n * Create a contains rule configuration\n *\n * @param values - Strings to check for\n * @param caseSensitive - Whether comparison is case-sensitive\n * @returns Rule configuration object\n *\n * @example\n * ```typescript\n * const config = createContainsRule(['hello', 'hi', 'hey'], false);\n * ```\n */\nexport function createContainsRule(\n  values: string[],\n  caseSensitive = false\n): { values: string[]; caseSensitive: boolean } {\n  return { values, caseSensitive };\n}\n\n/**\n * Create a length rule configuration\n *\n * @param min - Minimum length (optional)\n * @param max - Maximum length (optional)\n * @returns Rule configuration object\n *\n * @example\n * ```typescript\n * const config = createLengthRule(50, 1000);\n * ```\n */\nexport function createLengthRule(min?: number, max?: number): { min?: number; max?: number } {\n  return { min, max };\n}\n\n/**\n * Create a JSON schema rule configuration\n *\n * @param schema - JSON Schema object\n * @returns Rule configuration object\n *\n * @example\n * ```typescript\n * const config = createJsonSchemaRule({\n *   type: 'object',\n *   required: ['name', 'email'],\n *   properties: {\n *     name: { type: 'string' },\n *     email: { type: 'string', format: 'email' },\n *   },\n * });\n * ```\n */\nexport function createJsonSchemaRule(schema: Record<string, unknown>): { schema: Record<string, unknown> } {\n  return { schema };\n}\n\n/**\n * Check if all rule results passed\n *\n * @param results - Array of rule results\n * @returns Whether all rules passed\n */\nexport function allRulesPassed(results: RuleResult[]): boolean {\n  return results.every(r => r.passed);\n}\n\n/**\n * Get failed rules from results\n *\n * @param results - Array of rule results\n * @returns Array of failed rule results\n */\nexport function getFailedRules(results: RuleResult[]): RuleResult[] {\n  return results.filter(r => !r.passed);\n}\n\n/**\n * Calculate average score from rule results\n *\n * @param results - Array of rule results\n * @returns Average score (0-100)\n */\nexport function calculateAverageScore(results: RuleResult[]): number {\n  if (results.length === 0) return 0;\n  const sum = results.reduce((acc, r) => acc + r.score, 0);\n  return sum / results.length;\n}\n"]}
194
+ //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"deterministic-graders.js","sourceRoot":"","sources":["../../src/api/deterministic-graders.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAiMH,0CAEC;AAgBD,gDAOC;AAeD,4CAEC;AAqBD,oDAEC;AAQD,wCAEC;AAQD,wCAEC;AAQD,sDAIC;AAhSD,2CAAoD;AA8EpD,+EAA+E;AAC/E,mCAAmC;AACnC,+EAA+E;AAE/E;;GAEG;AACU,QAAA,oBAAoB,GAAG;IAClC;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,QAAQ,CAAC,OAAwB;QACrC,OAAO,IAAA,2BAAkB,EAA0B,iCAAiC,EAAE;YACpF,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,KAAK,CAAC,YAAY,CAAC,OAA4B;QAC7C,OAAO,IAAA,2BAAkB,EAAqB,sCAAsC,EAAE;YACpF,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,YAAY;QAChB,OAAO,IAAA,2BAAkB,EACvB,mCAAmC,EACnC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,YAAY;QAChB,OAAO,IAAA,2BAAkB,EACvB,kCAAkC,EAClC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;CACF,CAAC;AAeF;;;;;;;;;;;;;GAaG;AACH,SAAgB,eAAe,CAAC,IAAY,EAAE,KAAa,EAAE,OAAe,EAAE,KAAK,GAAG,IAAI;IACxF,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC;AACpE,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,SAAgB,kBAAkB,CAChC,IAAY,EACZ,KAAa,EACb,MAAgB,EAChB,aAAa,GAAG,KAAK;IAErB,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,EAAE,CAAC;AAC9E,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,SAAgB,gBAAgB,CAAC,IAAY,EAAE,GAAY,EAAE,GAAY;IACvE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC;AACxD,CAAC;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,SAAgB,oBAAoB,CAAC,IAAY,EAAE,MAA+B;IAChF,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,EAAE,CAAC;AAC3D,CAAC;AAED;;;;;GAKG;AACH,SAAgB,cAAc,CAAC,OAAmC;IAChE,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;AACtC,CAAC;AAED;;;;;GAKG;AACH,SAAgB,cAAc,CAAgC,OAAY;IACxE,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;AACxC,CAAC;AAED;;;;;GAKG;AACH,SAAgB,qBAAqB,CAAC,OAAiC;IACrE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACzD,OAAO,GAAG,GAAG,OAAO,CAAC,MAAM,CAAC;AAC9B,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Deterministic Graders API\n *\n * API for running deterministic (code-based) evaluations\n */\n\nimport { apiRequestWithData } from '../core/client';\n\n// ============================================================================\n// TYPES\n// ============================================================================\n\nexport type RuleType =\n  | 'regex'\n  | 'contains'\n  | 'not_contains'\n  | 'json_valid'\n  | 'json_schema'\n  | 'length'\n  | 'pii_check'\n  | 'sentiment'\n  | 'latency'\n  | 'token_count';\n\nexport interface DeterministicEvalResult {\n  passed: boolean;\n  score: number;\n  reasoning: string;\n  ruleResults?: RuleResult[];\n  metadata?: Record<string, unknown>;\n}\n\nexport interface RuleResult {\n  ruleId: string;\n  ruleName: string;\n  ruleType: RuleType;\n  passed: boolean;\n  score: number;\n  details?: string;\n}\n\nexport interface EvaluateOptions {\n  traceId: string;\n  criterionId: string;\n}\n\nexport interface BulkEvaluateOptions {\n  evaluations: Array<{\n    traceId: string;\n    criterionId: string;\n  }>;\n}\n\nexport interface BulkEvaluateResult {\n  results: Array<{\n    traceId: string;\n    criterionId: string;\n    passed: boolean;\n    score: number;\n    error?: string;\n  }>;\n  summary: {\n    total: number;\n    passed: number;\n    failed: number;\n    passRate: number;\n  };\n}\n\nexport interface RuleTypeInfo {\n  id: RuleType;\n  name: string;\n  description: string;\n  configFields: string[];\n}\n\nexport interface RuleTemplate {\n  id: string;\n  name: string;\n  description: string;\n  ruleType: RuleType;\n  config: Record<string, unknown>;\n}\n\n// ============================================================================\n// DETERMINISTIC GRADERS API CLIENT\n// ============================================================================\n\n/**\n * Deterministic Graders API client for code-based evaluations\n */\nexport const deterministicGraders = {\n  /**\n   * Run deterministic evaluation on a single trace\n   *\n   * @example\n   * ```typescript\n   * const result = await deterministicGraders.evaluate({\n   *   traceId: 'trace_123',\n   *   criterionId: 'criterion_456',\n   * });\n   * console.log(`Passed: ${result.passed}, Score: ${result.score}`);\n   * ```\n   */\n  async evaluate(options: EvaluateOptions): Promise<DeterministicEvalResult> {\n    return apiRequestWithData<DeterministicEvalResult>('/deterministic-graders/evaluate', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Run deterministic evaluations on multiple traces\n   *\n   * @example\n   * ```typescript\n   * const { results, summary } = await deterministicGraders.bulkEvaluate({\n   *   evaluations: [\n   *     { traceId: 'trace_1', criterionId: 'criterion_456' },\n   *     { traceId: 'trace_2', criterionId: 'criterion_456' },\n   *     { traceId: 'trace_3', criterionId: 'criterion_456' },\n   *   ],\n   * });\n   * console.log(`Pass rate: ${summary.passRate * 100}%`);\n   * ```\n   */\n  async bulkEvaluate(options: BulkEvaluateOptions): Promise<BulkEvaluateResult> {\n    return apiRequestWithData<BulkEvaluateResult>('/deterministic-graders/bulk-evaluate', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Get available rule types with descriptions\n   *\n   * @example\n   * ```typescript\n   * const ruleTypes = await deterministicGraders.getRuleTypes();\n   * for (const type of ruleTypes) {\n   *   console.log(`${type.name}: ${type.description}`);\n   * }\n   * ```\n   */\n  async getRuleTypes(): Promise<RuleTypeInfo[]> {\n    return apiRequestWithData<RuleTypeInfo[]>(\n      '/deterministic-graders/rule-types',\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Get rule templates\n   *\n   * @example\n   * ```typescript\n   * const templates = await deterministicGraders.getTemplates();\n   * const noPiiTemplate = templates.find(t => t.id === 'no_pii');\n   * ```\n   */\n  async getTemplates(): Promise<RuleTemplate[]> {\n    return apiRequestWithData<RuleTemplate[]>(\n      '/deterministic-graders/templates',\n      { apiVersion: 'none' }\n    );\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Named rule configuration with type, name, and config\n */\nexport interface NamedRuleConfig {\n  type: RuleType;\n  name: string;\n  config: Record<string, unknown>;\n}\n\n/**\n * Create a regex rule configuration\n *\n * @param name - Rule name for identification\n * @param field - Field to check ('output', 'input', etc.)\n * @param pattern - Regular expression pattern\n * @param flags - Regex flags (default: 'gi')\n * @returns Named rule configuration object\n *\n * @example\n * ```typescript\n * const rule = createRegexRule('email_check', 'output', '\\\\w+@\\\\w+\\\\.\\\\w+');\n * ```\n */\nexport function createRegexRule(name: string, field: string, pattern: string, flags = 'gi'): NamedRuleConfig {\n  return { type: 'regex', name, config: { field, pattern, flags } };\n}\n\n/**\n * Create a contains rule configuration\n *\n * @param name - Rule name for identification\n * @param field - Field to check ('output', 'input', etc.)\n * @param values - Strings to check for\n * @param caseSensitive - Whether comparison is case-sensitive\n * @returns Named rule configuration object\n *\n * @example\n * ```typescript\n * const rule = createContainsRule('greeting', 'output', ['hello', 'hi', 'hey']);\n * ```\n */\nexport function createContainsRule(\n  name: string,\n  field: string,\n  values: string[],\n  caseSensitive = false\n): NamedRuleConfig {\n  return { type: 'contains', name, config: { field, values, caseSensitive } };\n}\n\n/**\n * Create a length rule configuration\n *\n * @param name - Rule name for identification\n * @param min - Minimum length (optional)\n * @param max - Maximum length (optional)\n * @returns Named rule configuration object\n *\n * @example\n * ```typescript\n * const rule = createLengthRule('response_length', 50, 1000);\n * ```\n */\nexport function createLengthRule(name: string, min?: number, max?: number): NamedRuleConfig {\n  return { type: 'length', name, config: { min, max } };\n}\n\n/**\n * Create a JSON schema rule configuration\n *\n * @param name - Rule name for identification\n * @param schema - JSON Schema object\n * @returns Named rule configuration object\n *\n * @example\n * ```typescript\n * const rule = createJsonSchemaRule('response_format', {\n *   type: 'object',\n *   required: ['name', 'email'],\n *   properties: {\n *     name: { type: 'string' },\n *     email: { type: 'string', format: 'email' },\n *   },\n * });\n * ```\n */\nexport function createJsonSchemaRule(name: string, schema: Record<string, unknown>): NamedRuleConfig {\n  return { type: 'json_schema', name, config: { schema } };\n}\n\n/**\n * Check if all rule results passed\n *\n * @param results - Array of rule results (or any objects with a `passed` boolean)\n * @returns Whether all rules passed\n */\nexport function allRulesPassed(results: Array<{ passed: boolean }>): boolean {\n  return results.every(r => r.passed);\n}\n\n/**\n * Get failed rules from results\n *\n * @param results - Array of rule results (or any objects with a `passed` boolean)\n * @returns Array of failed rule results\n */\nexport function getFailedRules<T extends { passed: boolean }>(results: T[]): T[] {\n  return results.filter(r => !r.passed);\n}\n\n/**\n * Calculate average score from rule results\n *\n * @param results - Array of rule results\n * @returns Average score (0-100)\n */\nexport function calculateAverageScore(results: Array<{ score: number }>): number {\n  if (results.length === 0) return 0;\n  const sum = results.reduce((acc, r) => acc + r.score, 0);\n  return sum / results.length;\n}\n"]}
@@ -93,6 +93,8 @@ export declare const llmCosts: {
93
93
  * @returns Optimization stats with recommendations
94
94
  */
95
95
  getOptimizationStats(): Promise<OptimizationStats>;
96
+ /** Alias for getSummary() */
97
+ summary(opts?: CostQueryOptions): Promise<CostSummary>;
96
98
  };
97
99
  /**
98
100
  * Format a cost amount as a currency string
@@ -64,6 +64,10 @@ exports.llmCosts = {
64
64
  apiVersion: 'none',
65
65
  });
66
66
  },
67
+ /** Alias for getSummary() */
68
+ async summary(opts) {
69
+ return this.getSummary(opts);
70
+ },
67
71
  };
68
72
  exports.default = exports.llmCosts;
69
73
  // ============================================================================
@@ -78,4 +82,4 @@ exports.default = exports.llmCosts;
78
82
  function formatCost(amount) {
79
83
  return `$${amount.toFixed(2)}`;
80
84
  }
81
- //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoibGxtLWNvc3RzLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vLi4vc3JjL2FwaS9sbG0tY29zdHMudHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IjtBQUFBOzs7O0dBSUc7OztBQXNKSCxnQ0FFQztBQXRKRCwyQ0FBb0Q7QUF5RXBELCtFQUErRTtBQUMvRSx1QkFBdUI7QUFDdkIsK0VBQStFO0FBRS9FOztHQUVHO0FBQ1UsUUFBQSxRQUFRLEdBQUc7SUFDdEI7Ozs7O09BS0c7SUFDSCxLQUFLLENBQUMsVUFBVSxDQUFDLElBQXVCO1FBQ3RDLE1BQU0sTUFBTSxHQUFHLElBQUksZUFBZSxFQUFFLENBQUM7UUFDckMsSUFBSSxJQUFJLEVBQUUsTUFBTTtZQUFFLE1BQU0sQ0FBQyxHQUFHLENBQUMsUUFBUSxFQUFFLElBQUksQ0FBQyxNQUFNLENBQUMsQ0FBQztRQUVwRCxNQUFNLEtBQUssR0FBRyxNQUFNLENBQUMsUUFBUSxFQUFFLENBQUM7UUFDaEMsT0FBTyxJQUFBLDJCQUFrQixFQUFjLHFCQUFxQixLQUFLLENBQUMsQ0FBQyxDQUFDLElBQUksS0FBSyxFQUFFLENBQUMsQ0FBQyxDQUFDLEVBQUUsRUFBRSxFQUFFO1lBQ3RGLFVBQVUsRUFBRSxNQUFNO1NBQ25CLENBQUMsQ0FBQztJQUNMLENBQUM7SUFFRDs7Ozs7O09BTUc7SUFDSCxLQUFLLENBQUMsWUFBWSxDQUFDLE9BQWUsRUFBRSxJQUF1QjtRQUN6RCxNQUFNLE1BQU0sR0FBRyxJQUFJLGVBQWUsRUFBRSxDQUFDO1FBQ3JDLElBQUksSUFBSSxFQUFFLE1BQU07WUFBRSxNQUFNLENBQUMsR0FBRyxDQUFDLFFBQVEsRUFBRSxJQUFJLENBQUMsTUFBTSxDQUFDLENBQUM7UUFFcEQsTUFBTSxLQUFLLEdBQUcsTUFBTSxDQUFDLFFBQVEsRUFBRSxDQUFDO1FBQ2hDLE9BQU8sSUFBQSwyQkFBa0IsRUFDdkIsd0JBQXdCLE9BQU8sR0FBRyxLQUFLLENBQUMsQ0FBQyxDQUFDLElBQUksS0FBSyxFQUFFLENBQUMsQ0FBQyxDQUFDLEVBQUUsRUFBRSxFQUM1RCxFQUFFLFVBQVUsRUFBRSxNQUFNLEVBQUUsQ0FDdkIsQ0FBQztJQUNKLENBQUM7SUFFRDs7OztPQUlHO0lBQ0gsS0FBSyxDQUFDLFVBQVU7UUFDZCxPQUFPLElBQUEsMkJBQWtCLEVBQWMsb0JBQW9CLEVBQUU7WUFDM0QsVUFBVSxFQUFFLE1BQU07U0FDbkIsQ0FBQyxDQUFDO0lBQ0wsQ0FBQztJQUVEOzs7O09BSUc7SUFDSCxLQUFLLENBQUMsb0JBQW9CO1FBQ3hCLE9BQU8sSUFBQSwyQkFBa0IsRUFBb0IsK0JBQStCLEVBQUU7WUFDNUUsVUFBVSxFQUFFLE1BQU07U0FDbkIsQ0FBQyxDQUFDO0lBQ0wsQ0FBQztDQUNGLENBQUM7QUFnQm1CLGtCQXhFUixnQkFBUSxDQXdFTztBQWQ1QiwrRUFBK0U7QUFDL0UsbUJBQW1CO0FBQ25CLCtFQUErRTtBQUUvRTs7Ozs7R0FLRztBQUNILFNBQWdCLFVBQVUsQ0FBQyxNQUFjO0lBQ3ZDLE9BQU8sSUFBSSxNQUFNLENBQUMsT0FBTyxDQUFDLENBQUMsQ0FBQyxFQUFFLENBQUM7QUFDakMsQ0FBQyIsInNvdXJjZXNDb250ZW50IjpbIi8qKlxuICogVGhpbmtIaXZlIFNESyAtIExMTSBDb3N0cyBBUElcbiAqXG4gKiBMTE0gY29zdCB0cmFja2luZyBhbmQgb3B0aW1pemF0aW9uIGFuYWx5dGljc1xuICovXG5cbmltcG9ydCB7IGFwaVJlcXVlc3RXaXRoRGF0YSB9IGZyb20gJy4uL2NvcmUvY2xpZW50JztcblxuLy8gPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PVxuLy8gVFlQRVNcbi8vID09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT1cblxuLyoqIE9wdGlvbnMgZm9yIGNvc3Qgc3VtbWFyeSBhbmQgYnJlYWtkb3duICovXG5leHBvcnQgaW50ZXJmYWNlIENvc3RRdWVyeU9wdGlvbnMge1xuICBwZXJpb2Q/OiAnZGF5JyB8ICd3ZWVrJyB8ICdtb250aCcgfCAncXVhcnRlcicgfCAneWVhcic7XG59XG5cbi8qKiBDb3N0IHN1bW1hcnkgYWNyb3NzIGFsbCBhZ2VudHMgKi9cbmV4cG9ydCBpbnRlcmZhY2UgQ29zdFN1bW1hcnkge1xuICB0b3RhbENvc3Q6IG51bWJlcjtcbiAgdG90YWxUb2tlbnM6IG51bWJlcjtcbiAgdG90YWxSZXF1ZXN0czogbnVtYmVyO1xuICBwZXJpb2Q6IHN0cmluZztcbiAgYnJlYWtkb3duOiBDb3N0QnJlYWtkb3duSXRlbVtdO1xufVxuXG4vKiogQ29zdCBicmVha2Rvd24gaXRlbSAqL1xuZXhwb3J0IGludGVyZmFjZSBDb3N0QnJlYWtkb3duSXRlbSB7XG4gIG1vZGVsOiBzdHJpbmc7XG4gIHByb3ZpZGVyOiBzdHJpbmc7XG4gIGNvc3Q6IG51bWJlcjtcbiAgdG9rZW5zOiBudW1iZXI7XG4gIHJlcXVlc3RzOiBudW1iZXI7XG4gIGlucHV0VG9rZW5zOiBudW1iZXI7XG4gIG91dHB1dFRva2VuczogbnVtYmVyO1xufVxuXG4vKiogQWdlbnQtbGV2ZWwgY29zdCBicmVha2Rvd24gKi9cbmV4cG9ydCBpbnRlcmZhY2UgQWdlbnRDb3N0QnJlYWtkb3duIHtcbiAgYWdlbnRJZDogc3RyaW5nO1xuICB0b3RhbENvc3Q6IG51bWJlcjtcbiAgdG90YWxUb2tlbnM6IG51bWJlcjtcbiAgdG90YWxSZXF1ZXN0czogbnVtYmVyO1xuICBwZXJpb2Q6IHN0cmluZztcbiAgbW9kZWxzOiBDb3N0QnJlYWtkb3duSXRlbVtdO1xufVxuXG4vKiogQ29zdCBzYXZpbmdzIGRhdGEgKi9cbmV4cG9ydCBpbnRlcmZhY2UgQ29zdFNhdmluZ3Mge1xuICB0b3RhbFNhdmVkOiBudW1iZXI7XG4gIHNhdmluZ3NQZXJjZW50OiBudW1iZXI7XG4gIG9wdGltaXphdGlvbnM6IE9wdGltaXphdGlvbkVudHJ5W107XG59XG5cbi8qKiBBIHNpbmdsZSBvcHRpbWl6YXRpb24gZW50cnkgKi9cbmV4cG9ydCBpbnRlcmZhY2UgT3B0aW1pemF0aW9uRW50cnkge1xuICB0eXBlOiBzdHJpbmc7XG4gIGRlc2NyaXB0aW9uOiBzdHJpbmc7XG4gIHNhdmVkQW1vdW50OiBudW1iZXI7XG4gIGFwcGxpZWRBdDogc3RyaW5nO1xufVxuXG4vKiogT3B0aW1pemF0aW9uIHN0YXRpc3RpY3MgKi9cbmV4cG9ydCBpbnRlcmZhY2UgT3B0aW1pemF0aW9uU3RhdHMge1xuICBjYWNoZUhpdFJhdGU6IG51bWJlcjtcbiAgYXZnVG9rZW5SZWR1Y3Rpb246IG51bWJlcjtcbiAgbW9kZWxEb3duZ3JhZGVDb3VudDogbnVtYmVyO1xuICB0b3RhbE9wdGltaXphdGlvbnM6IG51bWJlcjtcbiAgcmVjb21tZW5kYXRpb25zOiBPcHRpbWl6YXRpb25SZWNvbW1lbmRhdGlvbltdO1xufVxuXG4vKiogT3B0aW1pemF0aW9uIHJlY29tbWVuZGF0aW9uICovXG5leHBvcnQgaW50ZXJmYWNlIE9wdGltaXphdGlvblJlY29tbWVuZGF0aW9uIHtcbiAgdHlwZTogc3RyaW5nO1xuICBkZXNjcmlwdGlvbjogc3RyaW5nO1xuICBlc3RpbWF0ZWRTYXZpbmdzOiBudW1iZXI7XG4gIHByaW9yaXR5OiAnbG93JyB8ICdtZWRpdW0nIHwgJ2hpZ2gnO1xufVxuXG4vLyA9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09XG4vLyBMTE0gQ09TVFMgQVBJIENMSUVOVFxuLy8gPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PVxuXG4vKipcbiAqIExMTSBDb3N0cyBBUEkgY2xpZW50IGZvciB0cmFja2luZyBhbmQgb3B0aW1pemluZyBMTE0gc3BlbmRpbmdcbiAqL1xuZXhwb3J0IGNvbnN0IGxsbUNvc3RzID0ge1xuICAvKipcbiAgICogR2V0IGNvc3Qgc3VtbWFyeSBhY3Jvc3MgYWxsIGFnZW50c1xuICAgKlxuICAgKiBAcGFyYW0gb3B0cyAtIFBlcmlvZCBmaWx0ZXIgb3B0aW9uXG4gICAqIEByZXR1cm5zIENvc3Qgc3VtbWFyeSB3aXRoIGJyZWFrZG93blxuICAgKi9cbiAgYXN5bmMgZ2V0U3VtbWFyeShvcHRzPzogQ29zdFF1ZXJ5T3B0aW9ucyk6IFByb21pc2U8Q29zdFN1bW1hcnk+IHtcbiAgICBjb25zdCBwYXJhbXMgPSBuZXcgVVJMU2VhcmNoUGFyYW1zKCk7XG4gICAgaWYgKG9wdHM/LnBlcmlvZCkgcGFyYW1zLnNldCgncGVyaW9kJywgb3B0cy5wZXJpb2QpO1xuXG4gICAgY29uc3QgcXVlcnkgPSBwYXJhbXMudG9TdHJpbmcoKTtcbiAgICByZXR1cm4gYXBpUmVxdWVzdFdpdGhEYXRhPENvc3RTdW1tYXJ5PihgL2xsbS1jb3N0cy9zdW1tYXJ5JHtxdWVyeSA/IGA/JHtxdWVyeX1gIDogJyd9YCwge1xuICAgICAgYXBpVmVyc2lvbjogJ25vbmUnLFxuICAgIH0pO1xuICB9LFxuXG4gIC8qKlxuICAgKiBHZXQgY29zdCBicmVha2Rvd24gZm9yIGEgc3BlY2lmaWMgYWdlbnRcbiAgICpcbiAgICogQHBhcmFtIGFnZW50SWQgLSBUaGUgYWdlbnQgSURcbiAgICogQHBhcmFtIG9wdHMgLSBQZXJpb2QgZmlsdGVyIG9wdGlvblxuICAgKiBAcmV0dXJucyBBZ2VudCBjb3N0IGJyZWFrZG93blxuICAgKi9cbiAgYXN5bmMgZ2V0QnJlYWtkb3duKGFnZW50SWQ6IHN0cmluZywgb3B0cz86IENvc3RRdWVyeU9wdGlvbnMpOiBQcm9taXNlPEFnZW50Q29zdEJyZWFrZG93bj4ge1xuICAgIGNvbnN0IHBhcmFtcyA9IG5ldyBVUkxTZWFyY2hQYXJhbXMoKTtcbiAgICBpZiAob3B0cz8ucGVyaW9kKSBwYXJhbXMuc2V0KCdwZXJpb2QnLCBvcHRzLnBlcmlvZCk7XG5cbiAgICBjb25zdCBxdWVyeSA9IHBhcmFtcy50b1N0cmluZygpO1xuICAgIHJldHVybiBhcGlSZXF1ZXN0V2l0aERhdGE8QWdlbnRDb3N0QnJlYWtkb3duPihcbiAgICAgIGAvbGxtLWNvc3RzL2JyZWFrZG93bi8ke2FnZW50SWR9JHtxdWVyeSA/IGA/JHtxdWVyeX1gIDogJyd9YCxcbiAgICAgIHsgYXBpVmVyc2lvbjogJ25vbmUnIH1cbiAgICApO1xuICB9LFxuXG4gIC8qKlxuICAgKiBHZXQgY29zdCBzYXZpbmdzIGZyb20gb3B0aW1pemF0aW9uc1xuICAgKlxuICAgKiBAcmV0dXJucyBTYXZpbmdzIGRhdGEgYW5kIG9wdGltaXphdGlvbiBoaXN0b3J5XG4gICAqL1xuICBhc3luYyBnZXRTYXZpbmdzKCk6IFByb21pc2U8Q29zdFNhdmluZ3M+IHtcbiAgICByZXR1cm4gYXBpUmVxdWVzdFdpdGhEYXRhPENvc3RTYXZpbmdzPignL2xsbS1jb3N0cy9zYXZpbmdzJywge1xuICAgICAgYXBpVmVyc2lvbjogJ25vbmUnLFxuICAgIH0pO1xuICB9LFxuXG4gIC8qKlxuICAgKiBHZXQgb3B0aW1pemF0aW9uIHN0YXRpc3RpY3MgYW5kIHJlY29tbWVuZGF0aW9uc1xuICAgKlxuICAgKiBAcmV0dXJucyBPcHRpbWl6YXRpb24gc3RhdHMgd2l0aCByZWNvbW1lbmRhdGlvbnNcbiAgICovXG4gIGFzeW5jIGdldE9wdGltaXphdGlvblN0YXRzKCk6IFByb21pc2U8T3B0aW1pemF0aW9uU3RhdHM+IHtcbiAgICByZXR1cm4gYXBpUmVxdWVzdFdpdGhEYXRhPE9wdGltaXphdGlvblN0YXRzPignL2xsbS1jb3N0cy9vcHRpbWl6YXRpb24tc3RhdHMnLCB7XG4gICAgICBhcGlWZXJzaW9uOiAnbm9uZScsXG4gICAgfSk7XG4gIH0sXG59O1xuXG4vLyA9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09XG4vLyBIRUxQRVIgRlVOQ1RJT05TXG4vLyA9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09XG5cbi8qKlxuICogRm9ybWF0IGEgY29zdCBhbW91bnQgYXMgYSBjdXJyZW5jeSBzdHJpbmdcbiAqXG4gKiBAcGFyYW0gYW1vdW50IC0gVGhlIGNvc3QgYW1vdW50IGluIGRvbGxhcnNcbiAqIEByZXR1cm5zIEZvcm1hdHRlZCBzdHJpbmcgKGUuZy4sIFwiJDEyLjM0XCIpXG4gKi9cbmV4cG9ydCBmdW5jdGlvbiBmb3JtYXRDb3N0KGFtb3VudDogbnVtYmVyKTogc3RyaW5nIHtcbiAgcmV0dXJuIGAkJHthbW91bnQudG9GaXhlZCgyKX1gO1xufVxuXG5leHBvcnQgeyBsbG1Db3N0cyBhcyBkZWZhdWx0IH07XG4iXX0=
85
+ //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoibGxtLWNvc3RzLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vLi4vc3JjL2FwaS9sbG0tY29zdHMudHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IjtBQUFBOzs7O0dBSUc7OztBQTJKSCxnQ0FFQztBQTNKRCwyQ0FBb0Q7QUF5RXBELCtFQUErRTtBQUMvRSx1QkFBdUI7QUFDdkIsK0VBQStFO0FBRS9FOztHQUVHO0FBQ1UsUUFBQSxRQUFRLEdBQUc7SUFDdEI7Ozs7O09BS0c7SUFDSCxLQUFLLENBQUMsVUFBVSxDQUFDLElBQXVCO1FBQ3RDLE1BQU0sTUFBTSxHQUFHLElBQUksZUFBZSxFQUFFLENBQUM7UUFDckMsSUFBSSxJQUFJLEVBQUUsTUFBTTtZQUFFLE1BQU0sQ0FBQyxHQUFHLENBQUMsUUFBUSxFQUFFLElBQUksQ0FBQyxNQUFNLENBQUMsQ0FBQztRQUVwRCxNQUFNLEtBQUssR0FBRyxNQUFNLENBQUMsUUFBUSxFQUFFLENBQUM7UUFDaEMsT0FBTyxJQUFBLDJCQUFrQixFQUFjLHFCQUFxQixLQUFLLENBQUMsQ0FBQyxDQUFDLElBQUksS0FBSyxFQUFFLENBQUMsQ0FBQyxDQUFDLEVBQUUsRUFBRSxFQUFFO1lBQ3RGLFVBQVUsRUFBRSxNQUFNO1NBQ25CLENBQUMsQ0FBQztJQUNMLENBQUM7SUFFRDs7Ozs7O09BTUc7SUFDSCxLQUFLLENBQUMsWUFBWSxDQUFDLE9BQWUsRUFBRSxJQUF1QjtRQUN6RCxNQUFNLE1BQU0sR0FBRyxJQUFJLGVBQWUsRUFBRSxDQUFDO1FBQ3JDLElBQUksSUFBSSxFQUFFLE1BQU07WUFBRSxNQUFNLENBQUMsR0FBRyxDQUFDLFFBQVEsRUFBRSxJQUFJLENBQUMsTUFBTSxDQUFDLENBQUM7UUFFcEQsTUFBTSxLQUFLLEdBQUcsTUFBTSxDQUFDLFFBQVEsRUFBRSxDQUFDO1FBQ2hDLE9BQU8sSUFBQSwyQkFBa0IsRUFDdkIsd0JBQXdCLE9BQU8sR0FBRyxLQUFLLENBQUMsQ0FBQyxDQUFDLElBQUksS0FBSyxFQUFFLENBQUMsQ0FBQyxDQUFDLEVBQUUsRUFBRSxFQUM1RCxFQUFFLFVBQVUsRUFBRSxNQUFNLEVBQUUsQ0FDdkIsQ0FBQztJQUNKLENBQUM7SUFFRDs7OztPQUlHO0lBQ0gsS0FBSyxDQUFDLFVBQVU7UUFDZCxPQUFPLElBQUEsMkJBQWtCLEVBQWMsb0JBQW9CLEVBQUU7WUFDM0QsVUFBVSxFQUFFLE1BQU07U0FDbkIsQ0FBQyxDQUFDO0lBQ0wsQ0FBQztJQUVEOzs7O09BSUc7SUFDSCxLQUFLLENBQUMsb0JBQW9CO1FBQ3hCLE9BQU8sSUFBQSwyQkFBa0IsRUFBb0IsK0JBQStCLEVBQUU7WUFDNUUsVUFBVSxFQUFFLE1BQU07U0FDbkIsQ0FBQyxDQUFDO0lBQ0wsQ0FBQztJQUVELDZCQUE2QjtJQUM3QixLQUFLLENBQUMsT0FBTyxDQUFDLElBQXVCO1FBQ25DLE9BQU8sSUFBSSxDQUFDLFVBQVUsQ0FBQyxJQUFJLENBQUMsQ0FBQztJQUMvQixDQUFDO0NBQ0YsQ0FBQztBQWdCbUIsa0JBN0VSLGdCQUFRLENBNkVPO0FBZDVCLCtFQUErRTtBQUMvRSxtQkFBbUI7QUFDbkIsK0VBQStFO0FBRS9FOzs7OztHQUtHO0FBQ0gsU0FBZ0IsVUFBVSxDQUFDLE1BQWM7SUFDdkMsT0FBTyxJQUFJLE1BQU0sQ0FBQyxPQUFPLENBQUMsQ0FBQyxDQUFDLEVBQUUsQ0FBQztBQUNqQyxDQUFDIiwic291cmNlc0NvbnRlbnQiOlsiLyoqXG4gKiBUaGlua0hpdmUgU0RLIC0gTExNIENvc3RzIEFQSVxuICpcbiAqIExMTSBjb3N0IHRyYWNraW5nIGFuZCBvcHRpbWl6YXRpb24gYW5hbHl0aWNzXG4gKi9cblxuaW1wb3J0IHsgYXBpUmVxdWVzdFdpdGhEYXRhIH0gZnJvbSAnLi4vY29yZS9jbGllbnQnO1xuXG4vLyA9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09XG4vLyBUWVBFU1xuLy8gPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PVxuXG4vKiogT3B0aW9ucyBmb3IgY29zdCBzdW1tYXJ5IGFuZCBicmVha2Rvd24gKi9cbmV4cG9ydCBpbnRlcmZhY2UgQ29zdFF1ZXJ5T3B0aW9ucyB7XG4gIHBlcmlvZD86ICdkYXknIHwgJ3dlZWsnIHwgJ21vbnRoJyB8ICdxdWFydGVyJyB8ICd5ZWFyJztcbn1cblxuLyoqIENvc3Qgc3VtbWFyeSBhY3Jvc3MgYWxsIGFnZW50cyAqL1xuZXhwb3J0IGludGVyZmFjZSBDb3N0U3VtbWFyeSB7XG4gIHRvdGFsQ29zdDogbnVtYmVyO1xuICB0b3RhbFRva2VuczogbnVtYmVyO1xuICB0b3RhbFJlcXVlc3RzOiBudW1iZXI7XG4gIHBlcmlvZDogc3RyaW5nO1xuICBicmVha2Rvd246IENvc3RCcmVha2Rvd25JdGVtW107XG59XG5cbi8qKiBDb3N0IGJyZWFrZG93biBpdGVtICovXG5leHBvcnQgaW50ZXJmYWNlIENvc3RCcmVha2Rvd25JdGVtIHtcbiAgbW9kZWw6IHN0cmluZztcbiAgcHJvdmlkZXI6IHN0cmluZztcbiAgY29zdDogbnVtYmVyO1xuICB0b2tlbnM6IG51bWJlcjtcbiAgcmVxdWVzdHM6IG51bWJlcjtcbiAgaW5wdXRUb2tlbnM6IG51bWJlcjtcbiAgb3V0cHV0VG9rZW5zOiBudW1iZXI7XG59XG5cbi8qKiBBZ2VudC1sZXZlbCBjb3N0IGJyZWFrZG93biAqL1xuZXhwb3J0IGludGVyZmFjZSBBZ2VudENvc3RCcmVha2Rvd24ge1xuICBhZ2VudElkOiBzdHJpbmc7XG4gIHRvdGFsQ29zdDogbnVtYmVyO1xuICB0b3RhbFRva2VuczogbnVtYmVyO1xuICB0b3RhbFJlcXVlc3RzOiBudW1iZXI7XG4gIHBlcmlvZDogc3RyaW5nO1xuICBtb2RlbHM6IENvc3RCcmVha2Rvd25JdGVtW107XG59XG5cbi8qKiBDb3N0IHNhdmluZ3MgZGF0YSAqL1xuZXhwb3J0IGludGVyZmFjZSBDb3N0U2F2aW5ncyB7XG4gIHRvdGFsU2F2ZWQ6IG51bWJlcjtcbiAgc2F2aW5nc1BlcmNlbnQ6IG51bWJlcjtcbiAgb3B0aW1pemF0aW9uczogT3B0aW1pemF0aW9uRW50cnlbXTtcbn1cblxuLyoqIEEgc2luZ2xlIG9wdGltaXphdGlvbiBlbnRyeSAqL1xuZXhwb3J0IGludGVyZmFjZSBPcHRpbWl6YXRpb25FbnRyeSB7XG4gIHR5cGU6IHN0cmluZztcbiAgZGVzY3JpcHRpb246IHN0cmluZztcbiAgc2F2ZWRBbW91bnQ6IG51bWJlcjtcbiAgYXBwbGllZEF0OiBzdHJpbmc7XG59XG5cbi8qKiBPcHRpbWl6YXRpb24gc3RhdGlzdGljcyAqL1xuZXhwb3J0IGludGVyZmFjZSBPcHRpbWl6YXRpb25TdGF0cyB7XG4gIGNhY2hlSGl0UmF0ZTogbnVtYmVyO1xuICBhdmdUb2tlblJlZHVjdGlvbjogbnVtYmVyO1xuICBtb2RlbERvd25ncmFkZUNvdW50OiBudW1iZXI7XG4gIHRvdGFsT3B0aW1pemF0aW9uczogbnVtYmVyO1xuICByZWNvbW1lbmRhdGlvbnM6IE9wdGltaXphdGlvblJlY29tbWVuZGF0aW9uW107XG59XG5cbi8qKiBPcHRpbWl6YXRpb24gcmVjb21tZW5kYXRpb24gKi9cbmV4cG9ydCBpbnRlcmZhY2UgT3B0aW1pemF0aW9uUmVjb21tZW5kYXRpb24ge1xuICB0eXBlOiBzdHJpbmc7XG4gIGRlc2NyaXB0aW9uOiBzdHJpbmc7XG4gIGVzdGltYXRlZFNhdmluZ3M6IG51bWJlcjtcbiAgcHJpb3JpdHk6ICdsb3cnIHwgJ21lZGl1bScgfCAnaGlnaCc7XG59XG5cbi8vID09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT1cbi8vIExMTSBDT1NUUyBBUEkgQ0xJRU5UXG4vLyA9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09XG5cbi8qKlxuICogTExNIENvc3RzIEFQSSBjbGllbnQgZm9yIHRyYWNraW5nIGFuZCBvcHRpbWl6aW5nIExMTSBzcGVuZGluZ1xuICovXG5leHBvcnQgY29uc3QgbGxtQ29zdHMgPSB7XG4gIC8qKlxuICAgKiBHZXQgY29zdCBzdW1tYXJ5IGFjcm9zcyBhbGwgYWdlbnRzXG4gICAqXG4gICAqIEBwYXJhbSBvcHRzIC0gUGVyaW9kIGZpbHRlciBvcHRpb25cbiAgICogQHJldHVybnMgQ29zdCBzdW1tYXJ5IHdpdGggYnJlYWtkb3duXG4gICAqL1xuICBhc3luYyBnZXRTdW1tYXJ5KG9wdHM/OiBDb3N0UXVlcnlPcHRpb25zKTogUHJvbWlzZTxDb3N0U3VtbWFyeT4ge1xuICAgIGNvbnN0IHBhcmFtcyA9IG5ldyBVUkxTZWFyY2hQYXJhbXMoKTtcbiAgICBpZiAob3B0cz8ucGVyaW9kKSBwYXJhbXMuc2V0KCdwZXJpb2QnLCBvcHRzLnBlcmlvZCk7XG5cbiAgICBjb25zdCBxdWVyeSA9IHBhcmFtcy50b1N0cmluZygpO1xuICAgIHJldHVybiBhcGlSZXF1ZXN0V2l0aERhdGE8Q29zdFN1bW1hcnk+KGAvbGxtLWNvc3RzL3N1bW1hcnkke3F1ZXJ5ID8gYD8ke3F1ZXJ5fWAgOiAnJ31gLCB7XG4gICAgICBhcGlWZXJzaW9uOiAnbm9uZScsXG4gICAgfSk7XG4gIH0sXG5cbiAgLyoqXG4gICAqIEdldCBjb3N0IGJyZWFrZG93biBmb3IgYSBzcGVjaWZpYyBhZ2VudFxuICAgKlxuICAgKiBAcGFyYW0gYWdlbnRJZCAtIFRoZSBhZ2VudCBJRFxuICAgKiBAcGFyYW0gb3B0cyAtIFBlcmlvZCBmaWx0ZXIgb3B0aW9uXG4gICAqIEByZXR1cm5zIEFnZW50IGNvc3QgYnJlYWtkb3duXG4gICAqL1xuICBhc3luYyBnZXRCcmVha2Rvd24oYWdlbnRJZDogc3RyaW5nLCBvcHRzPzogQ29zdFF1ZXJ5T3B0aW9ucyk6IFByb21pc2U8QWdlbnRDb3N0QnJlYWtkb3duPiB7XG4gICAgY29uc3QgcGFyYW1zID0gbmV3IFVSTFNlYXJjaFBhcmFtcygpO1xuICAgIGlmIChvcHRzPy5wZXJpb2QpIHBhcmFtcy5zZXQoJ3BlcmlvZCcsIG9wdHMucGVyaW9kKTtcblxuICAgIGNvbnN0IHF1ZXJ5ID0gcGFyYW1zLnRvU3RyaW5nKCk7XG4gICAgcmV0dXJuIGFwaVJlcXVlc3RXaXRoRGF0YTxBZ2VudENvc3RCcmVha2Rvd24+KFxuICAgICAgYC9sbG0tY29zdHMvYnJlYWtkb3duLyR7YWdlbnRJZH0ke3F1ZXJ5ID8gYD8ke3F1ZXJ5fWAgOiAnJ31gLFxuICAgICAgeyBhcGlWZXJzaW9uOiAnbm9uZScgfVxuICAgICk7XG4gIH0sXG5cbiAgLyoqXG4gICAqIEdldCBjb3N0IHNhdmluZ3MgZnJvbSBvcHRpbWl6YXRpb25zXG4gICAqXG4gICAqIEByZXR1cm5zIFNhdmluZ3MgZGF0YSBhbmQgb3B0aW1pemF0aW9uIGhpc3RvcnlcbiAgICovXG4gIGFzeW5jIGdldFNhdmluZ3MoKTogUHJvbWlzZTxDb3N0U2F2aW5ncz4ge1xuICAgIHJldHVybiBhcGlSZXF1ZXN0V2l0aERhdGE8Q29zdFNhdmluZ3M+KCcvbGxtLWNvc3RzL3NhdmluZ3MnLCB7XG4gICAgICBhcGlWZXJzaW9uOiAnbm9uZScsXG4gICAgfSk7XG4gIH0sXG5cbiAgLyoqXG4gICAqIEdldCBvcHRpbWl6YXRpb24gc3RhdGlzdGljcyBhbmQgcmVjb21tZW5kYXRpb25zXG4gICAqXG4gICAqIEByZXR1cm5zIE9wdGltaXphdGlvbiBzdGF0cyB3aXRoIHJlY29tbWVuZGF0aW9uc1xuICAgKi9cbiAgYXN5bmMgZ2V0T3B0aW1pemF0aW9uU3RhdHMoKTogUHJvbWlzZTxPcHRpbWl6YXRpb25TdGF0cz4ge1xuICAgIHJldHVybiBhcGlSZXF1ZXN0V2l0aERhdGE8T3B0aW1pemF0aW9uU3RhdHM+KCcvbGxtLWNvc3RzL29wdGltaXphdGlvbi1zdGF0cycsIHtcbiAgICAgIGFwaVZlcnNpb246ICdub25lJyxcbiAgICB9KTtcbiAgfSxcblxuICAvKiogQWxpYXMgZm9yIGdldFN1bW1hcnkoKSAqL1xuICBhc3luYyBzdW1tYXJ5KG9wdHM/OiBDb3N0UXVlcnlPcHRpb25zKTogUHJvbWlzZTxDb3N0U3VtbWFyeT4ge1xuICAgIHJldHVybiB0aGlzLmdldFN1bW1hcnkob3B0cyk7XG4gIH0sXG59O1xuXG4vLyA9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09XG4vLyBIRUxQRVIgRlVOQ1RJT05TXG4vLyA9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09XG5cbi8qKlxuICogRm9ybWF0IGEgY29zdCBhbW91bnQgYXMgYSBjdXJyZW5jeSBzdHJpbmdcbiAqXG4gKiBAcGFyYW0gYW1vdW50IC0gVGhlIGNvc3QgYW1vdW50IGluIGRvbGxhcnNcbiAqIEByZXR1cm5zIEZvcm1hdHRlZCBzdHJpbmcgKGUuZy4sIFwiJDEyLjM0XCIpXG4gKi9cbmV4cG9ydCBmdW5jdGlvbiBmb3JtYXRDb3N0KGFtb3VudDogbnVtYmVyKTogc3RyaW5nIHtcbiAgcmV0dXJuIGAkJHthbW91bnQudG9GaXhlZCgyKX1gO1xufVxuXG5leHBvcnQgeyBsbG1Db3N0cyBhcyBkZWZhdWx0IH07XG4iXX0=
@@ -245,18 +245,21 @@ export declare const nondeterminism: {
245
245
  getInfo(): Promise<PassAtKInfo>;
246
246
  };
247
247
  /**
248
- * Calculate pass@k probability from pass rate
248
+ * Calculate pass@k probability
249
+ *
250
+ * Supports two calling conventions:
251
+ * - calculatePassAtK(passRate, k) — from pass rate directly
252
+ * - calculatePassAtK(n, c, k) — from n total runs, c correct, sample k
249
253
  *
250
- * @param passRate - Single-run pass rate (0-1)
251
- * @param k - Number of runs
252
254
  * @returns Probability that at least 1 of k runs passes
253
255
  *
254
256
  * @example
255
257
  * ```typescript
256
- * const passAtK = calculatePassAtK(0.7, 3); // ~0.973
258
+ * calculatePassAtK(0.7, 3); // ~0.973 (from pass rate)
259
+ * calculatePassAtK(10, 8, 3); // ~0.983 (from 10 total, 8 correct, sample 3)
257
260
  * ```
258
261
  */
259
- export declare function calculatePassAtK(passRate: number, k: number): number;
262
+ export declare function calculatePassAtK(passRateOrN: number, kOrC: number, k?: number): number;
260
263
  /**
261
264
  * Calculate pass^k probability from pass rate
262
265
  *
@@ -286,11 +289,16 @@ export declare function requiredPassRateForPassAtK(targetPassAtK: number, k: num
286
289
  /**
287
290
  * Determine if evaluation is reliable based on analysis
288
291
  *
289
- * @param analysis - Criterion analysis result
290
- * @param reliabilityThreshold - Minimum reliability score (default 0.8)
292
+ * Accepts either a CriterionAnalysis object or a raw reliability score:
293
+ * - isReliableEvaluation(analysis) — from analysis result
294
+ * - isReliableEvaluation(0.95, 5) — score and k value (k ignored, checks score)
295
+ * - isReliableEvaluation(analysis, 0.9) — with custom threshold
296
+ *
297
+ * @param analysisOrScore - Criterion analysis result or raw reliability score
298
+ * @param thresholdOrK - Reliability threshold (default 0.8)
291
299
  * @returns Whether the evaluation is considered reliable
292
300
  */
293
- export declare function isReliableEvaluation(analysis: CriterionAnalysis, reliabilityThreshold?: number): boolean;
301
+ export declare function isReliableEvaluation(analysisOrScore: CriterionAnalysis | number, thresholdOrK?: number): boolean;
294
302
  /**
295
303
  * Get recommendation based on reliability analysis
296
304
  *