@anyway-sh/node-server-sdk 0.22.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/README +35 -0
  2. package/dist/index.d.ts +1957 -0
  3. package/dist/index.js +4458 -0
  4. package/dist/index.js.map +1 -0
  5. package/dist/index.mjs +4382 -0
  6. package/dist/src/index.d.ts +10 -0
  7. package/dist/src/lib/associations/associations.d.ts +32 -0
  8. package/dist/src/lib/client/annotation/base-annotation.d.ts +20 -0
  9. package/dist/src/lib/client/annotation/user-feedback.d.ts +32 -0
  10. package/dist/src/lib/client/dataset/attachment-uploader.d.ts +50 -0
  11. package/dist/src/lib/client/dataset/attachment.d.ts +84 -0
  12. package/dist/src/lib/client/dataset/base-dataset.d.ts +10 -0
  13. package/dist/src/lib/client/dataset/column.d.ts +23 -0
  14. package/dist/src/lib/client/dataset/dataset.d.ts +43 -0
  15. package/dist/src/lib/client/dataset/datasets.d.ts +14 -0
  16. package/dist/src/lib/client/dataset/index.d.ts +8 -0
  17. package/dist/src/lib/client/dataset/row.d.ts +73 -0
  18. package/dist/src/lib/client/evaluator/evaluator.d.ts +28 -0
  19. package/dist/src/lib/client/evaluator/index.d.ts +2 -0
  20. package/dist/src/lib/client/experiment/experiment.d.ts +76 -0
  21. package/dist/src/lib/client/experiment/index.d.ts +2 -0
  22. package/dist/src/lib/client/traceloop-client.d.ts +40 -0
  23. package/dist/src/lib/configuration/index.d.ts +35 -0
  24. package/dist/src/lib/configuration/validation.d.ts +3 -0
  25. package/dist/src/lib/errors/index.d.ts +36 -0
  26. package/dist/src/lib/generated/evaluators/index.d.ts +5 -0
  27. package/dist/src/lib/generated/evaluators/mbt-evaluators.d.ts +386 -0
  28. package/dist/src/lib/generated/evaluators/registry.d.ts +12 -0
  29. package/dist/src/lib/generated/evaluators/types.d.ts +401 -0
  30. package/dist/src/lib/images/image-uploader.d.ts +15 -0
  31. package/dist/src/lib/images/index.d.ts +2 -0
  32. package/dist/src/lib/interfaces/annotations.interface.d.ts +35 -0
  33. package/dist/src/lib/interfaces/dataset.interface.d.ts +105 -0
  34. package/dist/src/lib/interfaces/evaluator.interface.d.ts +83 -0
  35. package/dist/src/lib/interfaces/experiment.interface.d.ts +117 -0
  36. package/dist/src/lib/interfaces/index.d.ts +8 -0
  37. package/dist/src/lib/interfaces/initialize-options.interface.d.ts +133 -0
  38. package/dist/src/lib/interfaces/prompts.interface.d.ts +53 -0
  39. package/dist/src/lib/interfaces/traceloop-client.interface.d.ts +7 -0
  40. package/dist/src/lib/node-server-sdk.d.ts +19 -0
  41. package/dist/src/lib/prompts/fetch.d.ts +3 -0
  42. package/dist/src/lib/prompts/index.d.ts +3 -0
  43. package/dist/src/lib/prompts/registry.d.ts +9 -0
  44. package/dist/src/lib/prompts/template.d.ts +3 -0
  45. package/dist/src/lib/tracing/ai-sdk-transformations.d.ts +5 -0
  46. package/dist/src/lib/tracing/association.d.ts +4 -0
  47. package/dist/src/lib/tracing/baggage-utils.d.ts +2 -0
  48. package/dist/src/lib/tracing/custom-metric.d.ts +14 -0
  49. package/dist/src/lib/tracing/decorators.d.ts +22 -0
  50. package/dist/src/lib/tracing/index.d.ts +14 -0
  51. package/dist/src/lib/tracing/manual.d.ts +60 -0
  52. package/dist/src/lib/tracing/sampler.d.ts +7 -0
  53. package/dist/src/lib/tracing/span-processor.d.ts +48 -0
  54. package/dist/src/lib/tracing/tracing.d.ts +10 -0
  55. package/dist/src/lib/utils/response-transformer.d.ts +19 -0
  56. package/package.json +127 -0
@@ -0,0 +1,386 @@
1
+ import type { EvaluatorWithConfig } from '../../interfaces/experiment.interface';
2
+ import type { components } from './types';
3
+ import { type EvaluatorSlug, type EvaluatorSchema } from './registry';
4
+ export type AgentFlowQualityConfig = components['schemas']['request.AgentFlowQualityRequest']['config'];
5
+ export type AgentGoalCompletenessConfig = components['schemas']['request.AgentGoalCompletenessRequest']['config'];
6
+ export type AgentToolTrajectoryConfig = components['schemas']['request.AgentToolTrajectoryRequest']['config'];
7
+ export type ContextRelevanceConfig = components['schemas']['request.ContextRelevanceRequest']['config'];
8
+ export type JsonValidatorConfig = components['schemas']['request.JSONValidatorRequest']['config'];
9
+ export type PiiDetectorConfig = components['schemas']['request.PIIDetectorRequest']['config'];
10
+ export type PlaceholderRegexConfig = components['schemas']['request.PlaceholderRegexRequest']['config'];
11
+ export type PromptInjectionConfig = components['schemas']['request.PromptInjectionRequest']['config'];
12
+ export type RegexValidatorConfig = components['schemas']['request.RegexValidatorRequest']['config'];
13
+ export type SexismDetectorConfig = components['schemas']['request.SexismDetectorRequest']['config'];
14
+ export type ToxicityDetectorConfig = components['schemas']['request.ToxicityDetectorRequest']['config'];
15
+ /**
16
+ * Create an evaluator configuration object.
17
+ */
18
+ export declare function createEvaluator(slug: EvaluatorSlug, options?: {
19
+ version?: string;
20
+ config?: Record<string, unknown>;
21
+ }): EvaluatorWithConfig;
22
+ /**
23
+ * Validate that required input fields are present in task output.
24
+ */
25
+ export declare function validateEvaluatorInput(slug: EvaluatorSlug, taskOutput: Record<string, unknown>): {
26
+ valid: boolean;
27
+ missingFields: string[];
28
+ };
29
+ /**
30
+ * Get all available evaluator slugs.
31
+ */
32
+ export declare function getAvailableEvaluatorSlugs(): EvaluatorSlug[];
33
+ /**
34
+ * Get schema information for an evaluator.
35
+ */
36
+ export declare function getEvaluatorSchemaInfo(slug: EvaluatorSlug): EvaluatorSchema | undefined;
37
+ /**
38
+ * Factory class for creating type-safe MBT evaluator configurations.
39
+ *
40
+ * @example
41
+ * ```typescript
42
+ * import { EvaluatorMadeByTraceloop } from '@anyway-sh/node-server-sdk';
43
+ *
44
+ * const evaluators = [
45
+ * EvaluatorMadeByTraceloop.piiDetector({ probability_threshold: 0.8 }),
46
+ * EvaluatorMadeByTraceloop.faithfulness(),
47
+ * ];
48
+ * ```
49
+ */
50
+ export declare class EvaluatorMadeByTraceloop {
51
+ static create(slug: EvaluatorSlug, options?: {
52
+ version?: string;
53
+ config?: Record<string, unknown>;
54
+ }): EvaluatorWithConfig;
55
+ static getAvailableSlugs(): EvaluatorSlug[];
56
+ static isValidSlug(slug: string): slug is EvaluatorSlug;
57
+ /**
58
+ * Evaluate agent efficiency - detect redundant calls, unnecessary follow-ups
59
+
60
+ **Request Body:**
61
+ - `input.trajectory_prompts` (string, required): JSON array of prompts in the agent trajectory
62
+ - `input.trajectory_completions` (string, required): JSON array of completions in the agent trajectory
63
+ * Required task output fields: trajectory_completions, trajectory_prompts
64
+ */
65
+ static agentEfficiency(): EvaluatorWithConfig;
66
+ /**
67
+ * Validate agent trajectory against user-defined conditions
68
+
69
+ **Request Body:**
70
+ - `input.trajectory_prompts` (string, required): JSON array of prompts in the agent trajectory
71
+ - `input.trajectory_completions` (string, required): JSON array of completions in the agent trajectory
72
+ - `config.conditions` (array of strings, required): Array of evaluation conditions/rules to validate against
73
+ - `config.threshold` (number, required): Score threshold for pass/fail determination (0.0-1.0)
74
+ * Required task output fields: trajectory_completions, trajectory_prompts
75
+ */
76
+ static agentFlowQuality(config?: AgentFlowQualityConfig): EvaluatorWithConfig;
77
+ /**
78
+ * Evaluate agent goal accuracy
79
+
80
+ **Request Body:**
81
+ - `input.question` (string, required): The original question or goal
82
+ - `input.completion` (string, required): The agent's completion/response
83
+ - `input.reference` (string, required): The expected reference answer
84
+ * Required task output fields: completion, question, reference
85
+ */
86
+ static agentGoalAccuracy(): EvaluatorWithConfig;
87
+ /**
88
+ * Measure if agent accomplished all user goals
89
+
90
+ **Request Body:**
91
+ - `input.trajectory_prompts` (string, required): JSON array of prompts in the agent trajectory
92
+ - `input.trajectory_completions` (string, required): JSON array of completions in the agent trajectory
93
+ - `config.threshold` (number, required): Score threshold for pass/fail determination (0.0-1.0)
94
+ * Required task output fields: trajectory_completions, trajectory_prompts
95
+ */
96
+ static agentGoalCompleteness(config?: AgentGoalCompletenessConfig): EvaluatorWithConfig;
97
+ /**
98
+ * Detect errors or failures during tool execution
99
+
100
+ **Request Body:**
101
+ - `input.tool_input` (string, required): JSON string of the tool input
102
+ - `input.tool_output` (string, required): JSON string of the tool output
103
+ * Required task output fields: tool_input, tool_output
104
+ */
105
+ static agentToolErrorDetector(): EvaluatorWithConfig;
106
+ /**
107
+ * Compare actual tool calls against expected reference tool calls
108
+
109
+ **Request Body:**
110
+ - `input.executed_tool_calls` (string, required): JSON array of actual tool calls made by the agent
111
+ - `input.expected_tool_calls` (string, required): JSON array of expected/reference tool calls
112
+ - `config.threshold` (float, optional): Score threshold for pass/fail determination (default: 0.5)
113
+ - `config.mismatch_sensitive` (bool, optional): Whether tool calls must match exactly (default: false)
114
+ - `config.order_sensitive` (bool, optional): Whether order of tool calls matters (default: false)
115
+ - `config.input_params_sensitive` (bool, optional): Whether to compare input parameters (default: true)
116
+ * Required task output fields: executed_tool_calls, expected_tool_calls
117
+ */
118
+ static agentToolTrajectory(config?: AgentToolTrajectoryConfig): EvaluatorWithConfig;
119
+ /**
120
+ * Evaluate whether the answer is complete and contains all the necessary information
121
+
122
+ **Request Body:**
123
+ - `input.question` (string, required): The original question
124
+ - `input.completion` (string, required): The completion to evaluate for completeness
125
+ - `input.context` (string, required): The context that provides the complete information
126
+ * Required task output fields: completion, context, question
127
+ */
128
+ static answerCompleteness(): EvaluatorWithConfig;
129
+ /**
130
+ * Evaluate factual accuracy by comparing answers against ground truth
131
+
132
+ **Request Body:**
133
+ - `input.question` (string, required): The original question
134
+ - `input.completion` (string, required): The completion to evaluate
135
+ - `input.ground_truth` (string, required): The expected correct answer
136
+ * Required task output fields: completion, ground_truth, question
137
+ */
138
+ static answerCorrectness(): EvaluatorWithConfig;
139
+ /**
140
+ * Check if an answer is relevant to a question
141
+
142
+ **Request Body:**
143
+ - `input.answer` (string, required): The answer to evaluate for relevancy
144
+ - `input.question` (string, required): The question that the answer should be relevant to
145
+ * Required task output fields: answer, question
146
+ */
147
+ static answerRelevancy(): EvaluatorWithConfig;
148
+ /**
149
+ * Count the number of characters in text
150
+
151
+ **Request Body:**
152
+ - `input.text` (string, required): The text to count characters in
153
+ * Required task output fields: text
154
+ */
155
+ static charCount(): EvaluatorWithConfig;
156
+ /**
157
+ * Calculate the ratio of characters between two texts
158
+
159
+ **Request Body:**
160
+ - `input.numerator_text` (string, required): The numerator text (will be divided by denominator)
161
+ - `input.denominator_text` (string, required): The denominator text (divides the numerator)
162
+ * Required task output fields: denominator_text, numerator_text
163
+ */
164
+ static charCountRatio(): EvaluatorWithConfig;
165
+ /**
166
+ * Evaluate whether retrieved context contains sufficient information to answer the query
167
+
168
+ **Request Body:**
169
+ - `input.query` (string, required): The query/question to evaluate context relevance for
170
+ - `input.context` (string, required): The context to evaluate for relevance to the query
171
+ - `config.model` (string, optional): Model to use for evaluation (default: gpt-4o)
172
+ * Required task output fields: context, query
173
+ */
174
+ static contextRelevance(config?: ContextRelevanceConfig): EvaluatorWithConfig;
175
+ /**
176
+ * Evaluate conversation quality based on tone, clarity, flow, responsiveness, and transparency
177
+
178
+ **Request Body:**
179
+ - `input.prompts` (string, required): JSON array of prompts in the conversation
180
+ - `input.completions` (string, required): JSON array of completions in the conversation
181
+ * Required task output fields: completions, prompts
182
+ */
183
+ static conversationQuality(): EvaluatorWithConfig;
184
+ /**
185
+ * Check if a completion is faithful to the provided context
186
+
187
+ **Request Body:**
188
+ - `input.completion` (string, required): The LLM completion to check for faithfulness
189
+ - `input.context` (string, required): The context that the completion should be faithful to
190
+ - `input.question` (string, required): The original question asked
191
+ * Required task output fields: completion, context, question
192
+ */
193
+ static faithfulness(): EvaluatorWithConfig;
194
+ /**
195
+ * Compare two HTML documents for structural and content similarity
196
+
197
+ **Request Body:**
198
+ - `input.html1` (string, required): The first HTML document to compare
199
+ - `input.html2` (string, required): The second HTML document to compare
200
+ * Required task output fields: html1, html2
201
+ */
202
+ static htmlComparison(): EvaluatorWithConfig;
203
+ /**
204
+ * Evaluate how well responses follow given instructions
205
+
206
+ **Request Body:**
207
+ - `input.instructions` (string, required): The instructions that should be followed
208
+ - `input.response` (string, required): The response to evaluate for instruction adherence
209
+ * Required task output fields: instructions, response
210
+ */
211
+ static instructionAdherence(): EvaluatorWithConfig;
212
+ /**
213
+ * Detect changes in user intent between prompts and completions
214
+
215
+ **Request Body:**
216
+ - `input.prompts` (string, required): JSON array of prompts in the conversation
217
+ - `input.completions` (string, required): JSON array of completions in the conversation
218
+ * Required task output fields: completions, prompts
219
+ */
220
+ static intentChange(): EvaluatorWithConfig;
221
+ /**
222
+ * Validate JSON syntax
223
+
224
+ **Request Body:**
225
+ - `input.text` (string, required): The text to validate as JSON
226
+ - `config.enable_schema_validation` (bool, optional): Enable JSON schema validation
227
+ - `config.schema_string` (string, optional): JSON schema to validate against
228
+ * Required task output fields: text
229
+ */
230
+ static jsonValidator(config?: JsonValidatorConfig): EvaluatorWithConfig;
231
+ /**
232
+ * Measure text perplexity from logprobs
233
+
234
+ **Request Body:**
235
+ - `input.logprobs` (string, required): JSON array of log probabilities from the model
236
+ * Required task output fields: logprobs
237
+ */
238
+ static perplexity(): EvaluatorWithConfig;
239
+ /**
240
+ * Detect personally identifiable information in text
241
+
242
+ **Request Body:**
243
+ - `input.text` (string, required): The text to scan for personally identifiable information
244
+ - `config.probability_threshold` (float, optional): Detection threshold (default: 0.8)
245
+ * Required task output fields: text
246
+ */
247
+ static piiDetector(config?: PiiDetectorConfig): EvaluatorWithConfig;
248
+ /**
249
+ * Validate text against a placeholder regex pattern
250
+
251
+ **Request Body:**
252
+ - `input.placeholder_value` (string, required): The regex pattern to match against
253
+ - `input.text` (string, required): The text to validate against the regex pattern
254
+ - `config.should_match` (bool, optional): Whether the text should match the regex
255
+ - `config.case_sensitive` (bool, optional): Case-sensitive matching
256
+ - `config.dot_include_nl` (bool, optional): Dot matches newlines
257
+ - `config.multi_line` (bool, optional): Multi-line mode
258
+ * Required task output fields: placeholder_value, text
259
+ */
260
+ static placeholderRegex(config?: PlaceholderRegexConfig): EvaluatorWithConfig;
261
+ /**
262
+ * Detect profanity in text
263
+
264
+ **Request Body:**
265
+ - `input.text` (string, required): The text to scan for profanity
266
+ * Required task output fields: text
267
+ */
268
+ static profanityDetector(): EvaluatorWithConfig;
269
+ /**
270
+ * Detect prompt injection attempts
271
+
272
+ **Request Body:**
273
+ - `input.prompt` (string, required): The prompt to check for injection attempts
274
+ - `config.threshold` (float, optional): Detection threshold (default: 0.5)
275
+ * Required task output fields: prompt
276
+ */
277
+ static promptInjection(config?: PromptInjectionConfig): EvaluatorWithConfig;
278
+ /**
279
+ * Measure prompt perplexity to detect potential injection attempts
280
+
281
+ **Request Body:**
282
+ - `input.prompt` (string, required): The prompt to calculate perplexity for
283
+ * Required task output fields: prompt
284
+ */
285
+ static promptPerplexity(): EvaluatorWithConfig;
286
+ /**
287
+ * Validate text against a regex pattern
288
+
289
+ **Request Body:**
290
+ - `input.text` (string, required): The text to validate against a regex pattern
291
+ - `config.regex` (string, optional): The regex pattern to match against
292
+ - `config.should_match` (bool, optional): Whether the text should match the regex
293
+ - `config.case_sensitive` (bool, optional): Case-sensitive matching
294
+ - `config.dot_include_nl` (bool, optional): Dot matches newlines
295
+ - `config.multi_line` (bool, optional): Multi-line mode
296
+ * Required task output fields: text
297
+ */
298
+ static regexValidator(config?: RegexValidatorConfig): EvaluatorWithConfig;
299
+ /**
300
+ * Detect secrets and credentials in text
301
+
302
+ **Request Body:**
303
+ - `input.text` (string, required): The text to scan for secrets (API keys, passwords, etc.)
304
+ * Required task output fields: text
305
+ */
306
+ static secretsDetector(): EvaluatorWithConfig;
307
+ /**
308
+ * Calculate semantic similarity between completion and reference
309
+
310
+ **Request Body:**
311
+ - `input.completion` (string, required): The completion text to compare
312
+ - `input.reference` (string, required): The reference text to compare against
313
+ * Required task output fields: completion, reference
314
+ */
315
+ static semanticSimilarity(): EvaluatorWithConfig;
316
+ /**
317
+ * Detect sexist language and bias
318
+
319
+ **Request Body:**
320
+ - `input.text` (string, required): The text to scan for sexist content
321
+ - `config.threshold` (float, optional): Detection threshold (default: 0.5)
322
+ * Required task output fields: text
323
+ */
324
+ static sexismDetector(config?: SexismDetectorConfig): EvaluatorWithConfig;
325
+ /**
326
+ * Validate SQL query syntax
327
+
328
+ **Request Body:**
329
+ - `input.text` (string, required): The text to validate as SQL
330
+ * Required task output fields: text
331
+ */
332
+ static sqlValidator(): EvaluatorWithConfig;
333
+ /**
334
+ * Detect the tone of the text
335
+
336
+ **Request Body:**
337
+ - `input.text` (string, required): The text to detect the tone of
338
+ * Required task output fields: text
339
+ */
340
+ static toneDetection(): EvaluatorWithConfig;
341
+ /**
342
+ * Evaluate topic adherence
343
+
344
+ **Request Body:**
345
+ - `input.question` (string, required): The original question
346
+ - `input.completion` (string, required): The completion to evaluate
347
+ - `input.reference_topics` (string, required): Comma-separated list of expected topics
348
+ * Required task output fields: completion, question, reference_topics
349
+ */
350
+ static topicAdherence(): EvaluatorWithConfig;
351
+ /**
352
+ * Detect toxic or harmful language
353
+
354
+ **Request Body:**
355
+ - `input.text` (string, required): The text to scan for toxic content
356
+ - `config.threshold` (float, optional): Detection threshold (default: 0.5)
357
+ * Required task output fields: text
358
+ */
359
+ static toxicityDetector(config?: ToxicityDetectorConfig): EvaluatorWithConfig;
360
+ /**
361
+ * Detect uncertainty in the text
362
+
363
+ **Request Body:**
364
+ - `input.prompt` (string, required): The text to detect uncertainty in
365
+ * Required task output fields: prompt
366
+ */
367
+ static uncertaintyDetector(): EvaluatorWithConfig;
368
+ /**
369
+ * Count the number of words in text
370
+
371
+ **Request Body:**
372
+ - `input.text` (string, required): The text to count words in
373
+ * Required task output fields: text
374
+ */
375
+ static wordCount(): EvaluatorWithConfig;
376
+ /**
377
+ * Calculate the ratio of words between two texts
378
+
379
+ **Request Body:**
380
+ - `input.numerator_text` (string, required): The numerator text (will be divided by denominator)
381
+ - `input.denominator_text` (string, required): The denominator text (divides the numerator)
382
+ * Required task output fields: denominator_text, numerator_text
383
+ */
384
+ static wordCountRatio(): EvaluatorWithConfig;
385
+ }
386
+ //# sourceMappingURL=mbt-evaluators.d.ts.map
@@ -0,0 +1,12 @@
1
+ export interface EvaluatorSchema {
2
+ slug: string;
3
+ requiredInputFields: string[];
4
+ optionalConfigFields: string[];
5
+ description?: string;
6
+ }
7
+ export type EvaluatorSlug = 'agent-efficiency' | 'agent-flow-quality' | 'agent-goal-accuracy' | 'agent-goal-completeness' | 'agent-tool-error-detector' | 'agent-tool-trajectory' | 'answer-completeness' | 'answer-correctness' | 'answer-relevancy' | 'char-count' | 'char-count-ratio' | 'context-relevance' | 'conversation-quality' | 'faithfulness' | 'html-comparison' | 'instruction-adherence' | 'intent-change' | 'json-validator' | 'perplexity' | 'pii-detector' | 'placeholder-regex' | 'profanity-detector' | 'prompt-injection' | 'prompt-perplexity' | 'regex-validator' | 'secrets-detector' | 'semantic-similarity' | 'sexism-detector' | 'sql-validator' | 'tone-detection' | 'topic-adherence' | 'toxicity-detector' | 'uncertainty-detector' | 'word-count' | 'word-count-ratio';
8
+ export declare const EVALUATOR_SLUGS: EvaluatorSlug[];
9
+ export declare const EVALUATOR_SCHEMAS: Record<EvaluatorSlug, EvaluatorSchema>;
10
+ export declare function getEvaluatorSchema<S extends EvaluatorSlug>(slug: S): EvaluatorSchema;
11
+ export declare function isValidEvaluatorSlug(slug: string): slug is EvaluatorSlug;
12
+ //# sourceMappingURL=registry.d.ts.map