axiom 0.27.0 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin.cjs +43 -496
- package/dist/bin.cjs.map +1 -1
- package/dist/bin.js +5 -4
- package/dist/bin.js.map +1 -1
- package/dist/{chunk-CZJEEQDG.js → chunk-5TVCLHTM.js} +7 -9
- package/dist/chunk-5TVCLHTM.js.map +1 -0
- package/dist/{chunk-YCOR62XR.js → chunk-CSMSR3XW.js} +26 -241
- package/dist/chunk-CSMSR3XW.js.map +1 -0
- package/dist/chunk-MM5FFQJT.js +19 -0
- package/dist/chunk-MM5FFQJT.js.map +1 -0
- package/dist/{chunk-3YNZM3A7.js → chunk-WMSQHW3M.js} +13 -2
- package/dist/chunk-WMSQHW3M.js.map +1 -0
- package/dist/evals/custom-runner.cjs +17 -5
- package/dist/evals/custom-runner.cjs.map +1 -1
- package/dist/evals/custom-runner.js +2 -1
- package/dist/evals/custom-runner.js.map +1 -1
- package/dist/evals.cjs +84 -67
- package/dist/evals.cjs.map +1 -1
- package/dist/evals.d.cts +13 -80
- package/dist/evals.d.ts +13 -80
- package/dist/evals.js +237 -6
- package/dist/evals.js.map +1 -1
- package/dist/index.cjs +26 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +5 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/name-validation.d-CDPeW_pV.d.cts +81 -0
- package/dist/name-validation.d-CDPeW_pV.d.ts +81 -0
- package/package.json +1 -1
- package/dist/chunk-3YNZM3A7.js.map +0 -1
- package/dist/chunk-CZJEEQDG.js.map +0 -1
- package/dist/chunk-YCOR62XR.js.map +0 -1
package/dist/bin.cjs
CHANGED
|
@@ -249,290 +249,7 @@ var import_api5 = require("@opentelemetry/api");
|
|
|
249
249
|
|
|
250
250
|
// src/otel/semconv/attributes.ts
|
|
251
251
|
var import_semantic_conventions = require("@opentelemetry/semantic-conventions");
|
|
252
|
-
|
|
253
|
-
// src/otel/semconv/eval_proposal.ts
|
|
254
|
-
var ATTR_EVAL_ID = "eval.id";
|
|
255
|
-
var ATTR_EVAL_NAME = "eval.name";
|
|
256
|
-
var ATTR_EVAL_VERSION = "eval.version";
|
|
257
|
-
var ATTR_EVAL_TYPE = "eval.type";
|
|
258
|
-
var ATTR_EVAL_TAGS = "eval.tags";
|
|
259
|
-
var ATTR_EVAL_BASELINE_ID = "eval.baseline.id";
|
|
260
|
-
var ATTR_EVAL_BASELINE_NAME = "eval.baseline.name";
|
|
261
|
-
var ATTR_EVAL_BASELINE_VERSION = "eval.baseline.version";
|
|
262
|
-
var ATTR_EVAL_METADATA = "eval.metadata";
|
|
263
|
-
var ATTR_EVAL_CAPABILITY_NAME = "eval.capability.name";
|
|
264
|
-
var ATTR_EVAL_STEP_NAME = "eval.step.name";
|
|
265
|
-
var ATTR_EVAL_COLLECTION_ID = "eval.collection.id";
|
|
266
|
-
var ATTR_EVAL_COLLECTION_SIZE = "eval.collection.size";
|
|
267
|
-
var ATTR_EVAL_COLLECTION_NAME = "eval.collection.name";
|
|
268
|
-
var ATTR_EVAL_CONFIG_FLAGS = "eval.config.flags";
|
|
269
|
-
var ATTR_EVAL_CASE_INDEX = "eval.case.index";
|
|
270
|
-
var ATTR_EVAL_CASE_INPUT = "eval.case.input";
|
|
271
|
-
var ATTR_EVAL_CASE_OUTPUT = "eval.case.output";
|
|
272
|
-
var ATTR_EVAL_CASE_EXPECTED = "eval.case.expected";
|
|
273
|
-
var ATTR_EVAL_CASE_SCORES = "eval.case.scores";
|
|
274
|
-
var ATTR_EVAL_CASE_METADATA = "eval.case.metadata";
|
|
275
|
-
var ATTR_EVAL_TASK_OUTPUT = "eval.task.output";
|
|
276
|
-
var ATTR_EVAL_TASK_NAME = "eval.task.name";
|
|
277
|
-
var ATTR_EVAL_TASK_TYPE = "eval.task.type";
|
|
278
|
-
var ATTR_EVAL_RUN_ID = "eval.run.id";
|
|
279
|
-
var ATTR_EVAL_SCORE_NAME = "eval.score.name";
|
|
280
|
-
var ATTR_EVAL_SCORE_VALUE = "eval.score.value";
|
|
281
|
-
var ATTR_EVAL_SCORE_THRESHOLD = "eval.score.threshold";
|
|
282
|
-
var ATTR_EVAL_SCORE_PASSED = "eval.score.passed";
|
|
283
|
-
var ATTR_EVAL_SCORE_METADATA = "eval.score.metadata";
|
|
284
|
-
var ATTR_EVAL_USER_NAME = "eval.user.name";
|
|
285
|
-
var ATTR_EVAL_USER_EMAIL = "eval.user.email";
|
|
286
|
-
|
|
287
|
-
// src/otel/semconv/attributes.ts
|
|
288
252
|
var import_incubating = require("@opentelemetry/semantic-conventions/incubating");
|
|
289
|
-
var ATTR_AXIOM_GEN_AI_SCHEMA_URL = "axiom.gen_ai.schema_url";
|
|
290
|
-
var ATTR_AXIOM_GEN_AI_SDK_NAME = "axiom.gen_ai.sdk.name";
|
|
291
|
-
var ATTR_AXIOM_GEN_AI_SDK_VERSION = "axiom.gen_ai.sdk.version";
|
|
292
|
-
var ATTR_GEN_AI_CAPABILITY_NAME = "gen_ai.capability.name";
|
|
293
|
-
var ATTR_GEN_AI_STEP_NAME = "gen_ai.step.name";
|
|
294
|
-
var ATTR_GEN_AI_TOOL_ARGUMENTS = "gen_ai.tool.arguments";
|
|
295
|
-
var ATTR_GEN_AI_TOOL_MESSAGE = "gen_ai.tool.message";
|
|
296
|
-
var GEN_AI_PROVIDER_NAME_VALUE_ASSEMBLYAI = "assemblyai";
|
|
297
|
-
var GEN_AI_PROVIDER_NAME_VALUE_CEREBRAS = "cerebras";
|
|
298
|
-
var GEN_AI_PROVIDER_NAME_VALUE_DEEPGRAM = "deepgram";
|
|
299
|
-
var GEN_AI_PROVIDER_NAME_VALUE_DEEPINFRA = "deepinfra";
|
|
300
|
-
var GEN_AI_PROVIDER_NAME_VALUE_ELEVENLABS = "elevenlabs";
|
|
301
|
-
var GEN_AI_PROVIDER_NAME_VALUE_FAL = "fal";
|
|
302
|
-
var GEN_AI_PROVIDER_NAME_VALUE_FIREWORKS = "fireworks";
|
|
303
|
-
var GEN_AI_PROVIDER_NAME_VALUE_GLADIA = "gladia";
|
|
304
|
-
var GEN_AI_PROVIDER_NAME_VALUE_HUME = "hume";
|
|
305
|
-
var GEN_AI_PROVIDER_NAME_VALUE_LMNT = "lmnt";
|
|
306
|
-
var GEN_AI_PROVIDER_NAME_VALUE_LUMA = "luma";
|
|
307
|
-
var GEN_AI_PROVIDER_NAME_VALUE_REPLICATE = "replicate";
|
|
308
|
-
var GEN_AI_PROVIDER_NAME_VALUE_REVAI = "revai";
|
|
309
|
-
var GEN_AI_PROVIDER_NAME_VALUE_TOGETHERAI = "togetherai";
|
|
310
|
-
var GEN_AI_PROVIDER_NAME_VALUE_VERCEL = "vercel";
|
|
311
|
-
var Attr = {
|
|
312
|
-
__EXPERIMENTAL_Flag: (flagName) => `flag.${flagName}`,
|
|
313
|
-
__EXPERIMENTAL_Fact: (factName) => `fact.${factName}`,
|
|
314
|
-
Axiom: {
|
|
315
|
-
GenAI: {
|
|
316
|
-
SchemaURL: ATTR_AXIOM_GEN_AI_SCHEMA_URL,
|
|
317
|
-
SDK: {
|
|
318
|
-
Name: ATTR_AXIOM_GEN_AI_SDK_NAME,
|
|
319
|
-
Version: ATTR_AXIOM_GEN_AI_SDK_VERSION
|
|
320
|
-
}
|
|
321
|
-
}
|
|
322
|
-
},
|
|
323
|
-
GenAI: {
|
|
324
|
-
PromptMetadata: {
|
|
325
|
-
ID: "axiom.gen_ai.prompt.id",
|
|
326
|
-
Name: "axiom.gen_ai.prompt.name",
|
|
327
|
-
Slug: "axiom.gen_ai.prompt.slug",
|
|
328
|
-
Version: "axiom.gen_ai.prompt.version"
|
|
329
|
-
},
|
|
330
|
-
/**
|
|
331
|
-
* These two are used to identify the span
|
|
332
|
-
*/
|
|
333
|
-
Capability: {
|
|
334
|
-
Name: ATTR_GEN_AI_CAPABILITY_NAME
|
|
335
|
-
},
|
|
336
|
-
Step: {
|
|
337
|
-
Name: ATTR_GEN_AI_STEP_NAME
|
|
338
|
-
},
|
|
339
|
-
Provider: {
|
|
340
|
-
Name: import_incubating.ATTR_GEN_AI_PROVIDER_NAME,
|
|
341
|
-
Name_Values: {
|
|
342
|
-
Anthropic: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_ANTHROPIC,
|
|
343
|
-
AssemblyAI: GEN_AI_PROVIDER_NAME_VALUE_ASSEMBLYAI,
|
|
344
|
-
AWSBedrock: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_AWS_BEDROCK,
|
|
345
|
-
AzureAIInference: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_AZURE_AI_INFERENCE,
|
|
346
|
-
AzureAIOpenAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_AZURE_AI_OPENAI,
|
|
347
|
-
Cerebras: GEN_AI_PROVIDER_NAME_VALUE_CEREBRAS,
|
|
348
|
-
Cohere: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_COHERE,
|
|
349
|
-
Deepgram: GEN_AI_PROVIDER_NAME_VALUE_DEEPGRAM,
|
|
350
|
-
DeepInfra: GEN_AI_PROVIDER_NAME_VALUE_DEEPINFRA,
|
|
351
|
-
Deepseek: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_DEEPSEEK,
|
|
352
|
-
ElevenLabs: GEN_AI_PROVIDER_NAME_VALUE_ELEVENLABS,
|
|
353
|
-
Fal: GEN_AI_PROVIDER_NAME_VALUE_FAL,
|
|
354
|
-
Fireworks: GEN_AI_PROVIDER_NAME_VALUE_FIREWORKS,
|
|
355
|
-
GCPGemini: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GCP_GEMINI,
|
|
356
|
-
GCPGenAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GCP_GEN_AI,
|
|
357
|
-
GCPVertexAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GCP_VERTEX_AI,
|
|
358
|
-
Gladia: GEN_AI_PROVIDER_NAME_VALUE_GLADIA,
|
|
359
|
-
Groq: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_GROQ,
|
|
360
|
-
Hume: GEN_AI_PROVIDER_NAME_VALUE_HUME,
|
|
361
|
-
IBMWatsonxAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_IBM_WATSONX_AI,
|
|
362
|
-
Lmnt: GEN_AI_PROVIDER_NAME_VALUE_LMNT,
|
|
363
|
-
Luma: GEN_AI_PROVIDER_NAME_VALUE_LUMA,
|
|
364
|
-
MistralAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_MISTRAL_AI,
|
|
365
|
-
OpenAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_OPENAI,
|
|
366
|
-
Perplexity: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_PERPLEXITY,
|
|
367
|
-
Replicate: GEN_AI_PROVIDER_NAME_VALUE_REPLICATE,
|
|
368
|
-
RevAI: GEN_AI_PROVIDER_NAME_VALUE_REVAI,
|
|
369
|
-
TogetherAI: GEN_AI_PROVIDER_NAME_VALUE_TOGETHERAI,
|
|
370
|
-
Vercel: GEN_AI_PROVIDER_NAME_VALUE_VERCEL,
|
|
371
|
-
XAI: import_incubating.GEN_AI_PROVIDER_NAME_VALUE_X_AI
|
|
372
|
-
}
|
|
373
|
-
},
|
|
374
|
-
/**
|
|
375
|
-
* Regular attributes
|
|
376
|
-
*/
|
|
377
|
-
Agent: {
|
|
378
|
-
Description: import_incubating.ATTR_GEN_AI_AGENT_DESCRIPTION,
|
|
379
|
-
// not yet used by axiom-ai
|
|
380
|
-
ID: import_incubating.ATTR_GEN_AI_AGENT_ID,
|
|
381
|
-
// not yet used by axiom-ai
|
|
382
|
-
Name: import_incubating.ATTR_GEN_AI_AGENT_NAME
|
|
383
|
-
// not yet used by axiom-ai
|
|
384
|
-
},
|
|
385
|
-
Conversation: {
|
|
386
|
-
ID: import_incubating.ATTR_GEN_AI_CONVERSATION_ID
|
|
387
|
-
// not yet used by axiom-ai, anyway probably needs to be provided by user
|
|
388
|
-
},
|
|
389
|
-
Input: {
|
|
390
|
-
Messages: import_incubating.ATTR_GEN_AI_INPUT_MESSAGES
|
|
391
|
-
},
|
|
392
|
-
Operation: {
|
|
393
|
-
Name: import_incubating.ATTR_GEN_AI_OPERATION_NAME,
|
|
394
|
-
Name_Values: {
|
|
395
|
-
/**
|
|
396
|
-
* Note that "text_completion" is deprecated in favor of "chat" for both OpenAI and Anthropic
|
|
397
|
-
*/
|
|
398
|
-
Chat: import_incubating.GEN_AI_OPERATION_NAME_VALUE_CHAT,
|
|
399
|
-
CreateAgent: import_incubating.GEN_AI_OPERATION_NAME_VALUE_CREATE_AGENT,
|
|
400
|
-
Embeddings: import_incubating.GEN_AI_OPERATION_NAME_VALUE_EMBEDDINGS,
|
|
401
|
-
ExecuteTool: import_incubating.GEN_AI_OPERATION_NAME_VALUE_EXECUTE_TOOL,
|
|
402
|
-
GenerateContent: import_incubating.GEN_AI_OPERATION_NAME_VALUE_GENERATE_CONTENT,
|
|
403
|
-
InvokeAgent: import_incubating.GEN_AI_OPERATION_NAME_VALUE_INVOKE_AGENT
|
|
404
|
-
}
|
|
405
|
-
},
|
|
406
|
-
Output: {
|
|
407
|
-
Messages: import_incubating.ATTR_GEN_AI_OUTPUT_MESSAGES,
|
|
408
|
-
Type: import_incubating.ATTR_GEN_AI_OUTPUT_TYPE,
|
|
409
|
-
Type_Values: {
|
|
410
|
-
Text: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_TEXT,
|
|
411
|
-
Json: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_JSON,
|
|
412
|
-
Image: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_IMAGE,
|
|
413
|
-
Speech: import_incubating.GEN_AI_OUTPUT_TYPE_VALUE_SPEECH
|
|
414
|
-
}
|
|
415
|
-
},
|
|
416
|
-
/**
|
|
417
|
-
* The provider that is hosting the model, eg AWS Bedrock
|
|
418
|
-
* There doesn't seem to be a semconv for this
|
|
419
|
-
*/
|
|
420
|
-
Request: {
|
|
421
|
-
ChoiceCount: import_incubating.ATTR_GEN_AI_REQUEST_CHOICE_COUNT,
|
|
422
|
-
// not yet used by axiom-ai
|
|
423
|
-
EncodingFormats: import_incubating.ATTR_GEN_AI_REQUEST_ENCODING_FORMATS,
|
|
424
|
-
// not yet used by axiom-ai
|
|
425
|
-
FrequencyPenalty: import_incubating.ATTR_GEN_AI_REQUEST_FREQUENCY_PENALTY,
|
|
426
|
-
MaxTokens: import_incubating.ATTR_GEN_AI_REQUEST_MAX_TOKENS,
|
|
427
|
-
/**
|
|
428
|
-
* The model you asked for
|
|
429
|
-
*/
|
|
430
|
-
Model: import_incubating.ATTR_GEN_AI_REQUEST_MODEL,
|
|
431
|
-
PresencePenalty: import_incubating.ATTR_GEN_AI_REQUEST_PRESENCE_PENALTY,
|
|
432
|
-
Seed: import_incubating.ATTR_GEN_AI_REQUEST_SEED,
|
|
433
|
-
StopSequences: import_incubating.ATTR_GEN_AI_REQUEST_STOP_SEQUENCES,
|
|
434
|
-
Temperature: import_incubating.ATTR_GEN_AI_REQUEST_TEMPERATURE,
|
|
435
|
-
TopK: import_incubating.ATTR_GEN_AI_REQUEST_TOP_K,
|
|
436
|
-
TopP: import_incubating.ATTR_GEN_AI_REQUEST_TOP_P
|
|
437
|
-
},
|
|
438
|
-
Response: {
|
|
439
|
-
FinishReasons: import_incubating.ATTR_GEN_AI_RESPONSE_FINISH_REASONS,
|
|
440
|
-
ID: import_incubating.ATTR_GEN_AI_RESPONSE_ID,
|
|
441
|
-
/**
|
|
442
|
-
* The model that was actually used (might be different bc routing) - only ever get this from the response, otherwise omit
|
|
443
|
-
*/
|
|
444
|
-
Model: import_incubating.ATTR_GEN_AI_RESPONSE_MODEL
|
|
445
|
-
// somehow not landing on the span for google models? check up on this...
|
|
446
|
-
},
|
|
447
|
-
Tool: {
|
|
448
|
-
CallID: import_incubating.ATTR_GEN_AI_TOOL_CALL_ID,
|
|
449
|
-
Description: import_incubating.ATTR_GEN_AI_TOOL_DESCRIPTION,
|
|
450
|
-
Name: import_incubating.ATTR_GEN_AI_TOOL_NAME,
|
|
451
|
-
Type: import_incubating.ATTR_GEN_AI_TOOL_TYPE,
|
|
452
|
-
/**
|
|
453
|
-
* Note, OTel Semantic Convention suggest only putting tool inputs/outputs on the parent chat span
|
|
454
|
-
* But we at least want to give users THE OPTION to put them on the tool spans themselves as well
|
|
455
|
-
* Because it enables a lot of things with querying
|
|
456
|
-
* @see https://github.com/open-telemetry/semantic-conventions/releases/tag/v1.37.0
|
|
457
|
-
*/
|
|
458
|
-
Arguments: ATTR_GEN_AI_TOOL_ARGUMENTS,
|
|
459
|
-
/**
|
|
460
|
-
* Note, OTel Semantic Convention suggest only putting tool inputs/outputs on the parent chat span
|
|
461
|
-
* But we at least want to give users THE OPTION to put them on the tool spans themselves as well
|
|
462
|
-
* Because it enables a lot of things with querying
|
|
463
|
-
* @see https://github.com/open-telemetry/semantic-conventions/releases/tag/v1.37.0
|
|
464
|
-
*/
|
|
465
|
-
Message: ATTR_GEN_AI_TOOL_MESSAGE
|
|
466
|
-
},
|
|
467
|
-
Usage: {
|
|
468
|
-
InputTokens: import_incubating.ATTR_GEN_AI_USAGE_INPUT_TOKENS,
|
|
469
|
-
OutputTokens: import_incubating.ATTR_GEN_AI_USAGE_OUTPUT_TOKENS
|
|
470
|
-
}
|
|
471
|
-
},
|
|
472
|
-
Eval: {
|
|
473
|
-
ID: ATTR_EVAL_ID,
|
|
474
|
-
Name: ATTR_EVAL_NAME,
|
|
475
|
-
Version: ATTR_EVAL_VERSION,
|
|
476
|
-
Type: ATTR_EVAL_TYPE,
|
|
477
|
-
Baseline: {
|
|
478
|
-
ID: ATTR_EVAL_BASELINE_ID,
|
|
479
|
-
Name: ATTR_EVAL_BASELINE_NAME,
|
|
480
|
-
Version: ATTR_EVAL_BASELINE_VERSION
|
|
481
|
-
},
|
|
482
|
-
Capability: {
|
|
483
|
-
Name: ATTR_EVAL_CAPABILITY_NAME
|
|
484
|
-
},
|
|
485
|
-
Step: {
|
|
486
|
-
Name: ATTR_EVAL_STEP_NAME
|
|
487
|
-
},
|
|
488
|
-
Tags: ATTR_EVAL_TAGS,
|
|
489
|
-
Metadata: ATTR_EVAL_METADATA,
|
|
490
|
-
Collection: {
|
|
491
|
-
ID: ATTR_EVAL_COLLECTION_ID,
|
|
492
|
-
Name: ATTR_EVAL_COLLECTION_NAME,
|
|
493
|
-
Size: ATTR_EVAL_COLLECTION_SIZE
|
|
494
|
-
},
|
|
495
|
-
Config: {
|
|
496
|
-
Flags: ATTR_EVAL_CONFIG_FLAGS
|
|
497
|
-
},
|
|
498
|
-
Run: {
|
|
499
|
-
ID: ATTR_EVAL_RUN_ID
|
|
500
|
-
},
|
|
501
|
-
Case: {
|
|
502
|
-
Index: ATTR_EVAL_CASE_INDEX,
|
|
503
|
-
Input: ATTR_EVAL_CASE_INPUT,
|
|
504
|
-
Output: ATTR_EVAL_CASE_OUTPUT,
|
|
505
|
-
Expected: ATTR_EVAL_CASE_EXPECTED,
|
|
506
|
-
Scores: ATTR_EVAL_CASE_SCORES,
|
|
507
|
-
Metadata: ATTR_EVAL_CASE_METADATA
|
|
508
|
-
},
|
|
509
|
-
Task: {
|
|
510
|
-
Output: ATTR_EVAL_TASK_OUTPUT,
|
|
511
|
-
Name: ATTR_EVAL_TASK_NAME,
|
|
512
|
-
Type: ATTR_EVAL_TASK_TYPE
|
|
513
|
-
},
|
|
514
|
-
Score: {
|
|
515
|
-
Name: ATTR_EVAL_SCORE_NAME,
|
|
516
|
-
Value: ATTR_EVAL_SCORE_VALUE,
|
|
517
|
-
Threshold: ATTR_EVAL_SCORE_THRESHOLD,
|
|
518
|
-
Passed: ATTR_EVAL_SCORE_PASSED,
|
|
519
|
-
Metadata: ATTR_EVAL_SCORE_METADATA
|
|
520
|
-
},
|
|
521
|
-
User: {
|
|
522
|
-
Name: ATTR_EVAL_USER_NAME,
|
|
523
|
-
Email: ATTR_EVAL_USER_EMAIL
|
|
524
|
-
}
|
|
525
|
-
},
|
|
526
|
-
Error: {
|
|
527
|
-
Type: import_semantic_conventions.ATTR_ERROR_TYPE,
|
|
528
|
-
Message: import_incubating.ATTR_ERROR_MESSAGE
|
|
529
|
-
},
|
|
530
|
-
HTTP: {
|
|
531
|
-
Response: {
|
|
532
|
-
StatusCode: import_semantic_conventions.ATTR_HTTP_RESPONSE_STATUS_CODE
|
|
533
|
-
}
|
|
534
|
-
}
|
|
535
|
-
};
|
|
536
253
|
|
|
537
254
|
// src/otel/startActiveSpan.ts
|
|
538
255
|
var import_api2 = require("@opentelemetry/api");
|
|
@@ -543,7 +260,7 @@ var import_api4 = require("@opentelemetry/api");
|
|
|
543
260
|
// package.json
|
|
544
261
|
var package_default = {
|
|
545
262
|
name: "axiom",
|
|
546
|
-
version: "0.27.0",
|
|
263
|
+
version: "0.28.0",
|
|
547
264
|
type: "module",
|
|
548
265
|
author: "Axiom, Inc.",
|
|
549
266
|
contributors: [
|
|
@@ -747,191 +464,6 @@ function withEvalContext(options = {}, fn) {
|
|
|
747
464
|
);
|
|
748
465
|
}
|
|
749
466
|
|
|
750
|
-
// src/config/resolver.ts
|
|
751
|
-
var buildConsoleUrl = (urlString) => {
|
|
752
|
-
const url = new URL(urlString);
|
|
753
|
-
return `${url.protocol}//app.${url.host.split("api.").at(-1)}`;
|
|
754
|
-
};
|
|
755
|
-
function resolveAxiomConnection(config) {
|
|
756
|
-
let consoleEndpointUrl = buildConsoleUrl(config.eval.url);
|
|
757
|
-
if ("__overrideEndpointUrl" in config.eval) {
|
|
758
|
-
consoleEndpointUrl = config.eval.__overrideEndpointUrl;
|
|
759
|
-
}
|
|
760
|
-
return {
|
|
761
|
-
url: config.eval.url,
|
|
762
|
-
consoleEndpointUrl,
|
|
763
|
-
token: config.eval.token,
|
|
764
|
-
dataset: config.eval.dataset,
|
|
765
|
-
orgId: config.eval.orgId
|
|
766
|
-
};
|
|
767
|
-
}
|
|
768
|
-
|
|
769
|
-
// src/cli/errors.ts
|
|
770
|
-
var AxiomCLIError = class extends Error {
|
|
771
|
-
constructor(message) {
|
|
772
|
-
super(message);
|
|
773
|
-
this.name = "AxiomCLIError";
|
|
774
|
-
}
|
|
775
|
-
};
|
|
776
|
-
function errorToString(error) {
|
|
777
|
-
if (typeof error === "string") {
|
|
778
|
-
return error;
|
|
779
|
-
}
|
|
780
|
-
if (error instanceof Error) {
|
|
781
|
-
return error.message;
|
|
782
|
-
}
|
|
783
|
-
return JSON.stringify(error);
|
|
784
|
-
}
|
|
785
|
-
|
|
786
|
-
// src/evals/eval.service.ts
|
|
787
|
-
var findEvaluationCases = async (evalId, config) => {
|
|
788
|
-
const { dataset, url, token, orgId } = resolveAxiomConnection(config);
|
|
789
|
-
const apl = `['${dataset}'] | where trace_id == "${evalId}" | order by _time`;
|
|
790
|
-
const headers = new Headers({
|
|
791
|
-
Authorization: `Bearer ${token}`,
|
|
792
|
-
"Content-Type": "application/json",
|
|
793
|
-
...orgId ? { "X-AXIOM-ORG-ID": orgId } : {}
|
|
794
|
-
});
|
|
795
|
-
const resp = await fetch(`${url}/v1/datasets/_apl?format=legacy`, {
|
|
796
|
-
headers,
|
|
797
|
-
method: "POST",
|
|
798
|
-
body: JSON.stringify({ apl })
|
|
799
|
-
});
|
|
800
|
-
const payload = await resp.json();
|
|
801
|
-
if (!resp.ok) {
|
|
802
|
-
throw new Error(`Failed to query evaluation cases: ${payload.message || resp.statusText}`);
|
|
803
|
-
}
|
|
804
|
-
return payload.matches.length ? buildSpanTree(payload.matches) : null;
|
|
805
|
-
};
|
|
806
|
-
var mapSpanToEval = (span) => {
|
|
807
|
-
const flagConfigRaw = span.data.attributes[Attr.Eval.Config.Flags] ?? span.data.attributes.custom[Attr.Eval.Config.Flags];
|
|
808
|
-
return {
|
|
809
|
-
id: span.data.attributes.custom[Attr.Eval.ID],
|
|
810
|
-
name: span.data.attributes.custom[Attr.Eval.Name],
|
|
811
|
-
type: span.data.attributes.custom[Attr.Eval.Type],
|
|
812
|
-
version: span.data.attributes.custom[Attr.Eval.Version],
|
|
813
|
-
collection: {
|
|
814
|
-
name: span.data.attributes.custom[Attr.Eval.Collection.Name],
|
|
815
|
-
size: span.data.attributes.custom[Attr.Eval.Collection.Size]
|
|
816
|
-
},
|
|
817
|
-
baseline: {
|
|
818
|
-
id: span.data.attributes.custom[Attr.Eval.Baseline.ID],
|
|
819
|
-
name: span.data.attributes.custom[Attr.Eval.Baseline.Name]
|
|
820
|
-
},
|
|
821
|
-
prompt: {
|
|
822
|
-
// TODO: do we still want this?
|
|
823
|
-
model: span.data.attributes.custom["eval.prompt.model"],
|
|
824
|
-
params: span.data.attributes.custom["eval.prompt.params"]
|
|
825
|
-
},
|
|
826
|
-
duration: span.data.duration,
|
|
827
|
-
status: span.data.status.code,
|
|
828
|
-
traceId: span.data.trace_id,
|
|
829
|
-
runAt: span._time,
|
|
830
|
-
tags: span.data.attributes.custom[Attr.Eval.Tags].length ? JSON.parse(span.data.attributes.custom[Attr.Eval.Tags]) : [],
|
|
831
|
-
user: {
|
|
832
|
-
name: span.data.attributes.custom[Attr.Eval.User.Name],
|
|
833
|
-
email: span.data.attributes.custom[Attr.Eval.User.Email]
|
|
834
|
-
},
|
|
835
|
-
cases: [],
|
|
836
|
-
flagConfig: flagConfigRaw ? JSON.parse(flagConfigRaw) : void 0
|
|
837
|
-
};
|
|
838
|
-
};
|
|
839
|
-
var mapSpanToCase = (item) => {
|
|
840
|
-
const data = item.data;
|
|
841
|
-
const d = data.duration;
|
|
842
|
-
let duration = "-";
|
|
843
|
-
if (d.endsWith("s")) {
|
|
844
|
-
duration = `${Number(d.replace("s", "")).toFixed(2)}s`;
|
|
845
|
-
} else {
|
|
846
|
-
duration = d;
|
|
847
|
-
}
|
|
848
|
-
return {
|
|
849
|
-
index: data.attributes.custom[Attr.Eval.Case.Index],
|
|
850
|
-
input: data.attributes.custom[Attr.Eval.Case.Input],
|
|
851
|
-
output: data.attributes.custom[Attr.Eval.Case.Output],
|
|
852
|
-
expected: data.attributes.custom[Attr.Eval.Case.Expected],
|
|
853
|
-
duration,
|
|
854
|
-
status: data.status.code,
|
|
855
|
-
scores: data.attributes.custom[Attr.Eval.Case.Scores] ? JSON.parse(data.attributes.custom[Attr.Eval.Case.Scores]) : {},
|
|
856
|
-
runAt: item._time,
|
|
857
|
-
spanId: data.span_id,
|
|
858
|
-
traceId: data.trace_id
|
|
859
|
-
};
|
|
860
|
-
};
|
|
861
|
-
var buildSpanTree = (spans) => {
|
|
862
|
-
if (!spans.length) {
|
|
863
|
-
return null;
|
|
864
|
-
}
|
|
865
|
-
const evalSpan = spans.find((span) => span.data.attributes.gen_ai.operation.name === "eval");
|
|
866
|
-
if (!evalSpan) {
|
|
867
|
-
return null;
|
|
868
|
-
}
|
|
869
|
-
const rootSpan = mapSpanToEval(evalSpan);
|
|
870
|
-
const caseSpans = spans.filter((span) => span.data.name.startsWith("case"));
|
|
871
|
-
for (const caseSpan of caseSpans) {
|
|
872
|
-
const caseData = mapSpanToCase(caseSpan);
|
|
873
|
-
const taskSpans = spans.filter(
|
|
874
|
-
(span) => span.data.name.startsWith("task") && span.data.parent_span_id === caseSpan.data.span_id
|
|
875
|
-
);
|
|
876
|
-
if (taskSpans.length > 0) {
|
|
877
|
-
const taskSpan = taskSpans[0];
|
|
878
|
-
const chatSpans = spans.filter(
|
|
879
|
-
(span) => span.data.name.startsWith("chat") && span.data.parent_span_id === taskSpan.data.span_id
|
|
880
|
-
);
|
|
881
|
-
const chatData = chatSpans.map((chatSpan) => ({
|
|
882
|
-
operation: chatSpan.data.attributes.custom?.operation || "",
|
|
883
|
-
capability: chatSpan.data.attributes.custom?.capability || "",
|
|
884
|
-
step: chatSpan.data.attributes.custom?.step || "",
|
|
885
|
-
request: {
|
|
886
|
-
max_token: chatSpan.data.attributes.custom?.["request.max_token"] || "",
|
|
887
|
-
model: chatSpan.data.attributes.custom?.["request.model"] || "",
|
|
888
|
-
temperature: chatSpan.data.attributes.custom?.["request.temperature"] || 0
|
|
889
|
-
},
|
|
890
|
-
response: {
|
|
891
|
-
finish_reasons: chatSpan.data.attributes.custom?.["response.finish_reasons"] || ""
|
|
892
|
-
},
|
|
893
|
-
usage: {
|
|
894
|
-
input_tokens: chatSpan.data.attributes.gen_ai?.usage?.input_tokens || 0,
|
|
895
|
-
output_tokens: chatSpan.data.attributes.gen_ai?.usage?.output_tokens || 0
|
|
896
|
-
}
|
|
897
|
-
}));
|
|
898
|
-
const taskData = {
|
|
899
|
-
name: taskSpan.data.name,
|
|
900
|
-
output: taskSpan.data.attributes.custom?.output || "",
|
|
901
|
-
trial: taskSpan.data.attributes.custom?.trial || 0,
|
|
902
|
-
type: taskSpan.data.attributes.custom?.type || "",
|
|
903
|
-
error: taskSpan.data.attributes.custom?.error,
|
|
904
|
-
chat: chatData[0] || {
|
|
905
|
-
operation: "",
|
|
906
|
-
capability: "",
|
|
907
|
-
step: "",
|
|
908
|
-
request: { max_token: "", model: "", temperature: 0 },
|
|
909
|
-
response: { finish_reasons: "" },
|
|
910
|
-
usage: { input_tokens: 0, output_tokens: 0 }
|
|
911
|
-
}
|
|
912
|
-
};
|
|
913
|
-
caseData.task = taskData;
|
|
914
|
-
}
|
|
915
|
-
const scoreSpans = spans.filter(
|
|
916
|
-
(span) => span.data.attributes.gen_ai.operation.name === "eval.score" && span.data.parent_span_id === caseSpan.data.span_id
|
|
917
|
-
);
|
|
918
|
-
caseData.scores = {};
|
|
919
|
-
scoreSpans.forEach((score) => {
|
|
920
|
-
const name = score.data.attributes.custom[Attr.Eval.Score.Name];
|
|
921
|
-
caseData.scores[name] = {
|
|
922
|
-
name,
|
|
923
|
-
value: score.data.attributes.custom[Attr.Eval.Score.Value],
|
|
924
|
-
metadata: {
|
|
925
|
-
error: score.data.attributes.error
|
|
926
|
-
}
|
|
927
|
-
};
|
|
928
|
-
});
|
|
929
|
-
rootSpan.cases.push(caseData);
|
|
930
|
-
}
|
|
931
|
-
rootSpan.cases.sort((a2, b) => a2.index - b.index);
|
|
932
|
-
return rootSpan;
|
|
933
|
-
};
|
|
934
|
-
|
|
935
467
|
// src/util/deep-equal.ts
|
|
936
468
|
function deepEqual(data, other) {
|
|
937
469
|
if (data === other) {
|
|
@@ -1383,6 +915,25 @@ function printFinalReport({
|
|
|
1383
915
|
}
|
|
1384
916
|
}
|
|
1385
917
|
|
|
918
|
+
// src/config/resolver.ts
|
|
919
|
+
var buildConsoleUrl = (urlString) => {
|
|
920
|
+
const url = new URL(urlString);
|
|
921
|
+
return `${url.protocol}//app.${url.host.split("api.").at(-1)}`;
|
|
922
|
+
};
|
|
923
|
+
function resolveAxiomConnection(config) {
|
|
924
|
+
let consoleEndpointUrl = buildConsoleUrl(config.eval.url);
|
|
925
|
+
if ("__overrideEndpointUrl" in config.eval) {
|
|
926
|
+
consoleEndpointUrl = config.eval.__overrideEndpointUrl;
|
|
927
|
+
}
|
|
928
|
+
return {
|
|
929
|
+
url: config.eval.url,
|
|
930
|
+
consoleEndpointUrl,
|
|
931
|
+
token: config.eval.token,
|
|
932
|
+
dataset: config.eval.dataset,
|
|
933
|
+
orgId: config.eval.orgId
|
|
934
|
+
};
|
|
935
|
+
}
|
|
936
|
+
|
|
1386
937
|
// src/evals/reporter.ts
|
|
1387
938
|
var AxiomReporter = class {
|
|
1388
939
|
constructor() {
|
|
@@ -1390,7 +941,6 @@ var AxiomReporter = class {
|
|
|
1390
941
|
__publicField(this, "start", 0);
|
|
1391
942
|
__publicField(this, "_endOfRunConfigEnd");
|
|
1392
943
|
__publicField(this, "_suiteData", []);
|
|
1393
|
-
__publicField(this, "_baselines", /* @__PURE__ */ new Map());
|
|
1394
944
|
__publicField(this, "_printedFlagOverrides", false);
|
|
1395
945
|
__publicField(this, "_config");
|
|
1396
946
|
}
|
|
@@ -1415,17 +965,6 @@ var AxiomReporter = class {
|
|
|
1415
965
|
}
|
|
1416
966
|
this._printedFlagOverrides = true;
|
|
1417
967
|
}
|
|
1418
|
-
const baseline = meta.evaluation.baseline;
|
|
1419
|
-
if (baseline) {
|
|
1420
|
-
const config = getAxiomConfig();
|
|
1421
|
-
if (!config) {
|
|
1422
|
-
throw new AxiomCLIError("Axiom config not available in reporter");
|
|
1423
|
-
}
|
|
1424
|
-
const baselineData = await findEvaluationCases(baseline.id, config);
|
|
1425
|
-
this._baselines.set(meta.evaluation.name, baselineData || null);
|
|
1426
|
-
} else {
|
|
1427
|
-
this._baselines.set(meta.evaluation.name, null);
|
|
1428
|
-
}
|
|
1429
968
|
if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
|
|
1430
969
|
this._endOfRunConfigEnd = meta.evaluation.configEnd;
|
|
1431
970
|
}
|
|
@@ -1455,16 +994,7 @@ var AxiomReporter = class {
|
|
|
1455
994
|
}
|
|
1456
995
|
const cwd = process.cwd();
|
|
1457
996
|
const relativePath = testSuite.module.moduleId.replace(cwd, "").replace(/^\//, "");
|
|
1458
|
-
let suiteBaseline = this._baselines.get(meta.evaluation.name);
|
|
1459
|
-
if (suiteBaseline === void 0 && meta.evaluation.baseline) {
|
|
1460
|
-
const config = getAxiomConfig();
|
|
1461
|
-
if (!config) {
|
|
1462
|
-
throw new AxiomCLIError("Axiom config not available in reporter");
|
|
1463
|
-
}
|
|
1464
|
-
const baselineData = await findEvaluationCases(meta.evaluation.baseline.id, config);
|
|
1465
|
-
suiteBaseline = baselineData || null;
|
|
1466
|
-
this._baselines.set(meta.evaluation.name, suiteBaseline);
|
|
1467
|
-
}
|
|
997
|
+
let suiteBaseline = meta.evaluation.baseline;
|
|
1468
998
|
this._suiteData.push({
|
|
1469
999
|
name: meta.evaluation.name,
|
|
1470
1000
|
file: relativePath,
|
|
@@ -1533,6 +1063,23 @@ var import_resources = require("@opentelemetry/resources");
|
|
|
1533
1063
|
var import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
|
|
1534
1064
|
var import_api10 = require("@opentelemetry/api");
|
|
1535
1065
|
|
|
1066
|
+
// src/cli/errors.ts
|
|
1067
|
+
var AxiomCLIError = class extends Error {
|
|
1068
|
+
constructor(message) {
|
|
1069
|
+
super(message);
|
|
1070
|
+
this.name = "AxiomCLIError";
|
|
1071
|
+
}
|
|
1072
|
+
};
|
|
1073
|
+
function errorToString(error) {
|
|
1074
|
+
if (typeof error === "string") {
|
|
1075
|
+
return error;
|
|
1076
|
+
}
|
|
1077
|
+
if (error instanceof Error) {
|
|
1078
|
+
return error.message;
|
|
1079
|
+
}
|
|
1080
|
+
return JSON.stringify(error);
|
|
1081
|
+
}
|
|
1082
|
+
|
|
1536
1083
|
// src/config/loader.ts
|
|
1537
1084
|
var import_c12 = require("c12");
|
|
1538
1085
|
var import_defu = require("defu");
|
|
@@ -1998,11 +1545,11 @@ function setupEvalProvider(connection) {
|
|
|
1998
1545
|
axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
|
|
1999
1546
|
resource: (0, import_resources.resourceFromAttributes)({
|
|
2000
1547
|
["service.name"]: "axiom",
|
|
2001
|
-
["service.version"]: "0.27.0"
|
|
1548
|
+
["service.version"]: "0.28.0"
|
|
2002
1549
|
}),
|
|
2003
1550
|
spanProcessors: [processor]
|
|
2004
1551
|
});
|
|
2005
|
-
axiomTracer = axiomProvider.getTracer("axiom", "0.27.0");
|
|
1552
|
+
axiomTracer = axiomProvider.getTracer("axiom", "0.28.0");
|
|
2006
1553
|
}
|
|
2007
1554
|
async function initInstrumentation(config) {
|
|
2008
1555
|
if (initialized) {
|
|
@@ -2014,7 +1561,7 @@ async function initInstrumentation(config) {
|
|
|
2014
1561
|
}
|
|
2015
1562
|
initializationPromise = (async () => {
|
|
2016
1563
|
if (!config.enabled) {
|
|
2017
|
-
axiomTracer = import_api10.trace.getTracer("axiom", "0.27.0");
|
|
1564
|
+
axiomTracer = import_api10.trace.getTracer("axiom", "0.28.0");
|
|
2018
1565
|
initialized = true;
|
|
2019
1566
|
return;
|
|
2020
1567
|
}
|
|
@@ -2786,7 +2333,7 @@ var import_commander2 = require("commander");
|
|
|
2786
2333
|
var loadVersionCommand = (program2) => {
|
|
2787
2334
|
return program2.addCommand(
|
|
2788
2335
|
new import_commander2.Command("version").description("cli version").action(() => {
|
|
2789
|
-
console.log("0.27.0");
|
|
2336
|
+
console.log("0.28.0");
|
|
2790
2337
|
})
|
|
2791
2338
|
);
|
|
2792
2339
|
};
|
|
@@ -2796,7 +2343,7 @@ var { loadEnvConfig } = import_env.default;
|
|
|
2796
2343
|
loadEnvConfig(process.cwd());
|
|
2797
2344
|
var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
|
|
2798
2345
|
var program = new import_commander3.Command();
|
|
2799
|
-
program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.27.0");
|
|
2346
|
+
program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.28.0");
|
|
2800
2347
|
program.hook("preAction", async (_, actionCommand) => {
|
|
2801
2348
|
const commandName = actionCommand.name();
|
|
2802
2349
|
const parentCommand = actionCommand.parent;
|