judgeval 0.20.1__tar.gz → 0.21.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of judgeval might be problematic. Click here for more details.

Files changed (167) hide show
  1. {judgeval-0.20.1 → judgeval-0.21.0}/PKG-INFO +1 -1
  2. {judgeval-0.20.1 → judgeval-0.21.0}/pyproject.toml +1 -1
  3. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/keys.py +10 -9
  4. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_anthropic/messages.py +34 -22
  5. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_anthropic/messages_stream.py +12 -12
  6. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_google/generate_content.py +8 -6
  7. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_openai/beta_chat_completions.py +36 -12
  8. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_openai/chat_completions.py +75 -22
  9. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_openai/responses.py +77 -22
  10. judgeval-0.21.0/src/judgeval/tracer/llm/llm_openai/utils.py +22 -0
  11. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_together/chat_completions.py +22 -14
  12. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/serialize.py +2 -2
  13. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/version.py +1 -1
  14. {judgeval-0.20.1 → judgeval-0.21.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  15. {judgeval-0.20.1 → judgeval-0.21.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  16. {judgeval-0.20.1 → judgeval-0.21.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  17. {judgeval-0.20.1 → judgeval-0.21.0}/.github/pull_request_template.md +0 -0
  18. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/blocked-pr.yaml +0 -0
  19. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/ci.yaml +0 -0
  20. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/claude-code-review.yml +0 -0
  21. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/claude.yml +0 -0
  22. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/lint.yaml +0 -0
  23. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/merge-branch-check.yaml +0 -0
  24. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/mypy.yaml +0 -0
  25. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
  26. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/release.yaml +0 -0
  27. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/validate-branch.yaml +0 -0
  28. {judgeval-0.20.1 → judgeval-0.21.0}/.gitignore +0 -0
  29. {judgeval-0.20.1 → judgeval-0.21.0}/.pre-commit-config.yaml +0 -0
  30. {judgeval-0.20.1 → judgeval-0.21.0}/CONTRIBUTING.md +0 -0
  31. {judgeval-0.20.1 → judgeval-0.21.0}/LICENSE.md +0 -0
  32. {judgeval-0.20.1 → judgeval-0.21.0}/README.md +0 -0
  33. {judgeval-0.20.1 → judgeval-0.21.0}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
  34. {judgeval-0.20.1 → judgeval-0.21.0}/assets/agent.gif +0 -0
  35. {judgeval-0.20.1 → judgeval-0.21.0}/assets/agent_trace_example.png +0 -0
  36. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/company.jpg +0 -0
  37. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/company_banner.jpg +0 -0
  38. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/darkmode.svg +0 -0
  39. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/full_logo.png +0 -0
  40. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/icon.png +0 -0
  41. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/lightmode.svg +0 -0
  42. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/white_background.png +0 -0
  43. {judgeval-0.20.1 → judgeval-0.21.0}/assets/custom_scorer_online_abm.png +0 -0
  44. {judgeval-0.20.1 → judgeval-0.21.0}/assets/data.gif +0 -0
  45. {judgeval-0.20.1 → judgeval-0.21.0}/assets/dataset_clustering_screenshot.png +0 -0
  46. {judgeval-0.20.1 → judgeval-0.21.0}/assets/dataset_clustering_screenshot_dm.png +0 -0
  47. {judgeval-0.20.1 → judgeval-0.21.0}/assets/datasets_preview_screenshot.png +0 -0
  48. {judgeval-0.20.1 → judgeval-0.21.0}/assets/document.gif +0 -0
  49. {judgeval-0.20.1 → judgeval-0.21.0}/assets/error_analysis_dashboard.png +0 -0
  50. {judgeval-0.20.1 → judgeval-0.21.0}/assets/errors.png +0 -0
  51. {judgeval-0.20.1 → judgeval-0.21.0}/assets/experiments_dashboard_screenshot.png +0 -0
  52. {judgeval-0.20.1 → judgeval-0.21.0}/assets/experiments_page.png +0 -0
  53. {judgeval-0.20.1 → judgeval-0.21.0}/assets/experiments_pagev2.png +0 -0
  54. {judgeval-0.20.1 → judgeval-0.21.0}/assets/logo_darkmode.svg +0 -0
  55. {judgeval-0.20.1 → judgeval-0.21.0}/assets/logo_lightmode.svg +0 -0
  56. {judgeval-0.20.1 → judgeval-0.21.0}/assets/monitoring_screenshot.png +0 -0
  57. {judgeval-0.20.1 → judgeval-0.21.0}/assets/online_eval.png +0 -0
  58. {judgeval-0.20.1 → judgeval-0.21.0}/assets/product_shot.png +0 -0
  59. {judgeval-0.20.1 → judgeval-0.21.0}/assets/quickstart_trajectory_ss.png +0 -0
  60. {judgeval-0.20.1 → judgeval-0.21.0}/assets/test.png +0 -0
  61. {judgeval-0.20.1 → judgeval-0.21.0}/assets/tests.png +0 -0
  62. {judgeval-0.20.1 → judgeval-0.21.0}/assets/trace.gif +0 -0
  63. {judgeval-0.20.1 → judgeval-0.21.0}/assets/trace_demo.png +0 -0
  64. {judgeval-0.20.1 → judgeval-0.21.0}/assets/trace_screenshot.png +0 -0
  65. {judgeval-0.20.1 → judgeval-0.21.0}/assets/trace_screenshot_old.png +0 -0
  66. {judgeval-0.20.1 → judgeval-0.21.0}/pytest.ini +0 -0
  67. {judgeval-0.20.1 → judgeval-0.21.0}/scripts/api_generator.py +0 -0
  68. {judgeval-0.20.1 → judgeval-0.21.0}/scripts/openapi_transform.py +0 -0
  69. {judgeval-0.20.1 → judgeval-0.21.0}/scripts/update_types.sh +0 -0
  70. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/__init__.py +0 -0
  71. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/api/__init__.py +0 -0
  72. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/api/api_types.py +0 -0
  73. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/cli.py +0 -0
  74. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/constants.py +0 -0
  75. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/__init__.py +0 -0
  76. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/evaluation_run.py +0 -0
  77. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/example.py +0 -0
  78. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/judgment_types.py +0 -0
  79. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/result.py +0 -0
  80. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/scorer_data.py +0 -0
  81. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
  82. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/scripts/openapi_transform.py +0 -0
  83. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/trace.py +0 -0
  84. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/dataset/__init__.py +0 -0
  85. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/env.py +0 -0
  86. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/evaluation/__init__.py +0 -0
  87. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/exceptions.py +0 -0
  88. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/integrations/langgraph/__init__.py +0 -0
  89. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/integrations/openlit/__init__.py +0 -0
  90. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/judges/__init__.py +0 -0
  91. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/judges/base_judge.py +0 -0
  92. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/judges/litellm_judge.py +0 -0
  93. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/judges/together_judge.py +0 -0
  94. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/judges/utils.py +0 -0
  95. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/logger.py +0 -0
  96. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/prompt/__init__.py +0 -0
  97. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/__init__.py +0 -0
  98. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/agent_scorer.py +0 -0
  99. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/api_scorer.py +0 -0
  100. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/base_scorer.py +0 -0
  101. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/example_scorer.py +0 -0
  102. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/exceptions.py +0 -0
  103. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  104. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  105. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  106. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  107. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  108. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  109. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -0
  110. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/score.py +0 -0
  111. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/utils.py +0 -0
  112. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/__init__.py +0 -0
  113. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/constants.py +0 -0
  114. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/exporters/__init__.py +0 -0
  115. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/exporters/s3.py +0 -0
  116. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/exporters/store.py +0 -0
  117. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/exporters/utils.py +0 -0
  118. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/__init__.py +0 -0
  119. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/config.py +0 -0
  120. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/constants.py +0 -0
  121. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_anthropic/__init__.py +0 -0
  122. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_anthropic/config.py +0 -0
  123. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_anthropic/wrapper.py +0 -0
  124. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_google/__init__.py +0 -0
  125. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_google/config.py +0 -0
  126. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_google/wrapper.py +0 -0
  127. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_openai/__init__.py +0 -0
  128. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_openai/config.py +0 -0
  129. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_openai/wrapper.py +0 -0
  130. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_together/__init__.py +0 -0
  131. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_together/config.py +0 -0
  132. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_together/wrapper.py +0 -0
  133. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/providers.py +0 -0
  134. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/managers.py +0 -0
  135. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/processors/__init__.py +0 -0
  136. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/utils.py +0 -0
  137. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/__init__.py +0 -0
  138. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/base_trainer.py +0 -0
  139. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/config.py +0 -0
  140. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/console.py +0 -0
  141. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/fireworks_trainer.py +0 -0
  142. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/trainable_model.py +0 -0
  143. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/trainer.py +0 -0
  144. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/async_utils.py +0 -0
  145. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/decorators/__init__.py +0 -0
  146. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/decorators/dont_throw.py +0 -0
  147. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/decorators/use_once.py +0 -0
  148. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/file_utils.py +0 -0
  149. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/guards.py +0 -0
  150. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/meta.py +0 -0
  151. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/project.py +0 -0
  152. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/testing.py +0 -0
  153. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/url.py +0 -0
  154. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/version_check.py +0 -0
  155. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/README.md +0 -0
  156. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/__init__.py +0 -0
  157. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/immutable_wrap_async.py +0 -0
  158. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/immutable_wrap_async_iterator.py +0 -0
  159. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/immutable_wrap_sync.py +0 -0
  160. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +0 -0
  161. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/mutable_wrap_async.py +0 -0
  162. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/mutable_wrap_sync.py +0 -0
  163. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/py.typed +0 -0
  164. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/utils.py +0 -0
  165. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/warnings.py +0 -0
  166. {judgeval-0.20.1 → judgeval-0.21.0}/update_version.py +0 -0
  167. {judgeval-0.20.1 → judgeval-0.21.0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.20.1
3
+ Version: 0.21.0
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "judgeval"
3
- version = "0.20.1"
3
+ version = "0.21.0"
4
4
  authors = [
5
5
  { name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
6
6
  { name = "Alex Shan", email = "alex@judgmentlabs.ai" },
@@ -26,18 +26,19 @@ class AttributeKeys(str, Enum):
26
26
 
27
27
  PENDING_TRACE_EVAL = "judgment.pending_trace_eval"
28
28
 
29
+ JUDGMENT_LLM_PROVIDER = "judgment.llm.provider"
30
+ JUDGMENT_LLM_MODEL_NAME = "judgment.llm.model"
31
+ JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS = "judgment.usage.non_cached_input_tokens"
32
+ JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS = (
33
+ "judgment.usage.cache_creation_input_tokens"
34
+ )
35
+ JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS = "judgment.usage.cache_read_input_tokens"
36
+ JUDGMENT_USAGE_OUTPUT_TOKENS = "judgment.usage.output_tokens"
37
+ JUDGMENT_USAGE_TOTAL_COST_USD = "judgment.usage.total_cost_usd"
38
+
29
39
  GEN_AI_PROMPT = "gen_ai.prompt"
30
40
  GEN_AI_COMPLETION = "gen_ai.completion"
31
- GEN_AI_REQUEST_MODEL = "gen_ai.request.model"
32
- GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
33
41
  GEN_AI_SYSTEM = "gen_ai.system"
34
- GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
35
- GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
36
- GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS = (
37
- "gen_ai.usage.cache_creation_input_tokens"
38
- )
39
- GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read_input_tokens"
40
-
41
42
  GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
42
43
  GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
43
44
  GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
@@ -95,7 +95,7 @@ def _wrap_non_streaming_sync(
95
95
  )
96
96
  ctx["model_name"] = kwargs.get("model", "")
97
97
  set_span_attribute(
98
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
98
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
99
99
  )
100
100
 
101
101
  def post_hook(ctx: Dict[str, Any], result: Message) -> None:
@@ -112,17 +112,19 @@ def _wrap_non_streaming_sync(
112
112
  _extract_anthropic_tokens(result.usage)
113
113
  )
114
114
  set_span_attribute(
115
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
115
+ span,
116
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
117
+ prompt_tokens,
116
118
  )
117
119
  set_span_attribute(
118
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
120
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
119
121
  )
120
122
  set_span_attribute(
121
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
123
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
122
124
  )
123
125
  set_span_attribute(
124
126
  span,
125
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
127
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
126
128
  cache_creation,
127
129
  )
128
130
  set_span_attribute(
@@ -133,7 +135,7 @@ def _wrap_non_streaming_sync(
133
135
 
134
136
  set_span_attribute(
135
137
  span,
136
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
138
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
137
139
  result.model,
138
140
  )
139
141
 
@@ -169,7 +171,7 @@ def _wrap_streaming_sync(
169
171
  )
170
172
  ctx["model_name"] = kwargs.get("model", "")
171
173
  set_span_attribute(
172
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
174
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
173
175
  )
174
176
  ctx["accumulated_content"] = ""
175
177
 
@@ -197,17 +199,21 @@ def _wrap_streaming_sync(
197
199
  _extract_anthropic_tokens(usage_data)
198
200
  )
199
201
  set_span_attribute(
200
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
202
+ span,
203
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
204
+ prompt_tokens,
201
205
  )
202
206
  set_span_attribute(
203
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
207
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
204
208
  )
205
209
  set_span_attribute(
206
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
210
+ span,
211
+ AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
212
+ cache_read,
207
213
  )
208
214
  set_span_attribute(
209
215
  span,
210
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
216
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
211
217
  cache_creation,
212
218
  )
213
219
  set_span_attribute(
@@ -279,7 +285,7 @@ def _wrap_non_streaming_async(
279
285
  )
280
286
  ctx["model_name"] = kwargs.get("model", "")
281
287
  set_span_attribute(
282
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
288
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
283
289
  )
284
290
 
285
291
  def post_hook(ctx: Dict[str, Any], result: Message) -> None:
@@ -296,17 +302,19 @@ def _wrap_non_streaming_async(
296
302
  _extract_anthropic_tokens(result.usage)
297
303
  )
298
304
  set_span_attribute(
299
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
305
+ span,
306
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
307
+ prompt_tokens,
300
308
  )
301
309
  set_span_attribute(
302
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
310
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
303
311
  )
304
312
  set_span_attribute(
305
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
313
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
306
314
  )
307
315
  set_span_attribute(
308
316
  span,
309
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
317
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
310
318
  cache_creation,
311
319
  )
312
320
  set_span_attribute(
@@ -317,7 +325,7 @@ def _wrap_non_streaming_async(
317
325
 
318
326
  set_span_attribute(
319
327
  span,
320
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
328
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
321
329
  result.model,
322
330
  )
323
331
 
@@ -354,7 +362,7 @@ def _wrap_streaming_async(
354
362
  )
355
363
  ctx["model_name"] = kwargs.get("model", "")
356
364
  set_span_attribute(
357
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
365
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
358
366
  )
359
367
  ctx["accumulated_content"] = ""
360
368
 
@@ -382,17 +390,21 @@ def _wrap_streaming_async(
382
390
  _extract_anthropic_tokens(usage_data)
383
391
  )
384
392
  set_span_attribute(
385
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
393
+ span,
394
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
395
+ prompt_tokens,
386
396
  )
387
397
  set_span_attribute(
388
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
398
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
389
399
  )
390
400
  set_span_attribute(
391
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
401
+ span,
402
+ AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
403
+ cache_read,
392
404
  )
393
405
  set_span_attribute(
394
406
  span,
395
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
407
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
396
408
  cache_creation,
397
409
  )
398
410
  set_span_attribute(
@@ -44,7 +44,7 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
44
44
 
45
45
  ctx["model_name"] = kwargs.get("model", "")
46
46
  set_span_attribute(
47
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
47
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
48
48
  )
49
49
  ctx["accumulated_content"] = ""
50
50
 
@@ -125,22 +125,22 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
125
125
  ) = _extract_anthropic_tokens(final_message.usage)
126
126
  set_span_attribute(
127
127
  span,
128
- AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
128
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
129
129
  prompt_tokens,
130
130
  )
131
131
  set_span_attribute(
132
132
  span,
133
- AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
133
+ AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
134
134
  completion_tokens,
135
135
  )
136
136
  set_span_attribute(
137
137
  span,
138
- AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
138
+ AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
139
139
  cache_read,
140
140
  )
141
141
  set_span_attribute(
142
142
  span,
143
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
143
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
144
144
  cache_creation,
145
145
  )
146
146
  set_span_attribute(
@@ -151,7 +151,7 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
151
151
 
152
152
  set_span_attribute(
153
153
  span,
154
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
154
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
155
155
  final_message.model,
156
156
  )
157
157
  except Exception:
@@ -190,7 +190,7 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
190
190
 
191
191
  ctx["model_name"] = kwargs.get("model", "")
192
192
  set_span_attribute(
193
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
193
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
194
194
  )
195
195
  ctx["accumulated_content"] = ""
196
196
 
@@ -271,22 +271,22 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
271
271
  ) = _extract_anthropic_tokens(final_message.usage)
272
272
  set_span_attribute(
273
273
  span,
274
- AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
274
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
275
275
  prompt_tokens,
276
276
  )
277
277
  set_span_attribute(
278
278
  span,
279
- AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
279
+ AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
280
280
  completion_tokens,
281
281
  )
282
282
  set_span_attribute(
283
283
  span,
284
- AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
284
+ AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
285
285
  cache_read,
286
286
  )
287
287
  set_span_attribute(
288
288
  span,
289
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
289
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
290
290
  cache_creation,
291
291
  )
292
292
  set_span_attribute(
@@ -297,7 +297,7 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
297
297
 
298
298
  set_span_attribute(
299
299
  span,
300
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
300
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
301
301
  final_message.model,
302
302
  )
303
303
  except Exception:
@@ -63,7 +63,7 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
63
63
  )
64
64
  ctx["model_name"] = kwargs.get("model", "")
65
65
  set_span_attribute(
66
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
66
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
67
67
  )
68
68
 
69
69
  def post_hook(ctx: Dict[str, Any], result: GenerateContentResponse) -> None:
@@ -79,17 +79,19 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
79
79
  _extract_google_tokens(usage_data)
80
80
  )
81
81
  set_span_attribute(
82
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
82
+ span,
83
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
84
+ prompt_tokens,
83
85
  )
84
86
  set_span_attribute(
85
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
87
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
86
88
  )
87
89
  set_span_attribute(
88
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
90
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
89
91
  )
90
92
  set_span_attribute(
91
93
  span,
92
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
94
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
93
95
  cache_creation,
94
96
  )
95
97
  set_span_attribute(
@@ -100,7 +102,7 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
100
102
 
101
103
  set_span_attribute(
102
104
  span,
103
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
105
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
104
106
  result.model_version if result.model_version else ctx["model_name"],
105
107
  )
106
108
 
@@ -16,6 +16,7 @@ from judgeval.utils.wrappers import (
16
16
  immutable_wrap_sync,
17
17
  immutable_wrap_async,
18
18
  )
19
+ from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
19
20
 
20
21
  if TYPE_CHECKING:
21
22
  from judgeval.tracer import Tracer
@@ -45,7 +46,7 @@ def _wrap_beta_non_streaming_sync(
45
46
  )
46
47
  ctx["model_name"] = kwargs.get("model", "")
47
48
  set_span_attribute(
48
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
49
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
49
50
  )
50
51
 
51
52
  def post_hook(ctx: Dict[str, Any], result: ParsedChatCompletion[T]) -> None:
@@ -66,17 +67,29 @@ def _wrap_beta_non_streaming_sync(
66
67
  if prompt_tokens_details:
67
68
  cache_read = prompt_tokens_details.cached_tokens or 0
68
69
 
70
+ prompt_tokens, completion_tokens, cache_read, cache_creation = (
71
+ openai_tokens_converter(
72
+ prompt_tokens,
73
+ completion_tokens,
74
+ cache_read,
75
+ 0,
76
+ usage_data.total_tokens,
77
+ )
78
+ )
79
+
69
80
  set_span_attribute(
70
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
81
+ span,
82
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
83
+ prompt_tokens,
71
84
  )
72
85
  set_span_attribute(
73
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
86
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
74
87
  )
75
88
  set_span_attribute(
76
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
89
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
77
90
  )
78
91
  set_span_attribute(
79
- span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
92
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
80
93
  )
81
94
  set_span_attribute(
82
95
  span,
@@ -86,7 +99,7 @@ def _wrap_beta_non_streaming_sync(
86
99
 
87
100
  set_span_attribute(
88
101
  span,
89
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
102
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
90
103
  result.model or ctx["model_name"],
91
104
  )
92
105
 
@@ -128,7 +141,7 @@ def _wrap_beta_non_streaming_async(
128
141
  )
129
142
  ctx["model_name"] = kwargs.get("model", "")
130
143
  set_span_attribute(
131
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
144
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
132
145
  )
133
146
 
134
147
  def post_hook(ctx: Dict[str, Any], result: ParsedChatCompletion[T]) -> None:
@@ -149,17 +162,28 @@ def _wrap_beta_non_streaming_async(
149
162
  if prompt_tokens_details:
150
163
  cache_read = prompt_tokens_details.cached_tokens or 0
151
164
 
165
+ prompt_tokens, completion_tokens, cache_read, cache_creation = (
166
+ openai_tokens_converter(
167
+ prompt_tokens,
168
+ completion_tokens,
169
+ cache_read,
170
+ 0,
171
+ usage_data.total_tokens,
172
+ )
173
+ )
152
174
  set_span_attribute(
153
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
175
+ span,
176
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
177
+ prompt_tokens,
154
178
  )
155
179
  set_span_attribute(
156
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
180
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
157
181
  )
158
182
  set_span_attribute(
159
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
183
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
160
184
  )
161
185
  set_span_attribute(
162
- span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
186
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
163
187
  )
164
188
  set_span_attribute(
165
189
  span,
@@ -169,7 +193,7 @@ def _wrap_beta_non_streaming_async(
169
193
 
170
194
  set_span_attribute(
171
195
  span,
172
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
196
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
173
197
  result.model or ctx["model_name"],
174
198
  )
175
199
 
@@ -25,6 +25,7 @@ from judgeval.utils.wrappers import (
25
25
  immutable_wrap_sync_iterator,
26
26
  immutable_wrap_async_iterator,
27
27
  )
28
+ from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
28
29
 
29
30
  if TYPE_CHECKING:
30
31
  from judgeval.tracer import Tracer
@@ -68,7 +69,7 @@ def _wrap_non_streaming_sync(
68
69
  )
69
70
  ctx["model_name"] = kwargs.get("model", "")
70
71
  set_span_attribute(
71
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
72
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
72
73
  )
73
74
 
74
75
  def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
@@ -89,17 +90,29 @@ def _wrap_non_streaming_sync(
89
90
  if prompt_tokens_details:
90
91
  cache_read = prompt_tokens_details.cached_tokens or 0
91
92
 
93
+ prompt_tokens, completion_tokens, cache_read, cache_creation = (
94
+ openai_tokens_converter(
95
+ prompt_tokens,
96
+ completion_tokens,
97
+ cache_read,
98
+ 0,
99
+ usage_data.total_tokens,
100
+ )
101
+ )
102
+
92
103
  set_span_attribute(
93
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
104
+ span,
105
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
106
+ prompt_tokens,
94
107
  )
95
108
  set_span_attribute(
96
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
109
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
97
110
  )
98
111
  set_span_attribute(
99
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
112
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
100
113
  )
101
114
  set_span_attribute(
102
- span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
115
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
103
116
  )
104
117
  set_span_attribute(
105
118
  span,
@@ -109,7 +122,7 @@ def _wrap_non_streaming_sync(
109
122
 
110
123
  set_span_attribute(
111
124
  span,
112
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
125
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
113
126
  result.model or ctx["model_name"],
114
127
  )
115
128
 
@@ -145,7 +158,7 @@ def _wrap_streaming_sync(
145
158
  )
146
159
  ctx["model_name"] = kwargs.get("model", "")
147
160
  set_span_attribute(
148
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
161
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
149
162
  )
150
163
  ctx["accumulated_content"] = ""
151
164
 
@@ -182,17 +195,31 @@ def _wrap_streaming_sync(
182
195
  if chunk.usage.prompt_tokens_details:
183
196
  cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0
184
197
 
198
+ prompt_tokens, completion_tokens, cache_read, cache_creation = (
199
+ openai_tokens_converter(
200
+ prompt_tokens,
201
+ completion_tokens,
202
+ cache_read,
203
+ 0,
204
+ chunk.usage.total_tokens,
205
+ )
206
+ )
207
+
185
208
  set_span_attribute(
186
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
209
+ span,
210
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
211
+ prompt_tokens,
187
212
  )
188
213
  set_span_attribute(
189
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
214
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
190
215
  )
191
216
  set_span_attribute(
192
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
217
+ span,
218
+ AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
219
+ cache_read,
193
220
  )
194
221
  set_span_attribute(
195
- span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
222
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
196
223
  )
197
224
  set_span_attribute(
198
225
  span,
@@ -264,7 +291,7 @@ def _wrap_non_streaming_async(
264
291
  )
265
292
  ctx["model_name"] = kwargs.get("model", "")
266
293
  set_span_attribute(
267
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
294
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
268
295
  )
269
296
 
270
297
  def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
@@ -285,17 +312,29 @@ def _wrap_non_streaming_async(
285
312
  if prompt_tokens_details:
286
313
  cache_read = prompt_tokens_details.cached_tokens or 0
287
314
 
315
+ prompt_tokens, completion_tokens, cache_read, cache_creation = (
316
+ openai_tokens_converter(
317
+ prompt_tokens,
318
+ completion_tokens,
319
+ cache_read,
320
+ 0,
321
+ usage_data.total_tokens,
322
+ )
323
+ )
324
+
288
325
  set_span_attribute(
289
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
326
+ span,
327
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
328
+ prompt_tokens,
290
329
  )
291
330
  set_span_attribute(
292
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
331
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
293
332
  )
294
333
  set_span_attribute(
295
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
334
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
296
335
  )
297
336
  set_span_attribute(
298
- span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
337
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
299
338
  )
300
339
  set_span_attribute(
301
340
  span,
@@ -305,7 +344,7 @@ def _wrap_non_streaming_async(
305
344
 
306
345
  set_span_attribute(
307
346
  span,
308
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
347
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
309
348
  result.model or ctx["model_name"],
310
349
  )
311
350
 
@@ -342,7 +381,7 @@ def _wrap_streaming_async(
342
381
  )
343
382
  ctx["model_name"] = kwargs.get("model", "")
344
383
  set_span_attribute(
345
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
384
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
346
385
  )
347
386
  ctx["accumulated_content"] = ""
348
387
 
@@ -379,17 +418,31 @@ def _wrap_streaming_async(
379
418
  if chunk.usage.prompt_tokens_details:
380
419
  cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0
381
420
 
421
+ prompt_tokens, completion_tokens, cache_read, cache_creation = (
422
+ openai_tokens_converter(
423
+ prompt_tokens,
424
+ completion_tokens,
425
+ cache_read,
426
+ 0,
427
+ chunk.usage.total_tokens,
428
+ )
429
+ )
430
+
382
431
  set_span_attribute(
383
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
432
+ span,
433
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
434
+ prompt_tokens,
384
435
  )
385
436
  set_span_attribute(
386
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
437
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
387
438
  )
388
439
  set_span_attribute(
389
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
440
+ span,
441
+ AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
442
+ cache_read,
390
443
  )
391
444
  set_span_attribute(
392
- span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
445
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
393
446
  )
394
447
  set_span_attribute(
395
448
  span,