judgeval 0.16.9.tar.gz → 0.18.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (168)
  1. {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/ci.yaml +1 -1
  2. {judgeval-0.16.9 → judgeval-0.18.0}/PKG-INFO +2 -3
  3. {judgeval-0.16.9 → judgeval-0.18.0}/README.md +1 -2
  4. {judgeval-0.16.9 → judgeval-0.18.0}/pyproject.toml +1 -1
  5. {judgeval-0.16.9 → judgeval-0.18.0}/scripts/api_generator.py +5 -0
  6. {judgeval-0.16.9 → judgeval-0.18.0}/scripts/openapi_transform.py +5 -0
  7. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/__init__.py +29 -0
  8. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/api/__init__.py +108 -0
  9. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/api/api_types.py +56 -1
  10. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/cli.py +7 -0
  11. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/judgment_types.py +56 -1
  12. judgeval-0.18.0/src/judgeval/prompts/prompt.py +320 -0
  13. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -12
  14. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/__init__.py +71 -33
  15. judgeval-0.18.0/src/judgeval/tracer/exporters/store.py +59 -0
  16. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/keys.py +1 -0
  17. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_anthropic/messages.py +4 -4
  18. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_anthropic/messages_stream.py +2 -2
  19. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_google/generate_content.py +1 -1
  20. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_openai/beta_chat_completions.py +2 -2
  21. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_openai/chat_completions.py +4 -4
  22. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_openai/responses.py +4 -4
  23. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_together/chat_completions.py +4 -4
  24. judgeval-0.18.0/src/judgeval/trainer/__init__.py +14 -0
  25. judgeval-0.18.0/src/judgeval/trainer/base_trainer.py +122 -0
  26. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/trainer/config.py +1 -1
  27. judgeval-0.16.9/src/judgeval/trainer/trainer.py → judgeval-0.18.0/src/judgeval/trainer/fireworks_trainer.py +35 -44
  28. judgeval-0.18.0/src/judgeval/trainer/trainer.py +70 -0
  29. judgeval-0.18.0/src/judgeval/utils/project.py +15 -0
  30. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/version.py +1 -1
  31. judgeval-0.16.9/src/judgeval/tracer/exporters/store.py +0 -43
  32. judgeval-0.16.9/src/judgeval/trainer/__init__.py +0 -5
  33. {judgeval-0.16.9 → judgeval-0.18.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  34. {judgeval-0.16.9 → judgeval-0.18.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  35. {judgeval-0.16.9 → judgeval-0.18.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  36. {judgeval-0.16.9 → judgeval-0.18.0}/.github/pull_request_template.md +0 -0
  37. {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/blocked-pr.yaml +0 -0
  38. {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/claude-code-review.yml +0 -0
  39. {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/claude.yml +0 -0
  40. {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/lint.yaml +0 -0
  41. {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/merge-branch-check.yaml +0 -0
  42. {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/mypy.yaml +0 -0
  43. {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
  44. {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/release.yaml +0 -0
  45. {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/validate-branch.yaml +0 -0
  46. {judgeval-0.16.9 → judgeval-0.18.0}/.gitignore +0 -0
  47. {judgeval-0.16.9 → judgeval-0.18.0}/.pre-commit-config.yaml +0 -0
  48. {judgeval-0.16.9 → judgeval-0.18.0}/CONTRIBUTING.md +0 -0
  49. {judgeval-0.16.9 → judgeval-0.18.0}/LICENSE.md +0 -0
  50. {judgeval-0.16.9 → judgeval-0.18.0}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
  51. {judgeval-0.16.9 → judgeval-0.18.0}/assets/agent.gif +0 -0
  52. {judgeval-0.16.9 → judgeval-0.18.0}/assets/agent_trace_example.png +0 -0
  53. {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/company.jpg +0 -0
  54. {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/company_banner.jpg +0 -0
  55. {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/darkmode.svg +0 -0
  56. {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/full_logo.png +0 -0
  57. {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/icon.png +0 -0
  58. {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/lightmode.svg +0 -0
  59. {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/white_background.png +0 -0
  60. {judgeval-0.16.9 → judgeval-0.18.0}/assets/custom_scorer_online_abm.png +0 -0
  61. {judgeval-0.16.9 → judgeval-0.18.0}/assets/data.gif +0 -0
  62. {judgeval-0.16.9 → judgeval-0.18.0}/assets/dataset_clustering_screenshot.png +0 -0
  63. {judgeval-0.16.9 → judgeval-0.18.0}/assets/dataset_clustering_screenshot_dm.png +0 -0
  64. {judgeval-0.16.9 → judgeval-0.18.0}/assets/datasets_preview_screenshot.png +0 -0
  65. {judgeval-0.16.9 → judgeval-0.18.0}/assets/document.gif +0 -0
  66. {judgeval-0.16.9 → judgeval-0.18.0}/assets/error_analysis_dashboard.png +0 -0
  67. {judgeval-0.16.9 → judgeval-0.18.0}/assets/errors.png +0 -0
  68. {judgeval-0.16.9 → judgeval-0.18.0}/assets/experiments_dashboard_screenshot.png +0 -0
  69. {judgeval-0.16.9 → judgeval-0.18.0}/assets/experiments_page.png +0 -0
  70. {judgeval-0.16.9 → judgeval-0.18.0}/assets/experiments_pagev2.png +0 -0
  71. {judgeval-0.16.9 → judgeval-0.18.0}/assets/logo_darkmode.svg +0 -0
  72. {judgeval-0.16.9 → judgeval-0.18.0}/assets/logo_lightmode.svg +0 -0
  73. {judgeval-0.16.9 → judgeval-0.18.0}/assets/monitoring_screenshot.png +0 -0
  74. {judgeval-0.16.9 → judgeval-0.18.0}/assets/online_eval.png +0 -0
  75. {judgeval-0.16.9 → judgeval-0.18.0}/assets/product_shot.png +0 -0
  76. {judgeval-0.16.9 → judgeval-0.18.0}/assets/quickstart_trajectory_ss.png +0 -0
  77. {judgeval-0.16.9 → judgeval-0.18.0}/assets/test.png +0 -0
  78. {judgeval-0.16.9 → judgeval-0.18.0}/assets/tests.png +0 -0
  79. {judgeval-0.16.9 → judgeval-0.18.0}/assets/trace.gif +0 -0
  80. {judgeval-0.16.9 → judgeval-0.18.0}/assets/trace_demo.png +0 -0
  81. {judgeval-0.16.9 → judgeval-0.18.0}/assets/trace_screenshot.png +0 -0
  82. {judgeval-0.16.9 → judgeval-0.18.0}/assets/trace_screenshot_old.png +0 -0
  83. {judgeval-0.16.9 → judgeval-0.18.0}/pytest.ini +0 -0
  84. {judgeval-0.16.9 → judgeval-0.18.0}/scripts/update_types.sh +0 -0
  85. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/constants.py +0 -0
  86. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/__init__.py +0 -0
  87. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/evaluation_run.py +0 -0
  88. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/example.py +0 -0
  89. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/result.py +0 -0
  90. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/scorer_data.py +0 -0
  91. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
  92. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/scripts/openapi_transform.py +0 -0
  93. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/trace.py +0 -0
  94. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/dataset/__init__.py +0 -0
  95. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/env.py +0 -0
  96. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/evaluation/__init__.py +0 -0
  97. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/exceptions.py +0 -0
  98. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/integrations/langgraph/__init__.py +0 -0
  99. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/integrations/openlit/__init__.py +0 -0
  100. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/judges/__init__.py +0 -0
  101. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/judges/base_judge.py +0 -0
  102. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/judges/litellm_judge.py +0 -0
  103. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/judges/together_judge.py +0 -0
  104. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/judges/utils.py +0 -0
  105. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/logger.py +0 -0
  106. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/__init__.py +0 -0
  107. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/agent_scorer.py +0 -0
  108. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/api_scorer.py +0 -0
  109. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/base_scorer.py +0 -0
  110. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/example_scorer.py +0 -0
  111. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/exceptions.py +0 -0
  112. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  113. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  114. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  115. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  116. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  117. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  118. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/score.py +0 -0
  119. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/utils.py +0 -0
  120. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/constants.py +0 -0
  121. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/exporters/__init__.py +0 -0
  122. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/exporters/s3.py +0 -0
  123. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/exporters/utils.py +0 -0
  124. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/__init__.py +0 -0
  125. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/config.py +0 -0
  126. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/constants.py +0 -0
  127. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_anthropic/__init__.py +0 -0
  128. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_anthropic/config.py +0 -0
  129. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_anthropic/wrapper.py +0 -0
  130. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_google/__init__.py +0 -0
  131. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_google/config.py +0 -0
  132. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_google/wrapper.py +0 -0
  133. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_openai/__init__.py +0 -0
  134. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_openai/config.py +0 -0
  135. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_openai/wrapper.py +0 -0
  136. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_together/__init__.py +0 -0
  137. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_together/config.py +0 -0
  138. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_together/wrapper.py +0 -0
  139. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/providers.py +0 -0
  140. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/managers.py +0 -0
  141. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/processors/__init__.py +0 -0
  142. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/utils.py +0 -0
  143. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/trainer/console.py +0 -0
  144. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/trainer/trainable_model.py +0 -0
  145. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/async_utils.py +0 -0
  146. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/decorators/__init__.py +0 -0
  147. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/decorators/dont_throw.py +0 -0
  148. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/decorators/use_once.py +0 -0
  149. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/file_utils.py +0 -0
  150. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/guards.py +0 -0
  151. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/meta.py +0 -0
  152. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/serialize.py +0 -0
  153. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/testing.py +0 -0
  154. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/url.py +0 -0
  155. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/version_check.py +0 -0
  156. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/README.md +0 -0
  157. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/__init__.py +0 -0
  158. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/immutable_wrap_async.py +0 -0
  159. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/immutable_wrap_async_iterator.py +0 -0
  160. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/immutable_wrap_sync.py +0 -0
  161. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +0 -0
  162. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/mutable_wrap_async.py +0 -0
  163. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/mutable_wrap_sync.py +0 -0
  164. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/py.typed +0 -0
  165. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/utils.py +0 -0
  166. {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/warnings.py +0 -0
  167. {judgeval-0.16.9 → judgeval-0.18.0}/update_version.py +0 -0
  168. {judgeval-0.16.9 → judgeval-0.18.0}/uv.lock +0 -0

.github/workflows/ci.yaml

@@ -51,7 +51,7 @@ jobs:
           cd src
           export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
           export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
-          uv run pytest tests -n auto
+          uv run pytest tests/tracer/llm -n auto
 
   run-e2e-tests:
     needs: [validate-branch]

PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.16.9
+Version: 0.18.0
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -63,8 +63,7 @@ Judgeval's agent monitoring infra provides a simple harness for integrating GRPO
 await trainer.train(
     agent_function=your_agent_function, # entry point to your agent
     scorers=[RewardScorer()], # Custom scorer you define based on task criteria, acts as reward
-    prompts=training_prompts, # Tasks
-    rft_provider="fireworks"
+    prompts=training_prompts # Tasks
 )
 ```
 

README.md

@@ -36,8 +36,7 @@ Judgeval's agent monitoring infra provides a simple harness for integrating GRPO
 await trainer.train(
     agent_function=your_agent_function, # entry point to your agent
     scorers=[RewardScorer()], # Custom scorer you define based on task criteria, acts as reward
-    prompts=training_prompts, # Tasks
-    rft_provider="fireworks"
+    prompts=training_prompts # Tasks
 )
 ```
 

pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "judgeval"
-version = "0.16.9"
+version = "0.18.0"
 authors = [
     { name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
     { name = "Alex Shan", email = "alex@judgmentlabs.ai" },

scripts/api_generator.py

@@ -43,6 +43,11 @@ JUDGEVAL_PATHS: List[str] = [
     "/e2e_fetch_trace/",
     "/e2e_fetch_span_score/",
     "/e2e_fetch_trace_scorer_span_score/",
+    "/prompts/insert/",
+    "/prompts/fetch/",
+    "/prompts/tag/",
+    "/prompts/untag/",
+    "/prompts/get_prompt_versions/",
 ]
 
 

scripts/openapi_transform.py

@@ -41,6 +41,11 @@ JUDGEVAL_PATHS: List[str] = [
     "/projects/resolve/",
     "/e2e_fetch_trace/",
     "/e2e_fetch_span_score/",
+    "/prompts/insert/",
+    "/prompts/fetch/",
+    "/prompts/tag/",
+    "/prompts/untag/",
+    "/prompts/get_prompt_versions/",
 ]
 
 

src/judgeval/__init__.py

@@ -6,6 +6,7 @@ from judgeval.data.evaluation_run import ExampleEvaluationRun
 
 
 from typing import List, Optional, Union, Sequence
+import ast
 from judgeval.scorers import ExampleAPIScorerConfig
 from judgeval.scorers.example_scorer import ExampleScorer
 from judgeval.data.example import Example
@@ -81,6 +82,7 @@ class JudgmentClient(metaclass=SingletonMeta):
         scorer_file_path: str,
         requirements_file_path: Optional[str] = None,
         unique_name: Optional[str] = None,
+        overwrite: bool = False,
     ) -> bool:
         """
         Upload custom ExampleScorer from files to backend.
@@ -89,6 +91,7 @@ class JudgmentClient(metaclass=SingletonMeta):
             scorer_file_path: Path to Python file containing CustomScorer class
             requirements_file_path: Optional path to requirements.txt
             unique_name: Optional unique identifier (auto-detected from scorer.name if not provided)
+            overwrite: Whether to overwrite existing scorer if it already exists
 
         Returns:
             bool: True if upload successful
@@ -111,6 +114,31 @@ class JudgmentClient(metaclass=SingletonMeta):
         with open(scorer_file_path, "r") as f:
             scorer_code = f.read()
 
+        try:
+            tree = ast.parse(scorer_code, filename=scorer_file_path)
+        except SyntaxError as e:
+            error_msg = f"Invalid Python syntax in {scorer_file_path}: {e}"
+            judgeval_logger.error(error_msg)
+            raise ValueError(error_msg)
+
+        scorer_classes = []
+        for node in ast.walk(tree):
+            if isinstance(node, ast.ClassDef):
+                for base in node.bases:
+                    if (isinstance(base, ast.Name) and base.id == "ExampleScorer") or (
+                        isinstance(base, ast.Attribute) and base.attr == "ExampleScorer"
+                    ):
+                        scorer_classes.append(node.name)
+
+        if len(scorer_classes) > 1:
+            error_msg = f"Multiple ExampleScorer classes found in {scorer_file_path}: {scorer_classes}. Please only upload one scorer class per file."
+            judgeval_logger.error(error_msg)
+            raise ValueError(error_msg)
+        elif len(scorer_classes) == 0:
+            error_msg = f"No ExampleScorer class was found in {scorer_file_path}. Please ensure the file contains a valid scorer class that inherits from ExampleScorer."
+            judgeval_logger.error(error_msg)
+            raise ValueError(error_msg)
+
         # Read requirements (optional)
         requirements_text = ""
         if requirements_file_path and os.path.exists(requirements_file_path):
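
The validation added here parses the scorer file with Python's `ast` module and lets the upload proceed only if exactly one class deriving from `ExampleScorer` is found. A minimal standalone sketch of the same check, using a made-up scorer source string:

```python
import ast

# Hypothetical scorer source used only to illustrate the AST walk in the hunk above.
source = '''
from judgeval.scorers.example_scorer import ExampleScorer

class MyScorer(ExampleScorer):
    name = "my-scorer"
'''

tree = ast.parse(source)
scorer_classes = [
    node.name
    for node in ast.walk(tree)
    if isinstance(node, ast.ClassDef)
    and any(
        (isinstance(base, ast.Name) and base.id == "ExampleScorer")
        or (isinstance(base, ast.Attribute) and base.attr == "ExampleScorer")
        for base in node.bases
    )
]
print(scorer_classes)  # ['MyScorer'] -> exactly one subclass, so the upload would proceed
```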

@@ -127,6 +155,7 @@ class JudgmentClient(metaclass=SingletonMeta):
                 "scorer_name": unique_name,
                 "scorer_code": scorer_code,
                 "requirements_text": requirements_text,
+                "overwrite": overwrite,
             }
         )
 

src/judgeval/api/__init__.py

@@ -189,6 +189,59 @@ class JudgmentSyncClient:
             payload,
         )
 
+    def prompts_insert(self, payload: PromptInsertRequest) -> PromptInsertResponse:
+        return self._request(
+            "POST",
+            url_for("/prompts/insert/"),
+            payload,
+        )
+
+    def prompts_tag(self, payload: PromptTagRequest) -> PromptTagResponse:
+        return self._request(
+            "POST",
+            url_for("/prompts/tag/"),
+            payload,
+        )
+
+    def prompts_untag(self, payload: PromptUntagRequest) -> PromptUntagResponse:
+        return self._request(
+            "POST",
+            url_for("/prompts/untag/"),
+            payload,
+        )
+
+    def prompts_fetch(
+        self,
+        project_id: str,
+        name: str,
+        commit_id: Optional[str] = None,
+        tag: Optional[str] = None,
+    ) -> PromptFetchResponse:
+        query_params = {}
+        query_params["project_id"] = project_id
+        query_params["name"] = name
+        if commit_id is not None:
+            query_params["commit_id"] = commit_id
+        if tag is not None:
+            query_params["tag"] = tag
+        return self._request(
+            "GET",
+            url_for("/prompts/fetch/"),
+            query_params,
+        )
+
+    def prompts_get_prompt_versions(
+        self, project_id: str, name: str
+    ) -> PromptVersionsResponse:
+        query_params = {}
+        query_params["project_id"] = project_id
+        query_params["name"] = name
+        return self._request(
+            "GET",
+            url_for("/prompts/get_prompt_versions/"),
+            query_params,
+        )
+
     def projects_resolve(
         self, payload: ResolveProjectNameRequest
     ) -> ResolveProjectNameResponse:
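
These methods give the sync client thin wrappers over the new `/prompts/*` endpoints. A rough usage sketch based only on the signatures above; the constructor arguments, project id, prompt text, and tags are placeholders:

```python
from judgeval.api import JudgmentSyncClient

client = JudgmentSyncClient(api_key="...", organization_id="...")  # constructor args assumed

# Store a new prompt version, then fetch it back by tag.
inserted = client.prompts_insert(
    {
        "project_id": "proj_123",
        "name": "support-agent-system-prompt",
        "prompt": "You are a helpful support agent.",
        "tags": ["prod"],
    }
)
print(inserted["commit_id"])

fetched = client.prompts_fetch(
    project_id="proj_123", name="support-agent-system-prompt", tag="prod"
)
commit = fetched.get("commit")
if commit is not None:
    print(commit["prompt"])
```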

src/judgeval/api/__init__.py (continued)

@@ -381,6 +434,61 @@
             payload,
         )
 
+    async def prompts_insert(
+        self, payload: PromptInsertRequest
+    ) -> PromptInsertResponse:
+        return await self._request(
+            "POST",
+            url_for("/prompts/insert/"),
+            payload,
+        )
+
+    async def prompts_tag(self, payload: PromptTagRequest) -> PromptTagResponse:
+        return await self._request(
+            "POST",
+            url_for("/prompts/tag/"),
+            payload,
+        )
+
+    async def prompts_untag(self, payload: PromptUntagRequest) -> PromptUntagResponse:
+        return await self._request(
+            "POST",
+            url_for("/prompts/untag/"),
+            payload,
+        )
+
+    async def prompts_fetch(
+        self,
+        project_id: str,
+        name: str,
+        commit_id: Optional[str] = None,
+        tag: Optional[str] = None,
+    ) -> PromptFetchResponse:
+        query_params = {}
+        query_params["project_id"] = project_id
+        query_params["name"] = name
+        if commit_id is not None:
+            query_params["commit_id"] = commit_id
+        if tag is not None:
+            query_params["tag"] = tag
+        return await self._request(
+            "GET",
+            url_for("/prompts/fetch/"),
+            query_params,
+        )
+
+    async def prompts_get_prompt_versions(
+        self, project_id: str, name: str
+    ) -> PromptVersionsResponse:
+        query_params = {}
+        query_params["project_id"] = project_id
+        query_params["name"] = name
+        return await self._request(
+            "GET",
+            url_for("/prompts/get_prompt_versions/"),
+            query_params,
+        )
+
     async def projects_resolve(
         self, payload: ResolveProjectNameRequest
     ) -> ResolveProjectNameResponse:
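
The async client mirrors the sync surface one-to-one, so the same calls can simply be awaited. A short sketch with the same placeholder values as above:

```python
import asyncio

from judgeval.api import JudgmentAsyncClient


async def main() -> None:
    client = JudgmentAsyncClient(api_key="...", organization_id="...")  # constructor args assumed
    versions = await client.prompts_get_prompt_versions(
        project_id="proj_123", name="support-agent-system-prompt"
    )
    for commit in versions["versions"]:
        print(commit["commit_id"], commit["tags"])


asyncio.run(main())
```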

src/judgeval/api/api_types.py

@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  .openapi.json
-#   timestamp: 2025-10-15T19:25:00+00:00
+#   timestamp: 2025-10-21T01:37:42+00:00
 
 from __future__ import annotations
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -80,6 +80,7 @@ class CustomScorerUploadPayload(TypedDict):
     scorer_name: str
     scorer_code: str
     requirements_text: str
+    overwrite: NotRequired[bool]
 
 
 class CustomScorerTemplateResponse(TypedDict):
@@ -88,6 +89,40 @@ class CustomScorerTemplateResponse(TypedDict):
     message: str
 
 
+class PromptInsertRequest(TypedDict):
+    project_id: str
+    name: str
+    prompt: str
+    tags: List[str]
+
+
+class PromptInsertResponse(TypedDict):
+    commit_id: str
+    parent_commit_id: NotRequired[Optional[str]]
+    created_at: str
+
+
+class PromptTagRequest(TypedDict):
+    project_id: str
+    name: str
+    commit_id: str
+    tags: List[str]
+
+
+class PromptTagResponse(TypedDict):
+    commit_id: str
+
+
+class PromptUntagRequest(TypedDict):
+    project_id: str
+    name: str
+    tags: List[str]
+
+
+class PromptUntagResponse(TypedDict):
+    commit_ids: List[str]
+
+
 class ResolveProjectNameRequest(TypedDict):
     project_name: str
 
@@ -169,6 +204,18 @@ class PromptScorer(TypedDict):
     is_trace: NotRequired[Optional[bool]]
 
 
+class PromptCommitInfo(TypedDict):
+    name: str
+    prompt: str
+    tags: List[str]
+    commit_id: str
+    parent_commit_id: NotRequired[Optional[str]]
+    created_at: str
+    first_name: str
+    last_name: str
+    user_email: str
+
+
 class ScorerData(TypedDict):
     id: NotRequired[str]
     name: str
@@ -265,6 +312,14 @@ class FetchPromptScorersResponse(TypedDict):
     scorers: List[PromptScorer]
 
 
+class PromptFetchResponse(TypedDict):
+    commit: NotRequired[Optional[PromptCommitInfo]]
+
+
+class PromptVersionsResponse(TypedDict):
+    versions: List[PromptCommitInfo]
+
+
 class ScoringResult(TypedDict):
     success: bool
     scorers_data: List[ScorerData]

src/judgeval/cli.py

@@ -26,6 +26,12 @@ def upload_scorer(
     unique_name: str = typer.Option(
         None, help="Custom name for the scorer (auto-detected if not provided)"
    ),
+    overwrite: bool = typer.Option(
+        False,
+        "--overwrite",
+        "-o",
+        help="Overwrite existing scorer if it already exists",
+    ),
 ):
     # Validate file paths
     if not Path(scorer_file_path).exists():
@@ -43,6 +49,7 @@
         scorer_file_path=scorer_file_path,
         requirements_file_path=requirements_file_path,
         unique_name=unique_name,
+        overwrite=overwrite,
     )
 
     if not result:
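
Taken together with the `JudgmentClient` changes earlier in this release, both the Python client and the `judgeval` CLI (via the new `--overwrite`/`-o` option above) can now replace an existing scorer with the same name. A hedged sketch of the client-side call; the method name `upload_custom_scorer` and the file names are assumptions, since the hunks show only the parameter list and docstring:

```python
from judgeval import JudgmentClient

client = JudgmentClient()

# Method name assumed for illustration; the diff shows its parameters but not its name.
client.upload_custom_scorer(
    scorer_file_path="my_scorer.py",            # must define exactly one ExampleScorer subclass
    requirements_file_path="requirements.txt",  # optional
    overwrite=True,                             # new in 0.18.0: replace an existing scorer with the same name
)
```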

src/judgeval/data/judgment_types.py

@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  .openapi.json
-#   timestamp: 2025-10-15T19:24:59+00:00
+#   timestamp: 2025-10-21T01:37:41+00:00
 
 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
@@ -87,6 +87,7 @@ class CustomScorerUploadPayload(BaseModel):
     scorer_name: Annotated[str, Field(title="Scorer Name")]
     scorer_code: Annotated[str, Field(title="Scorer Code")]
     requirements_text: Annotated[str, Field(title="Requirements Text")]
+    overwrite: Annotated[Optional[bool], Field(title="Overwrite")] = False
 
 
 class CustomScorerTemplateResponse(BaseModel):
@@ -95,6 +96,40 @@ class CustomScorerTemplateResponse(BaseModel):
     message: Annotated[str, Field(title="Message")]
 
 
+class PromptInsertRequest(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+    name: Annotated[str, Field(title="Name")]
+    prompt: Annotated[str, Field(title="Prompt")]
+    tags: Annotated[List[str], Field(title="Tags")]
+
+
+class PromptInsertResponse(BaseModel):
+    commit_id: Annotated[str, Field(title="Commit Id")]
+    parent_commit_id: Annotated[Optional[str], Field(title="Parent Commit Id")] = None
+    created_at: Annotated[str, Field(title="Created At")]
+
+
+class PromptTagRequest(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+    name: Annotated[str, Field(title="Name")]
+    commit_id: Annotated[str, Field(title="Commit Id")]
+    tags: Annotated[List[str], Field(title="Tags")]
+
+
+class PromptTagResponse(BaseModel):
+    commit_id: Annotated[str, Field(title="Commit Id")]
+
+
+class PromptUntagRequest(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+    name: Annotated[str, Field(title="Name")]
+    tags: Annotated[List[str], Field(title="Tags")]
+
+
+class PromptUntagResponse(BaseModel):
+    commit_ids: Annotated[List[str], Field(title="Commit Ids")]
+
+
 class ResolveProjectNameRequest(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]
 
@@ -187,6 +222,18 @@ class PromptScorer(BaseModel):
     is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
 
 
+class PromptCommitInfo(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+    prompt: Annotated[str, Field(title="Prompt")]
+    tags: Annotated[List[str], Field(title="Tags")]
+    commit_id: Annotated[str, Field(title="Commit Id")]
+    parent_commit_id: Annotated[Optional[str], Field(title="Parent Commit Id")] = None
+    created_at: Annotated[str, Field(title="Created At")]
+    first_name: Annotated[str, Field(title="First Name")]
+    last_name: Annotated[str, Field(title="Last Name")]
+    user_email: Annotated[str, Field(title="User Email")]
+
+
 class ScorerData(BaseModel):
     id: Annotated[Optional[str], Field(title="Id")] = None
     name: Annotated[str, Field(title="Name")]
@@ -299,6 +346,14 @@ class FetchPromptScorersResponse(BaseModel):
     scorers: Annotated[List[PromptScorer], Field(title="Scorers")]
 
 
+class PromptFetchResponse(BaseModel):
+    commit: Optional[PromptCommitInfo] = None
+
+
+class PromptVersionsResponse(BaseModel):
+    versions: Annotated[List[PromptCommitInfo], Field(title="Versions")]
+
+
 class ScoringResult(BaseModel):
     success: Annotated[bool, Field(title="Success")]
     scorers_data: Annotated[List[ScorerData], Field(title="Scorers Data")]
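
Since `judgment_types.py` holds the pydantic counterparts of the TypedDicts in `api_types.py`, raw endpoint JSON can be validated into typed objects. A minimal sketch with a fabricated payload:

```python
from judgeval.data.judgment_types import PromptFetchResponse

# Fabricated response body shaped like the PromptCommitInfo model above.
raw = {
    "commit": {
        "name": "support-agent-system-prompt",
        "prompt": "You are a helpful support agent.",
        "tags": ["prod"],
        "commit_id": "abc123",
        "parent_commit_id": None,
        "created_at": "2025-10-21T00:00:00Z",
        "first_name": "Ada",
        "last_name": "Lovelace",
        "user_email": "ada@example.com",
    }
}

resp = PromptFetchResponse(**raw)  # nested dict is coerced into PromptCommitInfo
assert resp.commit is not None
print(resp.commit.commit_id, resp.commit.tags)
```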