judgeval 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. {judgeval-0.3.0 → judgeval-0.3.2}/PKG-INFO +1 -1
  2. {judgeval-0.3.0 → judgeval-0.3.2}/pyproject.toml +1 -1
  3. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/api/constants.py +1 -1
  4. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/trace_manager.py +5 -1
  5. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/constants.py +2 -0
  6. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/judgment_types.py +1 -2
  7. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judges/together_judge.py +2 -1
  8. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/run_evaluation.py +1 -1
  9. {judgeval-0.3.0 → judgeval-0.3.2}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  10. {judgeval-0.3.0 → judgeval-0.3.2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  11. {judgeval-0.3.0 → judgeval-0.3.2}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  12. {judgeval-0.3.0 → judgeval-0.3.2}/.github/pull_request_template.md +0 -0
  13. {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/blocked-pr.yaml +0 -0
  14. {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/ci.yaml +0 -0
  15. {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/lint.yaml +0 -0
  16. {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/merge-branch-check.yaml +0 -0
  17. {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/mypy.yaml +0 -0
  18. {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
  19. {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/release.yaml +0 -0
  20. {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/validate-branch.yaml +0 -0
  21. {judgeval-0.3.0 → judgeval-0.3.2}/.gitignore +0 -0
  22. {judgeval-0.3.0 → judgeval-0.3.2}/.pre-commit-config.yaml +0 -0
  23. {judgeval-0.3.0 → judgeval-0.3.2}/LICENSE.md +0 -0
  24. {judgeval-0.3.0 → judgeval-0.3.2}/README.md +0 -0
  25. {judgeval-0.3.0 → judgeval-0.3.2}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
  26. {judgeval-0.3.0 → judgeval-0.3.2}/assets/agent.gif +0 -0
  27. {judgeval-0.3.0 → judgeval-0.3.2}/assets/agent_trace_example.png +0 -0
  28. {judgeval-0.3.0 → judgeval-0.3.2}/assets/data.gif +0 -0
  29. {judgeval-0.3.0 → judgeval-0.3.2}/assets/dataset_clustering_screenshot.png +0 -0
  30. {judgeval-0.3.0 → judgeval-0.3.2}/assets/dataset_clustering_screenshot_dm.png +0 -0
  31. {judgeval-0.3.0 → judgeval-0.3.2}/assets/datasets_preview_screenshot.png +0 -0
  32. {judgeval-0.3.0 → judgeval-0.3.2}/assets/document.gif +0 -0
  33. {judgeval-0.3.0 → judgeval-0.3.2}/assets/error_analysis_dashboard.png +0 -0
  34. {judgeval-0.3.0 → judgeval-0.3.2}/assets/errors.png +0 -0
  35. {judgeval-0.3.0 → judgeval-0.3.2}/assets/experiments_dashboard_screenshot.png +0 -0
  36. {judgeval-0.3.0 → judgeval-0.3.2}/assets/experiments_page.png +0 -0
  37. {judgeval-0.3.0 → judgeval-0.3.2}/assets/experiments_pagev2.png +0 -0
  38. {judgeval-0.3.0 → judgeval-0.3.2}/assets/logo-dark.svg +0 -0
  39. {judgeval-0.3.0 → judgeval-0.3.2}/assets/logo-light.svg +0 -0
  40. {judgeval-0.3.0 → judgeval-0.3.2}/assets/monitoring_screenshot.png +0 -0
  41. {judgeval-0.3.0 → judgeval-0.3.2}/assets/new_darkmode.svg +0 -0
  42. {judgeval-0.3.0 → judgeval-0.3.2}/assets/new_lightmode.svg +0 -0
  43. {judgeval-0.3.0 → judgeval-0.3.2}/assets/online_eval.png +0 -0
  44. {judgeval-0.3.0 → judgeval-0.3.2}/assets/product_shot.png +0 -0
  45. {judgeval-0.3.0 → judgeval-0.3.2}/assets/test.png +0 -0
  46. {judgeval-0.3.0 → judgeval-0.3.2}/assets/tests.png +0 -0
  47. {judgeval-0.3.0 → judgeval-0.3.2}/assets/trace.gif +0 -0
  48. {judgeval-0.3.0 → judgeval-0.3.2}/assets/trace_demo.png +0 -0
  49. {judgeval-0.3.0 → judgeval-0.3.2}/assets/trace_screenshot.png +0 -0
  50. {judgeval-0.3.0 → judgeval-0.3.2}/assets/trace_screenshot_old.png +0 -0
  51. {judgeval-0.3.0 → judgeval-0.3.2}/pytest.ini +0 -0
  52. {judgeval-0.3.0 → judgeval-0.3.2}/src/.coveragerc +0 -0
  53. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/__init__.py +0 -0
  54. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/clients.py +0 -0
  55. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/__init__.py +0 -0
  56. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/api/__init__.py +0 -0
  57. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/api/api.py +0 -0
  58. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/exceptions.py +0 -0
  59. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/logger.py +0 -0
  60. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/storage/__init__.py +0 -0
  61. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/storage/s3_storage.py +0 -0
  62. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/__init__.py +0 -0
  63. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/constants.py +0 -0
  64. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/core.py +0 -0
  65. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/otel_exporter.py +0 -0
  66. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/otel_span_processor.py +0 -0
  67. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/span_processor.py +0 -0
  68. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/span_transformer.py +0 -0
  69. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/utils.py +0 -0
  70. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/__init__.py +0 -0
  71. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/example.py +0 -0
  72. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/result.py +0 -0
  73. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/scorer_data.py +0 -0
  74. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
  75. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/scripts/openapi_transform.py +0 -0
  76. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/tool.py +0 -0
  77. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/trace.py +0 -0
  78. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/trace_run.py +0 -0
  79. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/dataset.py +0 -0
  80. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/evaluation_run.py +0 -0
  81. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/integrations/langgraph.py +0 -0
  82. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judges/__init__.py +0 -0
  83. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judges/base_judge.py +0 -0
  84. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judges/litellm_judge.py +0 -0
  85. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judges/mixture_of_judges.py +0 -0
  86. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judges/utils.py +0 -0
  87. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judgment_client.py +0 -0
  88. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/rules.py +0 -0
  89. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/__init__.py +0 -0
  90. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/agent_scorer.py +0 -0
  91. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/api_scorer.py +0 -0
  92. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/base_scorer.py +0 -0
  93. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/example_scorer.py +0 -0
  94. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/exceptions.py +0 -0
  95. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  96. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  97. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  98. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  99. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -0
  100. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -0
  101. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  102. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
  103. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  104. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -0
  105. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -0
  106. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -0
  107. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/score.py +0 -0
  108. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/utils.py +0 -0
  109. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/tracer/__init__.py +0 -0
  110. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/utils/alerts.py +0 -0
  111. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/utils/file_utils.py +0 -0
  112. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/utils/requests.py +0 -0
  113. {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/version_check.py +0 -0
  114. {judgeval-0.3.0 → judgeval-0.3.2}/src/update_types.sh +0 -0
  115. {judgeval-0.3.0 → judgeval-0.3.2}/update_version.py +0 -0
  116. {judgeval-0.3.0 → judgeval-0.3.2}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "judgeval"
3
- version = "0.3.0"
3
+ version = "0.3.2"
4
4
  authors = [
5
5
  { name="Andrew Li", email="andrew@judgmentlabs.ai" },
6
6
  { name="Alex Shan", email="alex@judgmentlabs.ai" },
@@ -142,7 +142,7 @@ class DatasetStatsPayload(TypedDict):
142
142
 
143
143
 
144
144
  # Projects API
145
- JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete_from_judgeval"
145
+ JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete_from_judgeval/"
146
146
  JUDGMENT_PROJECT_CREATE_API_URL = f"{ROOT_API}/projects/add/"
147
147
 
148
148
 
@@ -54,7 +54,6 @@ class TraceManagerClient:
54
54
  Returns:
55
55
  dict: Server response containing UI URL and other metadata
56
56
  """
57
- server_response = self.api_client.upsert_trace(trace_data)
58
57
 
59
58
  if self.tracer and self.tracer.use_s3 and final_save:
60
59
  try:
@@ -67,6 +66,11 @@ class TraceManagerClient:
67
66
  except Exception as e:
68
67
  judgeval_logger.warning(f"Failed to save trace to S3: {str(e)}")
69
68
 
69
+ trace_data.pop("trace_spans", None)
70
+ trace_data.pop("evaluation_runs", None)
71
+
72
+ server_response = self.api_client.upsert_trace(trace_data)
73
+
70
74
  if not offline_mode and show_link and "ui_results_url" in server_response:
71
75
  pretty_str = f"\n🔍 You can view your trace data here: [rgb(106,0,255)][link={server_response['ui_results_url']}]View Trace[/link]\n"
72
76
  rprint(pretty_str)
@@ -104,6 +104,8 @@ TOGETHER_SUPPORTED_MODELS = [
104
104
  "mistralai/Mistral-7B-Instruct-v0.1",
105
105
  ]
106
106
 
107
+ DEFAULT_TOGETHER_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct-Lite"
108
+
107
109
  JUDGMENT_SUPPORTED_MODELS = {"osiris-large", "osiris-mini", "osiris"}
108
110
 
109
111
  ACCEPTABLE_MODELS = (
@@ -1,6 +1,6 @@
1
1
  # generated by datamodel-codegen:
2
2
  # filename: openapi_new.json
3
- # timestamp: 2025-07-26T00:14:40+00:00
3
+ # timestamp: 2025-07-29T18:13:07+00:00
4
4
 
5
5
  from __future__ import annotations
6
6
 
@@ -135,7 +135,6 @@ class TraceJudgmentType(BaseModel):
135
135
  created_at: Annotated[str, Field(title="Created At")]
136
136
  duration: Annotated[float, Field(title="Duration")]
137
137
  trace_spans: Annotated[List[TraceSpanJudgmentType], Field(title="Trace Spans")]
138
- overwrite: Annotated[Optional[bool], Field(title="Overwrite")] = False
139
138
  offline_mode: Annotated[Optional[bool], Field(title="Offline Mode")] = False
140
139
  rules: Annotated[Optional[Dict[str, Any]], Field(title="Rules")] = Field(
141
140
  default_factory=dict
@@ -11,6 +11,7 @@ from judgeval.common.utils import (
11
11
  afetch_together_api_response,
12
12
  )
13
13
  from judgeval.common.logger import judgeval_logger
14
+ from judgeval.constants import DEFAULT_TOGETHER_MODEL
14
15
 
15
16
  BASE_CONVERSATION = [
16
17
  {"role": "system", "content": "You are a helpful assistant."},
@@ -18,7 +19,7 @@ BASE_CONVERSATION = [
18
19
 
19
20
 
20
21
  class TogetherJudge(JudgevalJudge):
21
- def __init__(self, model: str = "Qwen/Qwen2.5-72B-Instruct-Turbo", **kwargs):
22
+ def __init__(self, model: str = DEFAULT_TOGETHER_MODEL, **kwargs):
22
23
  self.model = model
23
24
  self.kwargs = kwargs
24
25
  super().__init__(model_name=model)
@@ -414,7 +414,7 @@ def _poll_evaluation_until_complete(
414
414
  expected_scorer_data_count: int,
415
415
  poll_interval_seconds: float = 5,
416
416
  max_failures: int = 5,
417
- max_poll_count: int = 24, # This should be equivalent to 120 seconds
417
+ max_poll_count: int = 60, # This should be equivalent to 5 minutes
418
418
  ) -> Tuple[List[ScoringResult], str]:
419
419
  """
420
420
  Polls until the evaluation is complete and returns the results.
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes