judgeval 0.3.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. {judgeval-0.3.0 → judgeval-0.3.1}/PKG-INFO +1 -1
  2. {judgeval-0.3.0 → judgeval-0.3.1}/pyproject.toml +1 -1
  3. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/api/constants.py +1 -1
  4. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/tracer/core.py +0 -2
  5. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/constants.py +2 -0
  6. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/data/judgment_types.py +1 -2
  7. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/judges/together_judge.py +2 -1
  8. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/run_evaluation.py +1 -1
  9. {judgeval-0.3.0 → judgeval-0.3.1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  10. {judgeval-0.3.0 → judgeval-0.3.1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  11. {judgeval-0.3.0 → judgeval-0.3.1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  12. {judgeval-0.3.0 → judgeval-0.3.1}/.github/pull_request_template.md +0 -0
  13. {judgeval-0.3.0 → judgeval-0.3.1}/.github/workflows/blocked-pr.yaml +0 -0
  14. {judgeval-0.3.0 → judgeval-0.3.1}/.github/workflows/ci.yaml +0 -0
  15. {judgeval-0.3.0 → judgeval-0.3.1}/.github/workflows/lint.yaml +0 -0
  16. {judgeval-0.3.0 → judgeval-0.3.1}/.github/workflows/merge-branch-check.yaml +0 -0
  17. {judgeval-0.3.0 → judgeval-0.3.1}/.github/workflows/mypy.yaml +0 -0
  18. {judgeval-0.3.0 → judgeval-0.3.1}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
  19. {judgeval-0.3.0 → judgeval-0.3.1}/.github/workflows/release.yaml +0 -0
  20. {judgeval-0.3.0 → judgeval-0.3.1}/.github/workflows/validate-branch.yaml +0 -0
  21. {judgeval-0.3.0 → judgeval-0.3.1}/.gitignore +0 -0
  22. {judgeval-0.3.0 → judgeval-0.3.1}/.pre-commit-config.yaml +0 -0
  23. {judgeval-0.3.0 → judgeval-0.3.1}/LICENSE.md +0 -0
  24. {judgeval-0.3.0 → judgeval-0.3.1}/README.md +0 -0
  25. {judgeval-0.3.0 → judgeval-0.3.1}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
  26. {judgeval-0.3.0 → judgeval-0.3.1}/assets/agent.gif +0 -0
  27. {judgeval-0.3.0 → judgeval-0.3.1}/assets/agent_trace_example.png +0 -0
  28. {judgeval-0.3.0 → judgeval-0.3.1}/assets/data.gif +0 -0
  29. {judgeval-0.3.0 → judgeval-0.3.1}/assets/dataset_clustering_screenshot.png +0 -0
  30. {judgeval-0.3.0 → judgeval-0.3.1}/assets/dataset_clustering_screenshot_dm.png +0 -0
  31. {judgeval-0.3.0 → judgeval-0.3.1}/assets/datasets_preview_screenshot.png +0 -0
  32. {judgeval-0.3.0 → judgeval-0.3.1}/assets/document.gif +0 -0
  33. {judgeval-0.3.0 → judgeval-0.3.1}/assets/error_analysis_dashboard.png +0 -0
  34. {judgeval-0.3.0 → judgeval-0.3.1}/assets/errors.png +0 -0
  35. {judgeval-0.3.0 → judgeval-0.3.1}/assets/experiments_dashboard_screenshot.png +0 -0
  36. {judgeval-0.3.0 → judgeval-0.3.1}/assets/experiments_page.png +0 -0
  37. {judgeval-0.3.0 → judgeval-0.3.1}/assets/experiments_pagev2.png +0 -0
  38. {judgeval-0.3.0 → judgeval-0.3.1}/assets/logo-dark.svg +0 -0
  39. {judgeval-0.3.0 → judgeval-0.3.1}/assets/logo-light.svg +0 -0
  40. {judgeval-0.3.0 → judgeval-0.3.1}/assets/monitoring_screenshot.png +0 -0
  41. {judgeval-0.3.0 → judgeval-0.3.1}/assets/new_darkmode.svg +0 -0
  42. {judgeval-0.3.0 → judgeval-0.3.1}/assets/new_lightmode.svg +0 -0
  43. {judgeval-0.3.0 → judgeval-0.3.1}/assets/online_eval.png +0 -0
  44. {judgeval-0.3.0 → judgeval-0.3.1}/assets/product_shot.png +0 -0
  45. {judgeval-0.3.0 → judgeval-0.3.1}/assets/test.png +0 -0
  46. {judgeval-0.3.0 → judgeval-0.3.1}/assets/tests.png +0 -0
  47. {judgeval-0.3.0 → judgeval-0.3.1}/assets/trace.gif +0 -0
  48. {judgeval-0.3.0 → judgeval-0.3.1}/assets/trace_demo.png +0 -0
  49. {judgeval-0.3.0 → judgeval-0.3.1}/assets/trace_screenshot.png +0 -0
  50. {judgeval-0.3.0 → judgeval-0.3.1}/assets/trace_screenshot_old.png +0 -0
  51. {judgeval-0.3.0 → judgeval-0.3.1}/pytest.ini +0 -0
  52. {judgeval-0.3.0 → judgeval-0.3.1}/src/.coveragerc +0 -0
  53. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/__init__.py +0 -0
  54. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/clients.py +0 -0
  55. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/__init__.py +0 -0
  56. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/api/__init__.py +0 -0
  57. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/api/api.py +0 -0
  58. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/exceptions.py +0 -0
  59. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/logger.py +0 -0
  60. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/storage/__init__.py +0 -0
  61. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/storage/s3_storage.py +0 -0
  62. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/tracer/__init__.py +0 -0
  63. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/tracer/constants.py +0 -0
  64. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/tracer/otel_exporter.py +0 -0
  65. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/tracer/otel_span_processor.py +0 -0
  66. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/tracer/span_processor.py +0 -0
  67. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/tracer/span_transformer.py +0 -0
  68. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/tracer/trace_manager.py +0 -0
  69. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/common/utils.py +0 -0
  70. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/data/__init__.py +0 -0
  71. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/data/example.py +0 -0
  72. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/data/result.py +0 -0
  73. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/data/scorer_data.py +0 -0
  74. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
  75. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/data/scripts/openapi_transform.py +0 -0
  76. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/data/tool.py +0 -0
  77. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/data/trace.py +0 -0
  78. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/data/trace_run.py +0 -0
  79. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/dataset.py +0 -0
  80. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/evaluation_run.py +0 -0
  81. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/integrations/langgraph.py +0 -0
  82. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/judges/__init__.py +0 -0
  83. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/judges/base_judge.py +0 -0
  84. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/judges/litellm_judge.py +0 -0
  85. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/judges/mixture_of_judges.py +0 -0
  86. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/judges/utils.py +0 -0
  87. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/judgment_client.py +0 -0
  88. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/rules.py +0 -0
  89. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/__init__.py +0 -0
  90. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/agent_scorer.py +0 -0
  91. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/api_scorer.py +0 -0
  92. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/base_scorer.py +0 -0
  93. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/example_scorer.py +0 -0
  94. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/exceptions.py +0 -0
  95. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  96. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  97. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  98. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  99. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -0
  100. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -0
  101. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  102. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
  103. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  104. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -0
  105. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -0
  106. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -0
  107. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/score.py +0 -0
  108. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/scorers/utils.py +0 -0
  109. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/tracer/__init__.py +0 -0
  110. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/utils/alerts.py +0 -0
  111. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/utils/file_utils.py +0 -0
  112. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/utils/requests.py +0 -0
  113. {judgeval-0.3.0 → judgeval-0.3.1}/src/judgeval/version_check.py +0 -0
  114. {judgeval-0.3.0 → judgeval-0.3.1}/src/update_types.sh +0 -0
  115. {judgeval-0.3.0 → judgeval-0.3.1}/update_version.py +0 -0
  116. {judgeval-0.3.0 → judgeval-0.3.1}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "judgeval"
3
- version = "0.3.0"
3
+ version = "0.3.1"
4
4
  authors = [
5
5
  { name="Andrew Li", email="andrew@judgmentlabs.ai" },
6
6
  { name="Alex Shan", email="alex@judgmentlabs.ai" },
@@ -142,7 +142,7 @@ class DatasetStatsPayload(TypedDict):
142
142
 
143
143
 
144
144
  # Projects API
145
- JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete_from_judgeval"
145
+ JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete_from_judgeval/"
146
146
  JUDGMENT_PROJECT_CREATE_API_URL = f"{ROOT_API}/projects/add/"
147
147
 
148
148
 
@@ -418,8 +418,6 @@ class TraceClient:
418
418
  self.start_time or time.time(), timezone.utc
419
419
  ).isoformat(),
420
420
  "duration": total_duration,
421
- "trace_spans": [span.model_dump() for span in self.trace_spans],
422
- "evaluation_runs": [run.model_dump() for run in self.evaluation_runs],
423
421
  "offline_mode": self.tracer.offline_mode,
424
422
  "parent_trace_id": self.parent_trace_id,
425
423
  "parent_name": self.parent_name,
@@ -104,6 +104,8 @@ TOGETHER_SUPPORTED_MODELS = [
104
104
  "mistralai/Mistral-7B-Instruct-v0.1",
105
105
  ]
106
106
 
107
+ DEFAULT_TOGETHER_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct-Lite"
108
+
107
109
  JUDGMENT_SUPPORTED_MODELS = {"osiris-large", "osiris-mini", "osiris"}
108
110
 
109
111
  ACCEPTABLE_MODELS = (
@@ -1,6 +1,6 @@
1
1
  # generated by datamodel-codegen:
2
2
  # filename: openapi_new.json
3
- # timestamp: 2025-07-26T00:14:40+00:00
3
+ # timestamp: 2025-07-29T18:13:07+00:00
4
4
 
5
5
  from __future__ import annotations
6
6
 
@@ -135,7 +135,6 @@ class TraceJudgmentType(BaseModel):
135
135
  created_at: Annotated[str, Field(title="Created At")]
136
136
  duration: Annotated[float, Field(title="Duration")]
137
137
  trace_spans: Annotated[List[TraceSpanJudgmentType], Field(title="Trace Spans")]
138
- overwrite: Annotated[Optional[bool], Field(title="Overwrite")] = False
139
138
  offline_mode: Annotated[Optional[bool], Field(title="Offline Mode")] = False
140
139
  rules: Annotated[Optional[Dict[str, Any]], Field(title="Rules")] = Field(
141
140
  default_factory=dict
@@ -11,6 +11,7 @@ from judgeval.common.utils import (
11
11
  afetch_together_api_response,
12
12
  )
13
13
  from judgeval.common.logger import judgeval_logger
14
+ from judgeval.constants import DEFAULT_TOGETHER_MODEL
14
15
 
15
16
  BASE_CONVERSATION = [
16
17
  {"role": "system", "content": "You are a helpful assistant."},
@@ -18,7 +19,7 @@ BASE_CONVERSATION = [
18
19
 
19
20
 
20
21
  class TogetherJudge(JudgevalJudge):
21
- def __init__(self, model: str = "Qwen/Qwen2.5-72B-Instruct-Turbo", **kwargs):
22
+ def __init__(self, model: str = DEFAULT_TOGETHER_MODEL, **kwargs):
22
23
  self.model = model
23
24
  self.kwargs = kwargs
24
25
  super().__init__(model_name=model)
@@ -414,7 +414,7 @@ def _poll_evaluation_until_complete(
414
414
  expected_scorer_data_count: int,
415
415
  poll_interval_seconds: float = 5,
416
416
  max_failures: int = 5,
417
- max_poll_count: int = 24, # This should be equivalent to 120 seconds
417
+ max_poll_count: int = 60, # This should be equivalent to 5 minutes
418
418
  ) -> Tuple[List[ScoringResult], str]:
419
419
  """
420
420
  Polls until the evaluation is complete and returns the results.
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes