judgeval 0.0.51__tar.gz → 0.0.53__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval-0.0.53/.github/ISSUE_TEMPLATE/bug_report.md +41 -0
- judgeval-0.0.53/.github/ISSUE_TEMPLATE/feature_request.md +43 -0
- judgeval-0.0.53/.github/pull_request_template.md +23 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/.gitignore +6 -1
- {judgeval-0.0.51 → judgeval-0.0.53}/PKG-INFO +3 -2
- {judgeval-0.0.51 → judgeval-0.0.53}/README.md +1 -1
- judgeval-0.0.53/assets/agent.gif +0 -0
- judgeval-0.0.53/assets/data.gif +0 -0
- judgeval-0.0.53/assets/document.gif +0 -0
- judgeval-0.0.53/assets/trace.gif +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/pyproject.toml +2 -1
- judgeval-0.0.53/src/judgeval/common/logger.py +60 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/common/s3_storage.py +2 -6
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/common/tracer.py +182 -262
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/common/utils.py +16 -36
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/constants.py +14 -20
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/data/__init__.py +0 -2
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/data/datasets/dataset.py +6 -10
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/data/datasets/eval_dataset_client.py +25 -27
- judgeval-0.0.53/src/judgeval/data/example.py +61 -0
- judgeval-0.0.53/src/judgeval/data/judgment_types.py +214 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/data/result.py +7 -25
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/data/scorer_data.py +28 -40
- judgeval-0.0.53/src/judgeval/data/scripts/fix_default_factory.py +23 -0
- judgeval-0.0.53/src/judgeval/data/scripts/openapi_transform.py +123 -0
- judgeval-0.0.53/src/judgeval/data/tool.py +5 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/data/trace.py +31 -50
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/data/trace_run.py +3 -3
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/evaluation_run.py +16 -23
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/integrations/langgraph.py +11 -12
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/judges/litellm_judge.py +3 -6
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/judges/mixture_of_judges.py +8 -25
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/judges/together_judge.py +3 -6
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/judgment_client.py +22 -24
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/rules.py +7 -19
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/run_evaluation.py +79 -242
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/scorers/__init__.py +4 -20
- judgeval-0.0.53/src/judgeval/scorers/agent_scorer.py +21 -0
- judgeval-0.0.53/src/judgeval/scorers/api_scorer.py +70 -0
- judgeval-0.0.53/src/judgeval/scorers/base_scorer.py +98 -0
- judgeval-0.0.53/src/judgeval/scorers/example_scorer.py +19 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -20
- judgeval-0.0.53/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +21 -0
- judgeval-0.0.53/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +12 -0
- judgeval-0.0.53/src/judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +73 -0
- judgeval-0.0.53/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +14 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +4 -4
- judgeval-0.0.53/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +21 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +4 -4
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +4 -4
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +4 -4
- judgeval-0.0.53/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +27 -0
- judgeval-0.0.53/src/judgeval/scorers/score.py +180 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/scorers/utils.py +6 -88
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/utils/file_utils.py +4 -6
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/version_check.py +3 -2
- judgeval-0.0.53/src/update_types.sh +14 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/uv.lock +901 -1221
- judgeval-0.0.51/.github/pull_request_template.md +0 -13
- judgeval-0.0.51/assets/agent.gif +0 -0
- judgeval-0.0.51/assets/data.gif +0 -0
- judgeval-0.0.51/assets/document.gif +0 -0
- judgeval-0.0.51/assets/trace.gif +0 -0
- judgeval-0.0.51/src/judgeval/common/logger.py +0 -213
- judgeval-0.0.51/src/judgeval/data/custom_example.py +0 -19
- judgeval-0.0.51/src/judgeval/data/example.py +0 -194
- judgeval-0.0.51/src/judgeval/data/tool.py +0 -56
- judgeval-0.0.51/src/judgeval/scorers/api_scorer.py +0 -80
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorer.py +0 -177
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -28
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -27
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +0 -125
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -45
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -29
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -29
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -32
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -22
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -28
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -28
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -38
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -27
- judgeval-0.0.51/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -23
- judgeval-0.0.51/src/judgeval/scorers/prompt_scorer.py +0 -296
- judgeval-0.0.51/src/judgeval/scorers/score.py +0 -465
- {judgeval-0.0.51 → judgeval-0.0.53}/.github/workflows/blocked-pr.yaml +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/.github/workflows/ci.yaml +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/.github/workflows/lint.yaml +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/.github/workflows/merge-branch-check.yaml +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/.github/workflows/release.yaml +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/.github/workflows/validate-branch.yaml +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/.pre-commit-config.yaml +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/LICENSE.md +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/error_analysis_dashboard.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/experiments_page.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/experiments_pagev2.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/logo-dark.svg +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/logo-light.svg +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/new_darkmode.svg +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/new_lightmode.svg +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/product_shot.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/trace_demo.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/trace_screenshot.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/assets/trace_screenshot_old.png +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/pytest.ini +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/.coveragerc +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/__init__.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/clients.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/common/__init__.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/common/exceptions.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/data/datasets/__init__.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/judges/__init__.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/judges/utils.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/tracer/__init__.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/utils/alerts.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/src/judgeval/utils/requests.py +0 -0
- {judgeval-0.0.51 → judgeval-0.0.53}/update_version.py +0 -0
@@ -0,0 +1,41 @@
|
|
1
|
+
---
|
2
|
+
name: Bug report
|
3
|
+
about: Create a report to help us improve Judgeval
|
4
|
+
title: "[BUG]"
|
5
|
+
labels: potential bug
|
6
|
+
|
7
|
+
---
|
8
|
+
|
9
|
+
## Describe the bug
|
10
|
+
A clear and concise description of what the bug is.
|
11
|
+
|
12
|
+
## To Reproduce
|
13
|
+
Steps to reproduce the behavior:
|
14
|
+
1. Go to '...'
|
15
|
+
2. Click on '....'
|
16
|
+
3. Scroll down to '....'
|
17
|
+
4. See error
|
18
|
+
|
19
|
+
## Expected behavior
|
20
|
+
A clear and concise description of what you expected to happen.
|
21
|
+
|
22
|
+
## Screenshots
|
23
|
+
If applicable, add screenshots to help explain your problem.
|
24
|
+
|
25
|
+
## Environment (please complete the following information):
|
26
|
+
- OS: [e.g. MacOS, Linux, Windows]
|
27
|
+
- Browser (if website issue): [e.g. Chrome, Safari, Firefox]
|
28
|
+
- Browser Version (if website issue): [e.g. 22]
|
29
|
+
- SDK Version: [e.g. 1.2.3]
|
30
|
+
- Programming Language/Runtime (if SDK issue): [e.g. Python 3.11, Python 3.12, etc.]
|
31
|
+
- Package Manager (if SDK issue): [e.g. uv, pip, pipenv]
|
32
|
+
|
33
|
+
## Additional context
|
34
|
+
Add any other context about the problem here.
|
35
|
+
|
36
|
+
## Are you interested in contributing a fix for this bug?
|
37
|
+
If this is a confirmed bug, the Judgment community is happy to support with guidance and review via [Discord](https://discord.com/invite/tGVFf8UBUY).
|
38
|
+
|
39
|
+
- [ ] Yes
|
40
|
+
- [ ] No
|
41
|
+
|
@@ -0,0 +1,43 @@
|
|
1
|
+
---
|
2
|
+
name: Feature Request
|
3
|
+
about: Suggest an idea for Judgeval
|
4
|
+
title: "[FEATURE]"
|
5
|
+
labels: feature-request
|
6
|
+
|
7
|
+
---
|
8
|
+
|
9
|
+
## Is your feature request related to a problem? Please describe.
|
10
|
+
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
11
|
+
|
12
|
+
## Describe the solution you'd like
|
13
|
+
A clear and concise description of what you want to happen.
|
14
|
+
|
15
|
+
## Describe alternatives you've considered
|
16
|
+
A clear and concise description of any alternative solutions or features you've considered.
|
17
|
+
|
18
|
+
## Which component(s) does this affect?
|
19
|
+
- [ ] SDK (open for community contributions)
|
20
|
+
- [ ] Website (internal development only)
|
21
|
+
- [ ] Documentation (open for community contributions)
|
22
|
+
- [ ] Not sure
|
23
|
+
|
24
|
+
## Use case and impact
|
25
|
+
Describe your specific use case and how this feature would benefit you or other users. Include:
|
26
|
+
- How often would you use this feature?
|
27
|
+
- How many users might benefit from this?
|
28
|
+
- Is this blocking your current implementation?
|
29
|
+
|
30
|
+
## Proposed API/Interface (if applicable)
|
31
|
+
If you have ideas about how this feature should be exposed (API methods, UI elements, etc.), please describe them here.
|
32
|
+
|
33
|
+
## Additional context
|
34
|
+
Add any other context, screenshots, code examples, or links to related issues/discussions about the feature request here.
|
35
|
+
|
36
|
+
## Are you interested in contributing this feature?
|
37
|
+
The Judgment community is happy to provide guidance and review for contributions via [Discord](https://discord.com/invite/tGVFf8UBUY).
|
38
|
+
|
39
|
+
- [ ] Yes, I'd like to implement this
|
40
|
+
- [ ] Yes, I'd like to help with design/planning
|
41
|
+
- [ ] No, but I'd be happy to test it
|
42
|
+
- [ ] No
|
43
|
+
|
@@ -0,0 +1,23 @@
|
|
1
|
+
## 📝 Summary
|
2
|
+
|
3
|
+
<!-- Add your list of changes, make it a list to improve the PR reviewers' experience. Ie:
|
4
|
+
- [ ] 1. Remove duplicate filter table
|
5
|
+
- [ ] 2. Reenabled filtering on new ExperimentRunsTableClient component, reapplied filtering changes
|
6
|
+
- [ ] 3. Added only search and filter when enter is pressed or apply filter is pressed
|
7
|
+
- [ ] 4. Error message for applying incomplete filters
|
8
|
+
- [ ] 5. Deletion should now work again for table
|
9
|
+
- [ ] 6. Comparison should now work again for table
|
10
|
+
-->
|
11
|
+
- [ ] 1. ...
|
12
|
+
|
13
|
+
## 🎥 Demo of Changes
|
14
|
+
|
15
|
+
<!-- Add a short 1-3 minute video describing/demoing the changes -->
|
16
|
+
|
17
|
+
## ✅ Checklist
|
18
|
+
|
19
|
+
- [ ] Tagged Linear ticket in PR title. Ie. PR Title (JUD-XXXX)
|
20
|
+
- [ ] Video demo of changes
|
21
|
+
- [ ] Reviewers assigned
|
22
|
+
- [ ] Docs updated ([if necessary](https://github.com/JudgmentLabs/docs))
|
23
|
+
- [ ] Cookbooks updated ([if necessary](https://github.com/JudgmentLabs/judgment-cookbook))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: judgeval
|
3
|
-
Version: 0.0.51
|
3
|
+
Version: 0.0.53
|
4
4
|
Summary: Judgeval Package
|
5
5
|
Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
|
6
6
|
Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
|
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Requires-Python: >=3.11
|
13
13
|
Requires-Dist: anthropic
|
14
14
|
Requires-Dist: boto3
|
15
|
+
Requires-Dist: datamodel-code-generator>=0.31.1
|
15
16
|
Requires-Dist: google-genai
|
16
17
|
Requires-Dist: langchain-anthropic
|
17
18
|
Requires-Dist: langchain-core
|
@@ -51,7 +52,7 @@ We're hiring! Join us in our mission to enable self-learning agents by providing
|
|
51
52
|
|
52
53
|
</div>
|
53
54
|
|
54
|
-
Judgeval offers **open-source tooling** for tracing
|
55
|
+
Judgeval offers **open-source tooling** for tracing and evaluating autonomous, stateful agents. It **provides runtime data from agent-environment interactions** for continuous learning and self-improvement.
|
55
56
|
|
56
57
|
## 🎬 See Judgeval in Action
|
57
58
|
|
@@ -22,7 +22,7 @@ We're hiring! Join us in our mission to enable self-learning agents by providing
|
|
22
22
|
|
23
23
|
</div>
|
24
24
|
|
25
|
-
Judgeval offers **open-source tooling** for tracing
|
25
|
+
Judgeval offers **open-source tooling** for tracing and evaluating autonomous, stateful agents. It **provides runtime data from agent-environment interactions** for continuous learning and self-improvement.
|
26
26
|
|
27
27
|
## 🎬 See Judgeval in Action
|
28
28
|
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "judgeval"
|
3
|
-
version = "0.0.51"
|
3
|
+
version = "0.0.53"
|
4
4
|
authors = [
|
5
5
|
{ name="Andrew Li", email="andrew@judgmentlabs.ai" },
|
6
6
|
{ name="Alex Shan", email="alex@judgmentlabs.ai" },
|
@@ -31,6 +31,7 @@ dependencies = [
|
|
31
31
|
"google-genai",
|
32
32
|
"boto3",
|
33
33
|
"matplotlib>=3.10.3",
|
34
|
+
"datamodel-code-generator>=0.31.1",
|
34
35
|
]
|
35
36
|
|
36
37
|
[project.urls]
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# logger.py
|
2
|
+
|
3
|
+
import logging
|
4
|
+
import sys
|
5
|
+
import os
|
6
|
+
|
7
|
+
# ANSI escape sequences
RESET = "\033[0m"
RED = "\033[31m"
YELLOW = "\033[33m"
BLUE = "\033[34m"
GRAY = "\033[90m"


class ColorFormatter(logging.Formatter):
    """
    Wrap the final formatted log record in ANSI color codes based on level.

    Coloring is applied only when it was requested via ``use_color`` AND
    ``sys.stdout`` reports being a terminal at construction time. Records
    whose level has no entry in ``COLORS`` are returned uncolored.
    """

    # Level -> ANSI color prefix. DEBUG and INFO share the same gray.
    COLORS = {
        logging.DEBUG: GRAY,
        logging.INFO: GRAY,
        logging.WARNING: YELLOW,
        logging.ERROR: RED,
        logging.CRITICAL: RED,
    }

    @staticmethod
    def _stream_is_tty(stream):
        """Return True only if *stream* exists, is open, and is a terminal."""
        try:
            return bool(stream.isatty())
        except (AttributeError, ValueError):
            # AttributeError: stream is None (sys.stdout can be None, e.g.
            # under pythonw) or has no isatty(). ValueError: stream is closed.
            return False

    def __init__(self, fmt=None, datefmt=None, use_color=True):
        """
        Args:
            fmt: Format string forwarded to ``logging.Formatter``.
            datefmt: Date format string forwarded to ``logging.Formatter``.
            use_color: Request colored output; honored only when
                ``sys.stdout`` is a terminal.
        """
        super().__init__(fmt=fmt, datefmt=datefmt)
        self.use_color = bool(use_color) and self._stream_is_tty(sys.stdout)

    def format(self, record):
        """Format *record*, then wrap the whole line in its level color."""
        message = super().format(record)
        if not self.use_color:
            return message
        color = self.COLORS.get(record.levelno, "")
        if not color:
            return message
        return f"{color}{message}{RESET}"
|
39
|
+
|
40
|
+
|
41
|
+
def _setup_judgeval_logger():
    """
    Configure and return the package-wide ``"judgeval"`` logger.

    The logger is set to DEBUG and given a single stdout ``StreamHandler``
    whose ``ColorFormatter`` colors output only when stdout is a terminal and
    the ``NO_COLOR`` environment variable is unset.

    The function is idempotent: if the logger already carries a handler
    formatted by ``ColorFormatter`` (e.g. the module was reloaded), no second
    handler is attached, so records are not emitted twice.

    Returns:
        logging.Logger: the configured ``"judgeval"`` logger.
    """
    logger = logging.getLogger("judgeval")
    logger.setLevel(logging.DEBUG)

    already_configured = any(
        isinstance(existing.formatter, ColorFormatter)
        for existing in logger.handlers
    )
    if already_configured:
        return logger

    # sys.stdout may be None (e.g. pythonw / detached GUI apps) or closed;
    # treat both as "not a terminal" instead of failing at import time.
    try:
        stdout_is_tty = bool(sys.stdout.isatty())
    except (AttributeError, ValueError):
        stdout_is_tty = False

    # NOTE: any NO_COLOR value, including the empty string, disables color.
    use_color = stdout_is_tty and os.getenv("NO_COLOR") is None

    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(
        ColorFormatter(
            fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
            use_color=use_color,
        )
    )

    logger.addHandler(handler)
    return logger


# Global logger you can import elsewhere
judgeval_logger = _setup_judgeval_logger()
|
@@ -4,7 +4,7 @@ import boto3
|
|
4
4
|
from typing import Optional
|
5
5
|
from datetime import datetime, UTC
|
6
6
|
from botocore.exceptions import ClientError
|
7
|
-
from judgeval.common.logger import
|
7
|
+
from judgeval.common.logger import judgeval_logger
|
8
8
|
|
9
9
|
|
10
10
|
class S3Storage:
|
@@ -42,7 +42,6 @@ class S3Storage:
|
|
42
42
|
error_code = e.response["Error"]["Code"]
|
43
43
|
if error_code == "404":
|
44
44
|
# Bucket doesn't exist, create it
|
45
|
-
info(f"Bucket {self.bucket_name} doesn't exist, creating it ...")
|
46
45
|
try:
|
47
46
|
self.s3_client.create_bucket(
|
48
47
|
Bucket=self.bucket_name,
|
@@ -52,14 +51,13 @@ class S3Storage:
|
|
52
51
|
) if self.s3_client.meta.region_name != "us-east-1" else self.s3_client.create_bucket(
|
53
52
|
Bucket=self.bucket_name
|
54
53
|
)
|
55
|
-
info(f"Created S3 bucket: {self.bucket_name}")
|
56
54
|
except ClientError as create_error:
|
57
55
|
if (
|
58
56
|
create_error.response["Error"]["Code"]
|
59
57
|
== "BucketAlreadyOwnedByYou"
|
60
58
|
):
|
61
59
|
# Bucket was just created by another process
|
62
|
-
warning(
|
60
|
+
judgeval_logger.warning(
|
63
61
|
f"Bucket {self.bucket_name} was just created by another process"
|
64
62
|
)
|
65
63
|
pass
|
@@ -90,8 +88,6 @@ class S3Storage:
|
|
90
88
|
# Convert trace data to JSON string
|
91
89
|
trace_json = json.dumps(trace_data)
|
92
90
|
|
93
|
-
# Upload to S3
|
94
|
-
info(f"Uploading trace to S3 at key {s3_key}, in bucket {self.bucket_name} ...")
|
95
91
|
self.s3_client.put_object(
|
96
92
|
Bucket=self.bucket_name,
|
97
93
|
Key=s3_key,
|