judgeval 0.7.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/__init__.py +139 -12
- judgeval/api/__init__.py +501 -0
- judgeval/api/api_types.py +344 -0
- judgeval/cli.py +2 -4
- judgeval/constants.py +10 -26
- judgeval/data/evaluation_run.py +49 -26
- judgeval/data/example.py +2 -2
- judgeval/data/judgment_types.py +266 -82
- judgeval/data/result.py +4 -5
- judgeval/data/scorer_data.py +4 -2
- judgeval/data/tool.py +2 -2
- judgeval/data/trace.py +7 -50
- judgeval/data/trace_run.py +7 -4
- judgeval/{dataset.py → dataset/__init__.py} +43 -28
- judgeval/env.py +67 -0
- judgeval/{run_evaluation.py → evaluation/__init__.py} +29 -95
- judgeval/exceptions.py +27 -0
- judgeval/integrations/langgraph/__init__.py +788 -0
- judgeval/judges/__init__.py +2 -2
- judgeval/judges/litellm_judge.py +75 -15
- judgeval/judges/together_judge.py +86 -18
- judgeval/judges/utils.py +7 -21
- judgeval/{common/logger.py → logger.py} +8 -6
- judgeval/scorers/__init__.py +0 -4
- judgeval/scorers/agent_scorer.py +3 -7
- judgeval/scorers/api_scorer.py +8 -13
- judgeval/scorers/base_scorer.py +52 -32
- judgeval/scorers/example_scorer.py +1 -3
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -14
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +45 -20
- judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +2 -2
- judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +3 -3
- judgeval/scorers/score.py +21 -31
- judgeval/scorers/trace_api_scorer.py +5 -0
- judgeval/scorers/utils.py +1 -103
- judgeval/tracer/__init__.py +1075 -2
- judgeval/tracer/constants.py +1 -0
- judgeval/tracer/exporters/__init__.py +37 -0
- judgeval/tracer/exporters/s3.py +119 -0
- judgeval/tracer/exporters/store.py +43 -0
- judgeval/tracer/exporters/utils.py +32 -0
- judgeval/tracer/keys.py +67 -0
- judgeval/tracer/llm/__init__.py +1233 -0
- judgeval/{common/tracer → tracer/llm}/providers.py +5 -10
- judgeval/{local_eval_queue.py → tracer/local_eval_queue.py} +15 -10
- judgeval/tracer/managers.py +188 -0
- judgeval/tracer/processors/__init__.py +181 -0
- judgeval/tracer/utils.py +20 -0
- judgeval/trainer/__init__.py +5 -0
- judgeval/{common/trainer → trainer}/config.py +12 -9
- judgeval/{common/trainer → trainer}/console.py +2 -9
- judgeval/{common/trainer → trainer}/trainable_model.py +12 -7
- judgeval/{common/trainer → trainer}/trainer.py +119 -17
- judgeval/utils/async_utils.py +2 -3
- judgeval/utils/decorators.py +24 -0
- judgeval/utils/file_utils.py +37 -4
- judgeval/utils/guards.py +32 -0
- judgeval/utils/meta.py +14 -0
- judgeval/{common/api/json_encoder.py → utils/serialize.py} +7 -1
- judgeval/utils/testing.py +88 -0
- judgeval/utils/url.py +10 -0
- judgeval/{version_check.py → utils/version_check.py} +3 -3
- judgeval/version.py +5 -0
- judgeval/warnings.py +4 -0
- {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/METADATA +12 -14
- judgeval-0.9.0.dist-info/RECORD +80 -0
- judgeval/clients.py +0 -35
- judgeval/common/__init__.py +0 -13
- judgeval/common/api/__init__.py +0 -3
- judgeval/common/api/api.py +0 -375
- judgeval/common/api/constants.py +0 -186
- judgeval/common/exceptions.py +0 -27
- judgeval/common/storage/__init__.py +0 -6
- judgeval/common/storage/s3_storage.py +0 -97
- judgeval/common/tracer/__init__.py +0 -31
- judgeval/common/tracer/constants.py +0 -22
- judgeval/common/tracer/core.py +0 -2427
- judgeval/common/tracer/otel_exporter.py +0 -108
- judgeval/common/tracer/otel_span_processor.py +0 -188
- judgeval/common/tracer/span_processor.py +0 -37
- judgeval/common/tracer/span_transformer.py +0 -207
- judgeval/common/tracer/trace_manager.py +0 -101
- judgeval/common/trainer/__init__.py +0 -5
- judgeval/common/utils.py +0 -948
- judgeval/integrations/langgraph.py +0 -844
- judgeval/judges/mixture_of_judges.py +0 -287
- judgeval/judgment_client.py +0 -267
- judgeval/rules.py +0 -521
- judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
- judgeval/utils/alerts.py +0 -93
- judgeval/utils/requests.py +0 -50
- judgeval-0.7.1.dist-info/RECORD +0 -82
- {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/WHEEL +0 -0
- {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/entry_points.txt +0 -0
- {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,80 @@
|
|
1
|
+
judgeval/__init__.py,sha256=w6X0PmRy4_syBWPrK0sMoTDnEwUhsorPIg5iBKnNGUs,4907
|
2
|
+
judgeval/cli.py,sha256=R5IiIQmSVg21kQHX2kL3sOeXCxvvAMSqyva3Z9AoSXc,1560
|
3
|
+
judgeval/constants.py,sha256=fqzSY7tDfseWy1trLjCSGC6WVOFEm_4hvA8IFpv7CUc,3683
|
4
|
+
judgeval/env.py,sha256=R0bj7XU29RIVVQjkVMa11ObhOYVMbaE_3LTvL3I9dWM,2212
|
5
|
+
judgeval/exceptions.py,sha256=tTbfe4yoOtPXmn22UQz9-6a-5PT9uOko85xaRRwr0Sw,621
|
6
|
+
judgeval/logger.py,sha256=ZWbp0QfT1CJnQIjV-Zle4n489nFCKEmD2-ukx--iiow,1553
|
7
|
+
judgeval/version.py,sha256=kJtYsih3hTYZ_rY_Lt0RcFqvjAfF5Xo1uNq0jZWJ5pw,73
|
8
|
+
judgeval/warnings.py,sha256=LbGte14ppiFjrkp-JJYueZ40NWFvMkWRvPXr6r-fUWw,73
|
9
|
+
judgeval/api/__init__.py,sha256=DZ-dijtkzUsjY3CBdCh6TH_PHC5qlI_tAFCBgvAZNjU,14538
|
10
|
+
judgeval/api/api_types.py,sha256=4xyqlmV9mEoTUIbii-bj7oS0fVwWrJ_UhYxpXvcBywA,9198
|
11
|
+
judgeval/data/__init__.py,sha256=1QagDcSQtfnJ632t9Dnq8d7XjAqhmY4mInOWt8qH9tM,455
|
12
|
+
judgeval/data/evaluation_run.py,sha256=G7ad4eDQTjketfcQRITk8bs8CIO8rm058H1G_qkLmhc,4729
|
13
|
+
judgeval/data/example.py,sha256=aTZg0GWQmUEBHk1n9Asw8sz-8YBWKlFsMZYjwq1DfrI,917
|
14
|
+
judgeval/data/judgment_types.py,sha256=b2pDeEOSl_zHJLDzqr0AGYbZ5zrooJMr5VmK-bDrN4o,17082
|
15
|
+
judgeval/data/result.py,sha256=JQ6f0XzL9p0oPmx-_z2NKUcISO6pISsVZ5dT1jkBeZs,2120
|
16
|
+
judgeval/data/scorer_data.py,sha256=g9PE0DNLikW0LgxGWhgpCiNVOX8PzqEaZKivifLOUDI,2997
|
17
|
+
judgeval/data/tool.py,sha256=bj_WxFg22mypUUVR5KqQRxMDHWvKwiE1MMPjLnTCoDU,99
|
18
|
+
judgeval/data/trace.py,sha256=HTeucJqNdFsQI3Ybb6mJ8NkmHkc1vIddzQ7BtQs25k8,1315
|
19
|
+
judgeval/data/trace_run.py,sha256=VCQUdDlrHixyiqWW1RUiCtLgqMt-3oW1M1A7CCer2Ok,1635
|
20
|
+
judgeval/data/scripts/fix_default_factory.py,sha256=lvp2JwYZqz-XpD9LZNa3mANZVP-jJSZoNzolI6JWERM,591
|
21
|
+
judgeval/data/scripts/openapi_transform.py,sha256=Sm04JClzyP1ga8KA3gkIdsae8Hlx-XU7-x0gHCQYOhg,3877
|
22
|
+
judgeval/dataset/__init__.py,sha256=xlg4VgEvbReWOlk6MK4GqJubSOeo17VqoIyjdMwmIf8,6573
|
23
|
+
judgeval/evaluation/__init__.py,sha256=O0sk3zP5jbHPtknT6DuB6ijkJ3-0I54mMf1UzDPKMF0,15409
|
24
|
+
judgeval/integrations/langgraph/__init__.py,sha256=VvqCKOk65A2gLlr8uWrJVzpRF5OnIja5zwF4hGPEFsw,27540
|
25
|
+
judgeval/judges/__init__.py,sha256=e7JnTc1TG_SwqydDHTXHIP0EBazQxt-ydMQG7ghSU5A,228
|
26
|
+
judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
|
27
|
+
judgeval/judges/litellm_judge.py,sha256=5vEF0IUo7HVWnOF2ww-DMke8Xkarnz32B_qbgKjc0-I,4182
|
28
|
+
judgeval/judges/together_judge.py,sha256=GzwlXZJzle8hT-vWKmq39JyIeanJqJfHDOkrksUbzk0,4398
|
29
|
+
judgeval/judges/utils.py,sha256=ITbYwvjU3o9-FIAReFvxh24yJrx9LV3l9BnSBgKUpxg,2068
|
30
|
+
judgeval/scorers/__init__.py,sha256=a5f_QcC7P9DjoOu_DMmADlkIXebo0d3zEJDJ7mhN3tM,640
|
31
|
+
judgeval/scorers/agent_scorer.py,sha256=V1NSwhGWgtXPsX-blKLkDLsPPbEiP-A4614X-95dtlQ,565
|
32
|
+
judgeval/scorers/api_scorer.py,sha256=M7cwJ2YY2Mw0pCo1UH-29jwrNd2PdiBRdQtmWS5ijXA,2173
|
33
|
+
judgeval/scorers/base_scorer.py,sha256=8uhkmj78R6-Stenl1eo6IVqKSBgkLpoqR0acGi-Fxik,2788
|
34
|
+
judgeval/scorers/example_scorer.py,sha256=o_BGUztJXjnKnuOqIa9T4PXe0wPoWg63FyH518N1LxA,561
|
35
|
+
judgeval/scorers/exceptions.py,sha256=ACDHK5-TWiF3NTk-wycaedpbrdobm-CvvC1JA_iP-Mk,179
|
36
|
+
judgeval/scorers/score.py,sha256=xquM59SCtNeuAsrBsHFgBQk3CHp4-bms4oFs24xfcU0,7176
|
37
|
+
judgeval/scorers/trace_api_scorer.py,sha256=B2Vp8Jj2I7N-G1weHMm1b_9gVbn0BMcOtestMFNtx08,112
|
38
|
+
judgeval/scorers/utils.py,sha256=iSZONwK0HecxUPz-cMCyra_87DSCag1E8BdpF2a4_44,377
|
39
|
+
judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
40
|
+
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=MFsxDPZoZibJlsz4RgtLehA3rVcEfS9o5cw0l8gI5IM,1046
|
41
|
+
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=zJsU0VrUmRhY9qav48c6jTyDqUwI3JzhV9ajtlJCe0M,544
|
42
|
+
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=UDfzTO9Fx0FA5o0wfD8kprrGA4eW-43Rn9Gc0BQtKgY,393
|
43
|
+
judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py,sha256=mbBvirNcivu9dP6deM7FogDXrdwI9o8yqsO8IeKPSb4,309
|
44
|
+
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ps51bTgQsD9xGYsk1v9bx0WxQMqywSllCE9_xlJkLd8,531
|
45
|
+
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=aQzu-TiGqG74JDQ927evv5yGmnZw2AOolyHvlIhiUbI,683
|
46
|
+
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=C-9Q7s9K7mcgFMcEL0I_7XQZMRqrL5MFRi9G6Dx8-v8,8505
|
47
|
+
judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=BhrLnIASZOTT9XJ6giYSoVfdR7NYsjRRTOTNioNtEiU,610
|
48
|
+
judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=bMu0WMJaXdMyDTN42sVLoWV-lrUHCEa8iDrCI_K7nlQ,808
|
49
|
+
judgeval/tracer/__init__.py,sha256=WmRmfvBOkzwEaAljwJ_ClNDJt1zB6J562G5BYXNMySY,36914
|
50
|
+
judgeval/tracer/constants.py,sha256=ae8tivAW97awJQxdRB9OMqX50wOLX3zqChT_AGkPBu0,85
|
51
|
+
judgeval/tracer/keys.py,sha256=qXPoZSkEhVF-YYfQ9-zeDMVdr4GtpPf2W7MPJaN2AQo,2889
|
52
|
+
judgeval/tracer/local_eval_queue.py,sha256=Amt7xkdmVJH1l2itm-ogiIW5oDaLnACisGfsdZjazn0,7228
|
53
|
+
judgeval/tracer/managers.py,sha256=h2ZHJ61_vf3cS-HlEUiodFzKDUuQWIhYC6n7pMVyM9c,6113
|
54
|
+
judgeval/tracer/utils.py,sha256=jljfr-oiCy8agOh0apAoR04tR2XRAzFg51On_LPzue8,600
|
55
|
+
judgeval/tracer/exporters/__init__.py,sha256=lnZXfPGaQH844HAIuZCQqjqhnmZGA98kHY8Xp-Oi4Ws,1220
|
56
|
+
judgeval/tracer/exporters/s3.py,sha256=N9gmw17cnR0VkfAQQkLsNj5BksgNRETThR5qYhWRjP4,4360
|
57
|
+
judgeval/tracer/exporters/store.py,sha256=KQV3cyqteesByQjR-9VdPXT9OlUZ-6F08ogqj837_c0,1012
|
58
|
+
judgeval/tracer/exporters/utils.py,sha256=JRcoSQuEHxMDJbXfyrUIfA2SHBVkZM82h4bTbYGxkNw,1154
|
59
|
+
judgeval/tracer/llm/__init__.py,sha256=p9uwWPg9k-NcWjj9TbwQj55sHhBOqRYx2-Ld6YHaFUs,42625
|
60
|
+
judgeval/tracer/llm/providers.py,sha256=QQLJlSNnDjXRAc2Wqw78o254COJUSXX39D7D_mx3NVA,2651
|
61
|
+
judgeval/tracer/processors/__init__.py,sha256=Fpm_cocYj3fwWAE9hcVder6TcYcycbXJWpmdyOjw5uY,5802
|
62
|
+
judgeval/trainer/__init__.py,sha256=h_DDVV7HFF7HUPAJFpt2d9wjqgnmEVcHxqZyB1k7pPQ,257
|
63
|
+
judgeval/trainer/config.py,sha256=8s0X8B334PJomorwONaUpb6K8cAMxRdYAeQdtx7HPHs,4258
|
64
|
+
judgeval/trainer/console.py,sha256=PJ0rCnDwC7aoW-VsLDS96ZyMyagh-l9EOJKff1ATIpo,4342
|
65
|
+
judgeval/trainer/trainable_model.py,sha256=vSDtHJJ-fLczC2gkaY9jG6TQvLgWqaVjElm1l8YlJcU,8959
|
66
|
+
judgeval/trainer/trainer.py,sha256=_dlV0NSD4jfNgTb2GwghWGBmnoNsooQq85nvIWW5VR4,16550
|
67
|
+
judgeval/utils/async_utils.py,sha256=lgCgi8gkLUcAEepruEkx-AGQgJnAJpKmBIhZx6Y0q2s,935
|
68
|
+
judgeval/utils/decorators.py,sha256=rdqY1w0zNL6O6GU6Wdeo0-x5EgpFTEhU2vkgiWsRYdc,525
|
69
|
+
judgeval/utils/file_utils.py,sha256=3LI1YCZwO5ogTgJreyOgRgDksey3natO2Td1PQqaPyY,3252
|
70
|
+
judgeval/utils/guards.py,sha256=QBb6m6KElxdvt2bskLZCKh_zGHbBcqV-VfGzT63o3hY,807
|
71
|
+
judgeval/utils/meta.py,sha256=wQFCLJTNKF9yUdXcw37AT6mC-wqzZpAvjn5gP_6flD8,349
|
72
|
+
judgeval/utils/serialize.py,sha256=AtpdMQXC03xhImLJjSNCi-PqsUek2mcwvtd2ryrATTQ,6057
|
73
|
+
judgeval/utils/testing.py,sha256=kJOq4LlEXaNThfg9oSIRqSK7IH8AwLgbukjn5uxMY7A,3661
|
74
|
+
judgeval/utils/url.py,sha256=Shf0v3XcbaWpL0m1eGJEEO_z4TsQCnDB2Rl25OTUmiI,195
|
75
|
+
judgeval/utils/version_check.py,sha256=kcF6SvB6GbVKI0Gv9QRVm-kvBn9_z-c3jmPORsXO3h0,1015
|
76
|
+
judgeval-0.9.0.dist-info/METADATA,sha256=LHcfmmHNxDbQQ2jrSTHqkjxYQkK11MMtj_4Jkd9wASs,8869
|
77
|
+
judgeval-0.9.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
78
|
+
judgeval-0.9.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
|
79
|
+
judgeval-0.9.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
|
80
|
+
judgeval-0.9.0.dist-info/RECORD,,
|
judgeval/clients.py
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
from dotenv import load_dotenv
|
3
|
-
from openai import OpenAI
|
4
|
-
from typing import Optional
|
5
|
-
|
6
|
-
# Location of the ".env" file expected to sit next to this module; it is handed
# to load_dotenv at import time, before any of the API keys below are looked up.
PATH_TO_DOTENV = os.path.join(os.path.dirname(__file__), ".env")
load_dotenv(dotenv_path=PATH_TO_DOTENV)


# --- OpenAI -----------------------------------------------------------------
# `client` stays None unless OPENAI_API_KEY is set to a non-empty value AND the
# `openai` package can be imported.
client: Optional["OpenAI"] = None
if os.environ.get("OPENAI_API_KEY"):
    try:
        from openai import OpenAI

        client = OpenAI()
    except ImportError:
        # Best effort: a missing `openai` package simply leaves `client` as None.
        pass

# --- Together ---------------------------------------------------------------
# Both handles default to None and are only populated when a key is available.
together_client: Optional["Together"] = None
async_together_client: Optional["AsyncTogether"] = None

# TOGETHERAI_API_KEY wins; TOGETHER_API_KEY is consulted only when the first is
# unset or empty.
together_api_key = os.environ.get("TOGETHERAI_API_KEY") or os.environ.get(
    "TOGETHER_API_KEY"
)
if together_api_key:
    try:
        from together import Together, AsyncTogether

        together_client = Together(api_key=together_api_key)
        async_together_client = AsyncTogether(api_key=together_api_key)
    except Exception:
        # Deliberately broad: any failure while importing or constructing the
        # Together clients is ignored. If the sync client was already built
        # before the failure, it stays assigned while the async one stays None.
        pass
|
judgeval/common/__init__.py
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
from judgeval.common.utils import (
|
2
|
-
get_chat_completion,
|
3
|
-
aget_chat_completion,
|
4
|
-
get_completion_multiple_models,
|
5
|
-
aget_completion_multiple_models,
|
6
|
-
)
|
7
|
-
|
8
|
-
__all__ = [
|
9
|
-
"get_chat_completion",
|
10
|
-
"aget_chat_completion",
|
11
|
-
"get_completion_multiple_models",
|
12
|
-
"aget_completion_multiple_models",
|
13
|
-
]
|
judgeval/common/api/__init__.py
DELETED
judgeval/common/api/api.py
DELETED
@@ -1,375 +0,0 @@
|
|
1
|
-
from typing import Literal, List, Dict, Any, Union, Optional
|
2
|
-
from requests import exceptions
|
3
|
-
from judgeval.common.api.constants import (
|
4
|
-
JUDGMENT_TRACES_FETCH_API_URL,
|
5
|
-
JUDGMENT_TRACES_UPSERT_API_URL,
|
6
|
-
JUDGMENT_TRACES_DELETE_API_URL,
|
7
|
-
JUDGMENT_TRACES_SPANS_BATCH_API_URL,
|
8
|
-
JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL,
|
9
|
-
JUDGMENT_DATASETS_PUSH_API_URL,
|
10
|
-
JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
|
11
|
-
JUDGMENT_DATASETS_PULL_API_URL,
|
12
|
-
JUDGMENT_DATASETS_DELETE_API_URL,
|
13
|
-
JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
|
14
|
-
JUDGMENT_PROJECT_DELETE_API_URL,
|
15
|
-
JUDGMENT_PROJECT_CREATE_API_URL,
|
16
|
-
JUDGMENT_EVAL_API_URL,
|
17
|
-
JUDGMENT_TRACE_EVAL_API_URL,
|
18
|
-
JUDGMENT_EVAL_LOG_API_URL,
|
19
|
-
JUDGMENT_EVAL_FETCH_API_URL,
|
20
|
-
JUDGMENT_EVAL_DELETE_API_URL,
|
21
|
-
JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL,
|
22
|
-
JUDGMENT_GET_EVAL_STATUS_API_URL,
|
23
|
-
JUDGMENT_SCORER_SAVE_API_URL,
|
24
|
-
JUDGMENT_SCORER_FETCH_API_URL,
|
25
|
-
JUDGMENT_SCORER_EXISTS_API_URL,
|
26
|
-
JUDGMENT_CUSTOM_SCORER_UPLOAD_API_URL,
|
27
|
-
JUDGMENT_DATASETS_APPEND_TRACES_API_URL,
|
28
|
-
)
|
29
|
-
from judgeval.common.api.constants import (
|
30
|
-
TraceFetchPayload,
|
31
|
-
TraceDeletePayload,
|
32
|
-
SpansBatchPayload,
|
33
|
-
EvaluationEntryResponse,
|
34
|
-
EvaluationRunsBatchPayload,
|
35
|
-
DatasetPushPayload,
|
36
|
-
DatasetAppendPayload,
|
37
|
-
DatasetPullPayload,
|
38
|
-
DatasetDeletePayload,
|
39
|
-
DatasetStatsPayload,
|
40
|
-
ProjectCreatePayload,
|
41
|
-
ProjectDeletePayload,
|
42
|
-
EvalRunRequestBody,
|
43
|
-
DeleteEvalRunRequestBody,
|
44
|
-
EvalLogPayload,
|
45
|
-
EvalStatusPayload,
|
46
|
-
ScorerSavePayload,
|
47
|
-
ScorerFetchPayload,
|
48
|
-
ScorerExistsPayload,
|
49
|
-
CustomScorerUploadPayload,
|
50
|
-
CustomScorerTemplateResponse,
|
51
|
-
)
|
52
|
-
from judgeval.utils.requests import requests
|
53
|
-
from judgeval.common.api.json_encoder import json_encoder
|
54
|
-
|
55
|
-
|
56
|
-
class JudgmentAPIException(exceptions.HTTPError):
    """Raised when a Judgment API request fails.

    Extends ``requests.exceptions.HTTPError`` and keeps the message, the
    response and the request as attributes so callers can inspect them.

    Args:
        message: Human-readable description of the failure.
        response: The response object that triggered the error, if any.
        request: The request object that was sent, if any.
    """

    def __init__(self, message: str, response=None, request=None):
        super().__init__(message, response=response, request=request)
        self.message = message
        self.response = response
        self.request = request

    @property
    def status_code(self) -> Union[int, None]:
        """Return the HTTP status code of the response, or None without one."""
        # Compare with None explicitly. A requests.Response is falsy whenever
        # its status is 4xx/5xx, so a plain truthiness test would report None
        # for exactly the error responses this exception exists to describe.
        if self.response is None:
            return None
        return self.response.status_code

    @property
    def response_json(self) -> Dict[str, Any]:
        """Return the decoded JSON body of the response as a dict.

        An empty dict is returned when there is no response, when the body
        cannot be decoded, or when the decoded body is not a JSON object.
        """
        if self.response is None:
            return {}
        try:
            body = self.response.json()
        except (ValueError, AttributeError):
            return {}
        # Callers (e.g. error_detail) use .get(), so never hand back a list,
        # string or number even if the server replied with one.
        return body if isinstance(body, dict) else {}

    @property
    def error_detail(self) -> str:
        """Return the ``detail`` field of the JSON body, or a generic fallback."""
        return self.response_json.get("detail", "An unknown error occurred.")
|
85
|
-
|
86
|
-
|
87
|
-
class JudgmentApiClient:
    """Thin HTTP client for the Judgment API endpoints.

    Every public method builds a payload and forwards it to ``_do_request``,
    which attaches the auth headers, sends the request and returns the decoded
    JSON body. Failures surface as ``JudgmentAPIException``.

    Args:
        api_key: Key sent as a bearer token in the ``Authorization`` header.
        organization_id: Value sent in the ``X-Organization-Id`` header.
    """

    def __init__(self, api_key: str, organization_id: str):
        self.api_key = api_key
        self.organization_id = organization_id

    # ------------------------------------------------------------------ #
    # Transport
    # ------------------------------------------------------------------ #

    def _do_request(
        self,
        method: Literal["POST", "PATCH", "GET", "DELETE"],
        url: str,
        payload: Any,
        timeout: Optional[Union[float, tuple]] = None,
    ) -> Any:
        """Send one request and return the decoded JSON response body.

        Args:
            method: HTTP verb to use.
            url: Endpoint URL.
            payload: Sent as query parameters for GET, and as a JSON body
                (after ``json_encoder``) for every other verb.
            timeout: Optional override for the default timeout from
                ``_request_kwargs``.

        Raises:
            JudgmentAPIException: If the response status signals an HTTP error.
        """
        request_kwargs = self._request_kwargs()
        if timeout is not None:
            request_kwargs["timeout"] = timeout

        # Only the way the payload is attached differs between verbs.
        if method == "GET":
            request_kwargs["params"] = payload
        else:
            request_kwargs["json"] = json_encoder(payload)

        r = requests.request(method, url, headers=self._headers(), **request_kwargs)

        try:
            r.raise_for_status()
        except exceptions.HTTPError as e:
            # Best effort: prefer the server's "detail" field, fall back to the
            # raw body when it is not JSON (or not a JSON object).
            try:
                detail = r.json().get("detail", "")
            except Exception:
                detail = r.text

            raise JudgmentAPIException(
                f"HTTP {r.status_code}: {r.reason}, {detail}",
                response=r,
                request=e.request,
            ) from e

        return r.json()

    def _headers(self) -> Dict[str, str]:
        """Return the headers attached to every request."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
            "X-Organization-Id": self.organization_id,
        }

    def _request_kwargs(self):
        """Return a fresh dict of default keyword arguments for ``requests``."""
        # NOTE: We may want to configure custom kwargs that different requests may need.
        # For this purpose we can store that as a property of self, and return the appropriate kwargs from this method.
        return {
            "verify": True,
            "timeout": 30,
        }

    @staticmethod
    def _scorer_error(error, failure_message: str) -> JudgmentAPIException:
        """Build the exception re-raised by the scorer endpoints.

        A 500 response is reported as a temporary outage; any other failure is
        reported as ``"<failure_message>: <detail>"``.
        """
        # Read the status straight from the response and compare with None:
        # requests.Response is falsy for 4xx/5xx, so a truthiness-based check
        # would never recognise the 500 case.
        response = error.response
        status = response.status_code if response is not None else None
        if status == 500:
            message = f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {error.error_detail}"
        else:
            message = f"{failure_message}: {error.error_detail}"
        return JudgmentAPIException(
            message,
            response=error.response,
            request=error.request,
        )

    # ------------------------------------------------------------------ #
    # Traces
    # ------------------------------------------------------------------ #

    def send_spans_batch(self, spans: List[Dict[str, Any]]):
        """POST a batch of spans together with the organization id."""
        payload: SpansBatchPayload = {
            "spans": spans,
            "organization_id": self.organization_id,
        }

        return self._do_request("POST", JUDGMENT_TRACES_SPANS_BATCH_API_URL, payload)

    def send_evaluation_runs_batch(
        self, evaluation_entries: List[EvaluationEntryResponse]
    ):
        """POST a batch of evaluation entries together with the organization id."""
        payload: EvaluationRunsBatchPayload = {
            "organization_id": self.organization_id,
            "evaluation_entries": evaluation_entries,
        }

        return self._do_request(
            "POST", JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL, payload
        )

    def fetch_trace(self, trace_id: str):
        """Fetch one trace by id."""
        payload: TraceFetchPayload = {"trace_id": trace_id}
        return self._do_request("POST", JUDGMENT_TRACES_FETCH_API_URL, payload)

    def upsert_trace(self, trace_data: Dict[str, Any]):
        """POST ``trace_data`` unchanged to the trace upsert endpoint."""
        return self._do_request("POST", JUDGMENT_TRACES_UPSERT_API_URL, trace_data)

    def delete_trace(self, trace_id: str):
        """Delete a single trace; same request as ``delete_traces([trace_id])``."""
        return self.delete_traces([trace_id])

    def delete_traces(self, trace_ids: List[str]):
        """Delete every trace whose id is listed in ``trace_ids``."""
        payload: TraceDeletePayload = {"trace_ids": trace_ids}
        return self._do_request("DELETE", JUDGMENT_TRACES_DELETE_API_URL, payload)

    # ------------------------------------------------------------------ #
    # Projects
    # ------------------------------------------------------------------ #

    def delete_project(self, project_name: str):
        """Delete the project called ``project_name``."""
        payload: ProjectDeletePayload = {"project_name": project_name}
        return self._do_request("DELETE", JUDGMENT_PROJECT_DELETE_API_URL, payload)

    def create_project(self, project_name: str):
        """Create a project called ``project_name``."""
        payload: ProjectCreatePayload = {"project_name": project_name}
        return self._do_request("POST", JUDGMENT_PROJECT_CREATE_API_URL, payload)

    # ------------------------------------------------------------------ #
    # Evaluations
    # ------------------------------------------------------------------ #

    def run_evaluation(self, evaluation_run: Dict[str, Any]):
        """POST ``evaluation_run`` unchanged to the evaluation endpoint."""
        return self._do_request("POST", JUDGMENT_EVAL_API_URL, evaluation_run)

    def run_trace_evaluation(self, trace_run: Dict[str, Any]):
        """POST ``trace_run`` unchanged to the trace evaluation endpoint."""
        return self._do_request("POST", JUDGMENT_TRACE_EVAL_API_URL, trace_run)

    def log_evaluation_results(
        self, results: List[Dict[str, Any]], run: Dict[str, Any]
    ):
        """POST evaluation ``results`` together with the ``run`` they belong to."""
        payload: EvalLogPayload = {"results": results, "run": run}
        return self._do_request("POST", JUDGMENT_EVAL_LOG_API_URL, payload)

    def fetch_evaluation_results(self, experiment_run_id: str, project_name: str):
        """Fetch the results of one experiment run within a project."""
        payload: EvalRunRequestBody = {
            "project_name": project_name,
            "experiment_run_id": experiment_run_id,
        }
        return self._do_request("POST", JUDGMENT_EVAL_FETCH_API_URL, payload)

    def delete_evaluation_results(self, project_name: str, eval_names: List[str]):
        """Request deletion of the named evaluation runs of a project."""
        payload: DeleteEvalRunRequestBody = {
            "project_name": project_name,
            "eval_names": eval_names,
            "judgment_api_key": self.api_key,
        }
        return self._do_request("POST", JUDGMENT_EVAL_DELETE_API_URL, payload)

    def add_to_evaluation_queue(self, payload: Dict[str, Any]):
        """POST ``payload`` unchanged to the evaluation queue endpoint."""
        return self._do_request("POST", JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL, payload)

    def get_evaluation_status(self, experiment_run_id: str, project_name: str):
        """Query the status of one experiment run (sent as a GET request)."""
        # NOTE(review): because this is a GET, the payload - including the API
        # key - travels in the URL query string, where proxies and access logs
        # may record it. The key is already sent in the Authorization header;
        # confirm with the backend whether the query parameter can be dropped.
        payload: EvalStatusPayload = {
            "experiment_run_id": experiment_run_id,
            "project_name": project_name,
            "judgment_api_key": self.api_key,
        }
        return self._do_request("GET", JUDGMENT_GET_EVAL_STATUS_API_URL, payload)

    # ------------------------------------------------------------------ #
    # Scorers
    # ------------------------------------------------------------------ #

    def save_scorer(
        self, name: str, prompt: str, threshold: float, options: Optional[dict] = None
    ):
        """Save a scorer definition.

        Raises:
            JudgmentAPIException: With an outage message on HTTP 500, otherwise
                with a "Failed to save classifier scorer" message.
        """
        payload: ScorerSavePayload = {
            "name": name,
            "prompt": prompt,
            "threshold": threshold,
            "options": options,
        }
        try:
            return self._do_request("POST", JUDGMENT_SCORER_SAVE_API_URL, payload)
        except JudgmentAPIException as e:
            raise self._scorer_error(e, "Failed to save classifier scorer") from e

    def fetch_scorer(self, name: str):
        """Fetch a scorer by name.

        Raises:
            JudgmentAPIException: With an outage message on HTTP 500, otherwise
                with a "Failed to fetch classifier scorer" message.
        """
        payload: ScorerFetchPayload = {"name": name}
        try:
            return self._do_request("POST", JUDGMENT_SCORER_FETCH_API_URL, payload)
        except JudgmentAPIException as e:
            raise self._scorer_error(
                e, f"Failed to fetch classifier scorer '{name}'"
            ) from e

    def scorer_exists(self, name: str):
        """Ask the backend whether a scorer with this name exists.

        Raises:
            JudgmentAPIException: With an outage message on HTTP 500, otherwise
                with a "Failed to check if scorer exists" message.
        """
        payload: ScorerExistsPayload = {"name": name}
        try:
            return self._do_request("POST", JUDGMENT_SCORER_EXISTS_API_URL, payload)
        except JudgmentAPIException as e:
            raise self._scorer_error(e, "Failed to check if scorer exists") from e

    def upload_custom_scorer(
        self,
        scorer_name: str,
        scorer_code: str,
        requirements_text: str,
    ) -> CustomScorerTemplateResponse:
        """Upload custom scorer to backend.

        Raises:
            JudgmentAPIException: Propagated unchanged from ``_do_request``.
        """
        payload: CustomScorerUploadPayload = {
            "scorer_name": scorer_name,
            "scorer_code": scorer_code,
            "requirements_text": requirements_text,
        }

        # Use longer timeout for custom scorer upload (5 minutes); requests
        # reads a tuple as (connect timeout, read timeout) in seconds.
        return self._do_request(
            "POST",
            JUDGMENT_CUSTOM_SCORER_UPLOAD_API_URL,
            payload,
            timeout=(10, 300),
        )

    # ------------------------------------------------------------------ #
    # Datasets
    # ------------------------------------------------------------------ #

    def push_dataset(
        self,
        dataset_alias: str,
        project_name: str,
        examples: List[Dict[str, Any]],
        traces: List[Dict[str, Any]],
        overwrite: bool = False,
    ):
        """Push a dataset (examples and traces) under ``dataset_alias``."""
        payload: DatasetPushPayload = {
            "dataset_alias": dataset_alias,
            "project_name": project_name,
            "examples": examples,
            "traces": traces,
            "overwrite": overwrite,
        }
        return self._do_request("POST", JUDGMENT_DATASETS_PUSH_API_URL, payload)

    def append_examples(
        self, dataset_alias: str, project_name: str, examples: List[Dict[str, Any]]
    ):
        """Append ``examples`` to an existing dataset."""
        payload: DatasetAppendPayload = {
            "dataset_alias": dataset_alias,
            "project_name": project_name,
            "examples": examples,
        }
        return self._do_request(
            "POST", JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL, payload
        )

    def append_traces(
        self, dataset_alias: str, project_name: str, traces: List[Dict[str, Any]]
    ):
        """Append ``traces`` to an existing dataset."""
        # NOTE(review): annotated with DatasetAppendPayload although it carries
        # "traces" rather than "examples" - confirm the type allows this key.
        payload: DatasetAppendPayload = {
            "dataset_alias": dataset_alias,
            "project_name": project_name,
            "traces": traces,
        }
        return self._do_request(
            "POST", JUDGMENT_DATASETS_APPEND_TRACES_API_URL, payload
        )

    def pull_dataset(self, dataset_alias: str, project_name: str):
        """Pull the dataset identified by alias and project."""
        payload: DatasetPullPayload = {
            "dataset_alias": dataset_alias,
            "project_name": project_name,
        }
        return self._do_request("POST", JUDGMENT_DATASETS_PULL_API_URL, payload)

    def delete_dataset(self, dataset_alias: str, project_name: str):
        """Delete the dataset identified by alias and project."""
        payload: DatasetDeletePayload = {
            "dataset_alias": dataset_alias,
            "project_name": project_name,
        }
        return self._do_request("POST", JUDGMENT_DATASETS_DELETE_API_URL, payload)

    def get_project_dataset_stats(self, project_name: str):
        """Fetch dataset statistics for a project."""
        payload: DatasetStatsPayload = {"project_name": project_name}
        return self._do_request(
            "POST", JUDGMENT_DATASETS_PROJECT_STATS_API_URL, payload
        )
|