judgeval 0.7.1__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. judgeval/__init__.py +139 -12
  2. judgeval/api/__init__.py +501 -0
  3. judgeval/api/api_types.py +344 -0
  4. judgeval/cli.py +2 -4
  5. judgeval/constants.py +10 -26
  6. judgeval/data/evaluation_run.py +49 -26
  7. judgeval/data/example.py +2 -2
  8. judgeval/data/judgment_types.py +266 -82
  9. judgeval/data/result.py +4 -5
  10. judgeval/data/scorer_data.py +4 -2
  11. judgeval/data/tool.py +2 -2
  12. judgeval/data/trace.py +7 -50
  13. judgeval/data/trace_run.py +7 -4
  14. judgeval/{dataset.py → dataset/__init__.py} +43 -28
  15. judgeval/env.py +67 -0
  16. judgeval/{run_evaluation.py → evaluation/__init__.py} +29 -95
  17. judgeval/exceptions.py +27 -0
  18. judgeval/integrations/langgraph/__init__.py +788 -0
  19. judgeval/judges/__init__.py +2 -2
  20. judgeval/judges/litellm_judge.py +75 -15
  21. judgeval/judges/together_judge.py +86 -18
  22. judgeval/judges/utils.py +7 -21
  23. judgeval/{common/logger.py → logger.py} +8 -6
  24. judgeval/scorers/__init__.py +0 -4
  25. judgeval/scorers/agent_scorer.py +3 -7
  26. judgeval/scorers/api_scorer.py +8 -13
  27. judgeval/scorers/base_scorer.py +52 -32
  28. judgeval/scorers/example_scorer.py +1 -3
  29. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -14
  30. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +45 -20
  31. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +2 -2
  32. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +3 -3
  33. judgeval/scorers/score.py +21 -31
  34. judgeval/scorers/trace_api_scorer.py +5 -0
  35. judgeval/scorers/utils.py +1 -103
  36. judgeval/tracer/__init__.py +1075 -2
  37. judgeval/tracer/constants.py +1 -0
  38. judgeval/tracer/exporters/__init__.py +37 -0
  39. judgeval/tracer/exporters/s3.py +119 -0
  40. judgeval/tracer/exporters/store.py +43 -0
  41. judgeval/tracer/exporters/utils.py +32 -0
  42. judgeval/tracer/keys.py +67 -0
  43. judgeval/tracer/llm/__init__.py +1233 -0
  44. judgeval/{common/tracer → tracer/llm}/providers.py +5 -10
  45. judgeval/{local_eval_queue.py → tracer/local_eval_queue.py} +15 -10
  46. judgeval/tracer/managers.py +188 -0
  47. judgeval/tracer/processors/__init__.py +181 -0
  48. judgeval/tracer/utils.py +20 -0
  49. judgeval/trainer/__init__.py +5 -0
  50. judgeval/{common/trainer → trainer}/config.py +12 -9
  51. judgeval/{common/trainer → trainer}/console.py +2 -9
  52. judgeval/{common/trainer → trainer}/trainable_model.py +12 -7
  53. judgeval/{common/trainer → trainer}/trainer.py +119 -17
  54. judgeval/utils/async_utils.py +2 -3
  55. judgeval/utils/decorators.py +24 -0
  56. judgeval/utils/file_utils.py +37 -4
  57. judgeval/utils/guards.py +32 -0
  58. judgeval/utils/meta.py +14 -0
  59. judgeval/{common/api/json_encoder.py → utils/serialize.py} +7 -1
  60. judgeval/utils/testing.py +88 -0
  61. judgeval/utils/url.py +10 -0
  62. judgeval/{version_check.py → utils/version_check.py} +3 -3
  63. judgeval/version.py +5 -0
  64. judgeval/warnings.py +4 -0
  65. {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/METADATA +12 -14
  66. judgeval-0.9.0.dist-info/RECORD +80 -0
  67. judgeval/clients.py +0 -35
  68. judgeval/common/__init__.py +0 -13
  69. judgeval/common/api/__init__.py +0 -3
  70. judgeval/common/api/api.py +0 -375
  71. judgeval/common/api/constants.py +0 -186
  72. judgeval/common/exceptions.py +0 -27
  73. judgeval/common/storage/__init__.py +0 -6
  74. judgeval/common/storage/s3_storage.py +0 -97
  75. judgeval/common/tracer/__init__.py +0 -31
  76. judgeval/common/tracer/constants.py +0 -22
  77. judgeval/common/tracer/core.py +0 -2427
  78. judgeval/common/tracer/otel_exporter.py +0 -108
  79. judgeval/common/tracer/otel_span_processor.py +0 -188
  80. judgeval/common/tracer/span_processor.py +0 -37
  81. judgeval/common/tracer/span_transformer.py +0 -207
  82. judgeval/common/tracer/trace_manager.py +0 -101
  83. judgeval/common/trainer/__init__.py +0 -5
  84. judgeval/common/utils.py +0 -948
  85. judgeval/integrations/langgraph.py +0 -844
  86. judgeval/judges/mixture_of_judges.py +0 -287
  87. judgeval/judgment_client.py +0 -267
  88. judgeval/rules.py +0 -521
  89. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  90. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  91. judgeval/utils/alerts.py +0 -93
  92. judgeval/utils/requests.py +0 -50
  93. judgeval-0.7.1.dist-info/RECORD +0 -82
  94. {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/WHEEL +0 -0
  95. {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/entry_points.txt +0 -0
  96. {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,80 @@
1
+ judgeval/__init__.py,sha256=w6X0PmRy4_syBWPrK0sMoTDnEwUhsorPIg5iBKnNGUs,4907
2
+ judgeval/cli.py,sha256=R5IiIQmSVg21kQHX2kL3sOeXCxvvAMSqyva3Z9AoSXc,1560
3
+ judgeval/constants.py,sha256=fqzSY7tDfseWy1trLjCSGC6WVOFEm_4hvA8IFpv7CUc,3683
4
+ judgeval/env.py,sha256=R0bj7XU29RIVVQjkVMa11ObhOYVMbaE_3LTvL3I9dWM,2212
5
+ judgeval/exceptions.py,sha256=tTbfe4yoOtPXmn22UQz9-6a-5PT9uOko85xaRRwr0Sw,621
6
+ judgeval/logger.py,sha256=ZWbp0QfT1CJnQIjV-Zle4n489nFCKEmD2-ukx--iiow,1553
7
+ judgeval/version.py,sha256=kJtYsih3hTYZ_rY_Lt0RcFqvjAfF5Xo1uNq0jZWJ5pw,73
8
+ judgeval/warnings.py,sha256=LbGte14ppiFjrkp-JJYueZ40NWFvMkWRvPXr6r-fUWw,73
9
+ judgeval/api/__init__.py,sha256=DZ-dijtkzUsjY3CBdCh6TH_PHC5qlI_tAFCBgvAZNjU,14538
10
+ judgeval/api/api_types.py,sha256=4xyqlmV9mEoTUIbii-bj7oS0fVwWrJ_UhYxpXvcBywA,9198
11
+ judgeval/data/__init__.py,sha256=1QagDcSQtfnJ632t9Dnq8d7XjAqhmY4mInOWt8qH9tM,455
12
+ judgeval/data/evaluation_run.py,sha256=G7ad4eDQTjketfcQRITk8bs8CIO8rm058H1G_qkLmhc,4729
13
+ judgeval/data/example.py,sha256=aTZg0GWQmUEBHk1n9Asw8sz-8YBWKlFsMZYjwq1DfrI,917
14
+ judgeval/data/judgment_types.py,sha256=b2pDeEOSl_zHJLDzqr0AGYbZ5zrooJMr5VmK-bDrN4o,17082
15
+ judgeval/data/result.py,sha256=JQ6f0XzL9p0oPmx-_z2NKUcISO6pISsVZ5dT1jkBeZs,2120
16
+ judgeval/data/scorer_data.py,sha256=g9PE0DNLikW0LgxGWhgpCiNVOX8PzqEaZKivifLOUDI,2997
17
+ judgeval/data/tool.py,sha256=bj_WxFg22mypUUVR5KqQRxMDHWvKwiE1MMPjLnTCoDU,99
18
+ judgeval/data/trace.py,sha256=HTeucJqNdFsQI3Ybb6mJ8NkmHkc1vIddzQ7BtQs25k8,1315
19
+ judgeval/data/trace_run.py,sha256=VCQUdDlrHixyiqWW1RUiCtLgqMt-3oW1M1A7CCer2Ok,1635
20
+ judgeval/data/scripts/fix_default_factory.py,sha256=lvp2JwYZqz-XpD9LZNa3mANZVP-jJSZoNzolI6JWERM,591
21
+ judgeval/data/scripts/openapi_transform.py,sha256=Sm04JClzyP1ga8KA3gkIdsae8Hlx-XU7-x0gHCQYOhg,3877
22
+ judgeval/dataset/__init__.py,sha256=xlg4VgEvbReWOlk6MK4GqJubSOeo17VqoIyjdMwmIf8,6573
23
+ judgeval/evaluation/__init__.py,sha256=O0sk3zP5jbHPtknT6DuB6ijkJ3-0I54mMf1UzDPKMF0,15409
24
+ judgeval/integrations/langgraph/__init__.py,sha256=VvqCKOk65A2gLlr8uWrJVzpRF5OnIja5zwF4hGPEFsw,27540
25
+ judgeval/judges/__init__.py,sha256=e7JnTc1TG_SwqydDHTXHIP0EBazQxt-ydMQG7ghSU5A,228
26
+ judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
27
+ judgeval/judges/litellm_judge.py,sha256=5vEF0IUo7HVWnOF2ww-DMke8Xkarnz32B_qbgKjc0-I,4182
28
+ judgeval/judges/together_judge.py,sha256=GzwlXZJzle8hT-vWKmq39JyIeanJqJfHDOkrksUbzk0,4398
29
+ judgeval/judges/utils.py,sha256=ITbYwvjU3o9-FIAReFvxh24yJrx9LV3l9BnSBgKUpxg,2068
30
+ judgeval/scorers/__init__.py,sha256=a5f_QcC7P9DjoOu_DMmADlkIXebo0d3zEJDJ7mhN3tM,640
31
+ judgeval/scorers/agent_scorer.py,sha256=V1NSwhGWgtXPsX-blKLkDLsPPbEiP-A4614X-95dtlQ,565
32
+ judgeval/scorers/api_scorer.py,sha256=M7cwJ2YY2Mw0pCo1UH-29jwrNd2PdiBRdQtmWS5ijXA,2173
33
+ judgeval/scorers/base_scorer.py,sha256=8uhkmj78R6-Stenl1eo6IVqKSBgkLpoqR0acGi-Fxik,2788
34
+ judgeval/scorers/example_scorer.py,sha256=o_BGUztJXjnKnuOqIa9T4PXe0wPoWg63FyH518N1LxA,561
35
+ judgeval/scorers/exceptions.py,sha256=ACDHK5-TWiF3NTk-wycaedpbrdobm-CvvC1JA_iP-Mk,179
36
+ judgeval/scorers/score.py,sha256=xquM59SCtNeuAsrBsHFgBQk3CHp4-bms4oFs24xfcU0,7176
37
+ judgeval/scorers/trace_api_scorer.py,sha256=B2Vp8Jj2I7N-G1weHMm1b_9gVbn0BMcOtestMFNtx08,112
38
+ judgeval/scorers/utils.py,sha256=iSZONwK0HecxUPz-cMCyra_87DSCag1E8BdpF2a4_44,377
39
+ judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
+ judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=MFsxDPZoZibJlsz4RgtLehA3rVcEfS9o5cw0l8gI5IM,1046
41
+ judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=zJsU0VrUmRhY9qav48c6jTyDqUwI3JzhV9ajtlJCe0M,544
42
+ judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=UDfzTO9Fx0FA5o0wfD8kprrGA4eW-43Rn9Gc0BQtKgY,393
43
+ judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py,sha256=mbBvirNcivu9dP6deM7FogDXrdwI9o8yqsO8IeKPSb4,309
44
+ judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ps51bTgQsD9xGYsk1v9bx0WxQMqywSllCE9_xlJkLd8,531
45
+ judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=aQzu-TiGqG74JDQ927evv5yGmnZw2AOolyHvlIhiUbI,683
46
+ judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=C-9Q7s9K7mcgFMcEL0I_7XQZMRqrL5MFRi9G6Dx8-v8,8505
47
+ judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=BhrLnIASZOTT9XJ6giYSoVfdR7NYsjRRTOTNioNtEiU,610
48
+ judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=bMu0WMJaXdMyDTN42sVLoWV-lrUHCEa8iDrCI_K7nlQ,808
49
+ judgeval/tracer/__init__.py,sha256=WmRmfvBOkzwEaAljwJ_ClNDJt1zB6J562G5BYXNMySY,36914
50
+ judgeval/tracer/constants.py,sha256=ae8tivAW97awJQxdRB9OMqX50wOLX3zqChT_AGkPBu0,85
51
+ judgeval/tracer/keys.py,sha256=qXPoZSkEhVF-YYfQ9-zeDMVdr4GtpPf2W7MPJaN2AQo,2889
52
+ judgeval/tracer/local_eval_queue.py,sha256=Amt7xkdmVJH1l2itm-ogiIW5oDaLnACisGfsdZjazn0,7228
53
+ judgeval/tracer/managers.py,sha256=h2ZHJ61_vf3cS-HlEUiodFzKDUuQWIhYC6n7pMVyM9c,6113
54
+ judgeval/tracer/utils.py,sha256=jljfr-oiCy8agOh0apAoR04tR2XRAzFg51On_LPzue8,600
55
+ judgeval/tracer/exporters/__init__.py,sha256=lnZXfPGaQH844HAIuZCQqjqhnmZGA98kHY8Xp-Oi4Ws,1220
56
+ judgeval/tracer/exporters/s3.py,sha256=N9gmw17cnR0VkfAQQkLsNj5BksgNRETThR5qYhWRjP4,4360
57
+ judgeval/tracer/exporters/store.py,sha256=KQV3cyqteesByQjR-9VdPXT9OlUZ-6F08ogqj837_c0,1012
58
+ judgeval/tracer/exporters/utils.py,sha256=JRcoSQuEHxMDJbXfyrUIfA2SHBVkZM82h4bTbYGxkNw,1154
59
+ judgeval/tracer/llm/__init__.py,sha256=p9uwWPg9k-NcWjj9TbwQj55sHhBOqRYx2-Ld6YHaFUs,42625
60
+ judgeval/tracer/llm/providers.py,sha256=QQLJlSNnDjXRAc2Wqw78o254COJUSXX39D7D_mx3NVA,2651
61
+ judgeval/tracer/processors/__init__.py,sha256=Fpm_cocYj3fwWAE9hcVder6TcYcycbXJWpmdyOjw5uY,5802
62
+ judgeval/trainer/__init__.py,sha256=h_DDVV7HFF7HUPAJFpt2d9wjqgnmEVcHxqZyB1k7pPQ,257
63
+ judgeval/trainer/config.py,sha256=8s0X8B334PJomorwONaUpb6K8cAMxRdYAeQdtx7HPHs,4258
64
+ judgeval/trainer/console.py,sha256=PJ0rCnDwC7aoW-VsLDS96ZyMyagh-l9EOJKff1ATIpo,4342
65
+ judgeval/trainer/trainable_model.py,sha256=vSDtHJJ-fLczC2gkaY9jG6TQvLgWqaVjElm1l8YlJcU,8959
66
+ judgeval/trainer/trainer.py,sha256=_dlV0NSD4jfNgTb2GwghWGBmnoNsooQq85nvIWW5VR4,16550
67
+ judgeval/utils/async_utils.py,sha256=lgCgi8gkLUcAEepruEkx-AGQgJnAJpKmBIhZx6Y0q2s,935
68
+ judgeval/utils/decorators.py,sha256=rdqY1w0zNL6O6GU6Wdeo0-x5EgpFTEhU2vkgiWsRYdc,525
69
+ judgeval/utils/file_utils.py,sha256=3LI1YCZwO5ogTgJreyOgRgDksey3natO2Td1PQqaPyY,3252
70
+ judgeval/utils/guards.py,sha256=QBb6m6KElxdvt2bskLZCKh_zGHbBcqV-VfGzT63o3hY,807
71
+ judgeval/utils/meta.py,sha256=wQFCLJTNKF9yUdXcw37AT6mC-wqzZpAvjn5gP_6flD8,349
72
+ judgeval/utils/serialize.py,sha256=AtpdMQXC03xhImLJjSNCi-PqsUek2mcwvtd2ryrATTQ,6057
73
+ judgeval/utils/testing.py,sha256=kJOq4LlEXaNThfg9oSIRqSK7IH8AwLgbukjn5uxMY7A,3661
74
+ judgeval/utils/url.py,sha256=Shf0v3XcbaWpL0m1eGJEEO_z4TsQCnDB2Rl25OTUmiI,195
75
+ judgeval/utils/version_check.py,sha256=kcF6SvB6GbVKI0Gv9QRVm-kvBn9_z-c3jmPORsXO3h0,1015
76
+ judgeval-0.9.0.dist-info/METADATA,sha256=LHcfmmHNxDbQQ2jrSTHqkjxYQkK11MMtj_4Jkd9wASs,8869
77
+ judgeval-0.9.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
78
+ judgeval-0.9.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
79
+ judgeval-0.9.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
80
+ judgeval-0.9.0.dist-info/RECORD,,
judgeval/clients.py DELETED
@@ -1,35 +0,0 @@
1
- import os
2
- from dotenv import load_dotenv
3
- from openai import OpenAI
4
- from typing import Optional
5
-
6
- PATH_TO_DOTENV = os.path.join(os.path.dirname(__file__), ".env")
7
- load_dotenv(dotenv_path=PATH_TO_DOTENV)
8
-
9
-
10
- # Initialize optional OpenAI client
11
- client: Optional["OpenAI"] = None
12
- if os.getenv("OPENAI_API_KEY"):
13
- try:
14
- from openai import OpenAI
15
-
16
- client = OpenAI()
17
- except ImportError:
18
- # openai package not installed
19
- pass
20
-
21
- # Initialize optional Together clients
22
- together_client: Optional["Together"] = None
23
- async_together_client: Optional["AsyncTogether"] = None
24
-
25
- # Only initialize Together clients if API key is available
26
-
27
- together_api_key = os.getenv("TOGETHERAI_API_KEY") or os.getenv("TOGETHER_API_KEY")
28
- if together_api_key:
29
- try:
30
- from together import Together, AsyncTogether
31
-
32
- together_client = Together(api_key=together_api_key)
33
- async_together_client = AsyncTogether(api_key=together_api_key)
34
- except Exception:
35
- pass
@@ -1,13 +0,0 @@
1
- from judgeval.common.utils import (
2
- get_chat_completion,
3
- aget_chat_completion,
4
- get_completion_multiple_models,
5
- aget_completion_multiple_models,
6
- )
7
-
8
- __all__ = [
9
- "get_chat_completion",
10
- "aget_chat_completion",
11
- "get_completion_multiple_models",
12
- "aget_completion_multiple_models",
13
- ]
@@ -1,3 +0,0 @@
1
- from .api import JudgmentApiClient, JudgmentAPIException
2
-
3
- __all__ = ["JudgmentApiClient", "JudgmentAPIException"]
@@ -1,375 +0,0 @@
1
- from typing import Literal, List, Dict, Any, Union, Optional
2
- from requests import exceptions
3
- from judgeval.common.api.constants import (
4
- JUDGMENT_TRACES_FETCH_API_URL,
5
- JUDGMENT_TRACES_UPSERT_API_URL,
6
- JUDGMENT_TRACES_DELETE_API_URL,
7
- JUDGMENT_TRACES_SPANS_BATCH_API_URL,
8
- JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL,
9
- JUDGMENT_DATASETS_PUSH_API_URL,
10
- JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
11
- JUDGMENT_DATASETS_PULL_API_URL,
12
- JUDGMENT_DATASETS_DELETE_API_URL,
13
- JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
14
- JUDGMENT_PROJECT_DELETE_API_URL,
15
- JUDGMENT_PROJECT_CREATE_API_URL,
16
- JUDGMENT_EVAL_API_URL,
17
- JUDGMENT_TRACE_EVAL_API_URL,
18
- JUDGMENT_EVAL_LOG_API_URL,
19
- JUDGMENT_EVAL_FETCH_API_URL,
20
- JUDGMENT_EVAL_DELETE_API_URL,
21
- JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL,
22
- JUDGMENT_GET_EVAL_STATUS_API_URL,
23
- JUDGMENT_SCORER_SAVE_API_URL,
24
- JUDGMENT_SCORER_FETCH_API_URL,
25
- JUDGMENT_SCORER_EXISTS_API_URL,
26
- JUDGMENT_CUSTOM_SCORER_UPLOAD_API_URL,
27
- JUDGMENT_DATASETS_APPEND_TRACES_API_URL,
28
- )
29
- from judgeval.common.api.constants import (
30
- TraceFetchPayload,
31
- TraceDeletePayload,
32
- SpansBatchPayload,
33
- EvaluationEntryResponse,
34
- EvaluationRunsBatchPayload,
35
- DatasetPushPayload,
36
- DatasetAppendPayload,
37
- DatasetPullPayload,
38
- DatasetDeletePayload,
39
- DatasetStatsPayload,
40
- ProjectCreatePayload,
41
- ProjectDeletePayload,
42
- EvalRunRequestBody,
43
- DeleteEvalRunRequestBody,
44
- EvalLogPayload,
45
- EvalStatusPayload,
46
- ScorerSavePayload,
47
- ScorerFetchPayload,
48
- ScorerExistsPayload,
49
- CustomScorerUploadPayload,
50
- CustomScorerTemplateResponse,
51
- )
52
- from judgeval.utils.requests import requests
53
- from judgeval.common.api.json_encoder import json_encoder
54
-
55
-
56
- class JudgmentAPIException(exceptions.HTTPError):
57
- """
58
- Exception raised when an error occurs while executing a Judgment API request.
59
- Extends requests.exceptions.HTTPError to provide access to the response object.
60
- """
61
-
62
- def __init__(self, message: str, response=None, request=None):
63
- super().__init__(message, response=response, request=request)
64
- self.message = message
65
- self.response = response
66
- self.request = request
67
-
68
- @property
69
- def status_code(self) -> Union[int, None]:
70
- """Get the HTTP status code from the response."""
71
- return self.response.status_code if self.response else None
72
-
73
- @property
74
- def response_json(self) -> Dict[str, Any]:
75
- """Get the JSON response body."""
76
- try:
77
- return self.response.json() if self.response else {}
78
- except (ValueError, AttributeError):
79
- return {}
80
-
81
- @property
82
- def error_detail(self) -> str:
83
- """Get the error detail from the response JSON."""
84
- return self.response_json.get("detail", "An unknown error occurred.")
85
-
86
-
87
- class JudgmentApiClient:
88
- def __init__(self, api_key: str, organization_id: str):
89
- self.api_key = api_key
90
- self.organization_id = organization_id
91
-
92
- def _do_request(
93
- self,
94
- method: Literal["POST", "PATCH", "GET", "DELETE"],
95
- url: str,
96
- payload: Any,
97
- timeout: Optional[Union[float, tuple]] = None,
98
- ) -> Any:
99
- # Prepare request kwargs with optional timeout
100
- request_kwargs = self._request_kwargs()
101
- if timeout is not None:
102
- request_kwargs["timeout"] = timeout
103
-
104
- if method == "GET":
105
- r = requests.request(
106
- method,
107
- url,
108
- params=payload,
109
- headers=self._headers(),
110
- **request_kwargs,
111
- )
112
- else:
113
- r = requests.request(
114
- method,
115
- url,
116
- json=json_encoder(payload),
117
- headers=self._headers(),
118
- **request_kwargs,
119
- )
120
-
121
- try:
122
- r.raise_for_status()
123
- except exceptions.HTTPError as e:
124
- try:
125
- detail = r.json().get("detail", "")
126
- except Exception:
127
- detail = r.text
128
-
129
- raise JudgmentAPIException(
130
- f"HTTP {r.status_code}: {r.reason}, {detail}",
131
- response=r,
132
- request=e.request,
133
- )
134
-
135
- return r.json()
136
-
137
- def send_spans_batch(self, spans: List[Dict[str, Any]]):
138
- payload: SpansBatchPayload = {
139
- "spans": spans,
140
- "organization_id": self.organization_id,
141
- }
142
-
143
- return self._do_request("POST", JUDGMENT_TRACES_SPANS_BATCH_API_URL, payload)
144
-
145
- def send_evaluation_runs_batch(
146
- self, evaluation_entries: List[EvaluationEntryResponse]
147
- ):
148
- payload: EvaluationRunsBatchPayload = {
149
- "organization_id": self.organization_id,
150
- "evaluation_entries": evaluation_entries,
151
- }
152
-
153
- return self._do_request(
154
- "POST", JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL, payload
155
- )
156
-
157
- def fetch_trace(self, trace_id: str):
158
- payload: TraceFetchPayload = {"trace_id": trace_id}
159
- return self._do_request("POST", JUDGMENT_TRACES_FETCH_API_URL, payload)
160
-
161
- def upsert_trace(self, trace_data: Dict[str, Any]):
162
- return self._do_request("POST", JUDGMENT_TRACES_UPSERT_API_URL, trace_data)
163
-
164
- def delete_trace(self, trace_id: str):
165
- payload: TraceDeletePayload = {"trace_ids": [trace_id]}
166
- return self._do_request("DELETE", JUDGMENT_TRACES_DELETE_API_URL, payload)
167
-
168
- def delete_traces(self, trace_ids: List[str]):
169
- payload: TraceDeletePayload = {"trace_ids": trace_ids}
170
- return self._do_request("DELETE", JUDGMENT_TRACES_DELETE_API_URL, payload)
171
-
172
- def delete_project(self, project_name: str):
173
- payload: ProjectDeletePayload = {"project_name": project_name}
174
- return self._do_request("DELETE", JUDGMENT_PROJECT_DELETE_API_URL, payload)
175
-
176
- def create_project(self, project_name: str):
177
- payload: ProjectCreatePayload = {"project_name": project_name}
178
- return self._do_request("POST", JUDGMENT_PROJECT_CREATE_API_URL, payload)
179
-
180
- def run_evaluation(self, evaluation_run: Dict[str, Any]):
181
- return self._do_request("POST", JUDGMENT_EVAL_API_URL, evaluation_run)
182
-
183
- def run_trace_evaluation(self, trace_run: Dict[str, Any]):
184
- return self._do_request("POST", JUDGMENT_TRACE_EVAL_API_URL, trace_run)
185
-
186
- def log_evaluation_results(
187
- self, results: List[Dict[str, Any]], run: Dict[str, Any]
188
- ):
189
- payload: EvalLogPayload = {"results": results, "run": run}
190
- return self._do_request("POST", JUDGMENT_EVAL_LOG_API_URL, payload)
191
-
192
- def fetch_evaluation_results(self, experiment_run_id: str, project_name: str):
193
- payload: EvalRunRequestBody = {
194
- "project_name": project_name,
195
- "experiment_run_id": experiment_run_id,
196
- }
197
- return self._do_request("POST", JUDGMENT_EVAL_FETCH_API_URL, payload)
198
-
199
- def delete_evaluation_results(self, project_name: str, eval_names: List[str]):
200
- payload: DeleteEvalRunRequestBody = {
201
- "project_name": project_name,
202
- "eval_names": eval_names,
203
- "judgment_api_key": self.api_key,
204
- }
205
- return self._do_request("POST", JUDGMENT_EVAL_DELETE_API_URL, payload)
206
-
207
- def add_to_evaluation_queue(self, payload: Dict[str, Any]):
208
- return self._do_request("POST", JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL, payload)
209
-
210
- def get_evaluation_status(self, experiment_run_id: str, project_name: str):
211
- payload: EvalStatusPayload = {
212
- "experiment_run_id": experiment_run_id,
213
- "project_name": project_name,
214
- "judgment_api_key": self.api_key,
215
- }
216
- return self._do_request("GET", JUDGMENT_GET_EVAL_STATUS_API_URL, payload)
217
-
218
- def save_scorer(
219
- self, name: str, prompt: str, threshold: float, options: Optional[dict] = None
220
- ):
221
- payload: ScorerSavePayload = {
222
- "name": name,
223
- "prompt": prompt,
224
- "threshold": threshold,
225
- "options": options,
226
- }
227
- try:
228
- return self._do_request("POST", JUDGMENT_SCORER_SAVE_API_URL, payload)
229
- except JudgmentAPIException as e:
230
- if e.status_code == 500:
231
- raise JudgmentAPIException(
232
- f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.error_detail}",
233
- response=e.response,
234
- request=e.request,
235
- )
236
- raise JudgmentAPIException(
237
- f"Failed to save classifier scorer: {e.error_detail}",
238
- response=e.response,
239
- request=e.request,
240
- )
241
-
242
- def fetch_scorer(self, name: str):
243
- payload: ScorerFetchPayload = {"name": name}
244
- try:
245
- return self._do_request("POST", JUDGMENT_SCORER_FETCH_API_URL, payload)
246
- except JudgmentAPIException as e:
247
- if e.status_code == 500:
248
- raise JudgmentAPIException(
249
- f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.error_detail}",
250
- response=e.response,
251
- request=e.request,
252
- )
253
- raise JudgmentAPIException(
254
- f"Failed to fetch classifier scorer '{name}': {e.error_detail}",
255
- response=e.response,
256
- request=e.request,
257
- )
258
-
259
- def scorer_exists(self, name: str):
260
- payload: ScorerExistsPayload = {"name": name}
261
- try:
262
- return self._do_request("POST", JUDGMENT_SCORER_EXISTS_API_URL, payload)
263
- except JudgmentAPIException as e:
264
- if e.status_code == 500:
265
- raise JudgmentAPIException(
266
- f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.error_detail}",
267
- response=e.response,
268
- request=e.request,
269
- )
270
- raise JudgmentAPIException(
271
- f"Failed to check if scorer exists: {e.error_detail}",
272
- response=e.response,
273
- request=e.request,
274
- )
275
-
276
- def upload_custom_scorer(
277
- self,
278
- scorer_name: str,
279
- scorer_code: str,
280
- requirements_text: str,
281
- ) -> CustomScorerTemplateResponse:
282
- """Upload custom scorer to backend"""
283
- payload: CustomScorerUploadPayload = {
284
- "scorer_name": scorer_name,
285
- "scorer_code": scorer_code,
286
- "requirements_text": requirements_text,
287
- }
288
-
289
- try:
290
- # Use longer timeout for custom scorer upload (5 minutes)
291
- response = self._do_request(
292
- "POST",
293
- JUDGMENT_CUSTOM_SCORER_UPLOAD_API_URL,
294
- payload,
295
- timeout=(10, 300),
296
- )
297
- return response
298
- except JudgmentAPIException as e:
299
- raise e
300
-
301
- def push_dataset(
302
- self,
303
- dataset_alias: str,
304
- project_name: str,
305
- examples: List[Dict[str, Any]],
306
- traces: List[Dict[str, Any]],
307
- overwrite: bool = False,
308
- ):
309
- payload: DatasetPushPayload = {
310
- "dataset_alias": dataset_alias,
311
- "project_name": project_name,
312
- "examples": examples,
313
- "traces": traces,
314
- "overwrite": overwrite,
315
- }
316
- return self._do_request("POST", JUDGMENT_DATASETS_PUSH_API_URL, payload)
317
-
318
- def append_examples(
319
- self, dataset_alias: str, project_name: str, examples: List[Dict[str, Any]]
320
- ):
321
- payload: DatasetAppendPayload = {
322
- "dataset_alias": dataset_alias,
323
- "project_name": project_name,
324
- "examples": examples,
325
- }
326
- return self._do_request(
327
- "POST", JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL, payload
328
- )
329
-
330
- def append_traces(
331
- self, dataset_alias: str, project_name: str, traces: List[Dict[str, Any]]
332
- ):
333
- payload: DatasetAppendPayload = {
334
- "dataset_alias": dataset_alias,
335
- "project_name": project_name,
336
- "traces": traces,
337
- }
338
- return self._do_request(
339
- "POST", JUDGMENT_DATASETS_APPEND_TRACES_API_URL, payload
340
- )
341
-
342
- def pull_dataset(self, dataset_alias: str, project_name: str):
343
- payload: DatasetPullPayload = {
344
- "dataset_alias": dataset_alias,
345
- "project_name": project_name,
346
- }
347
- return self._do_request("POST", JUDGMENT_DATASETS_PULL_API_URL, payload)
348
-
349
- def delete_dataset(self, dataset_alias: str, project_name: str):
350
- payload: DatasetDeletePayload = {
351
- "dataset_alias": dataset_alias,
352
- "project_name": project_name,
353
- }
354
- return self._do_request("POST", JUDGMENT_DATASETS_DELETE_API_URL, payload)
355
-
356
- def get_project_dataset_stats(self, project_name: str):
357
- payload: DatasetStatsPayload = {"project_name": project_name}
358
- return self._do_request(
359
- "POST", JUDGMENT_DATASETS_PROJECT_STATS_API_URL, payload
360
- )
361
-
362
- def _headers(self) -> Dict[str, str]:
363
- return {
364
- "Content-Type": "application/json",
365
- "Authorization": f"Bearer {self.api_key}",
366
- "X-Organization-Id": self.organization_id,
367
- }
368
-
369
- def _request_kwargs(self):
370
- # NOTE: We may want to configure custom kwargs that different requests may need.
371
- # For this purpose we can store that as a property of self, and return the appropriate kwargs from this method.
372
- return {
373
- "verify": True,
374
- "timeout": 30,
375
- }