judgeval 0.0.29__tar.gz → 0.0.34__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. judgeval-0.0.34/.github/pull_request_template.md +31 -0
  2. judgeval-0.0.34/.github/workflows/ci.yaml +91 -0
  3. {judgeval-0.0.29 → judgeval-0.0.34}/PKG-INFO +15 -2
  4. {judgeval-0.0.29 → judgeval-0.0.34}/Pipfile +4 -1
  5. {judgeval-0.0.29 → judgeval-0.0.34}/Pipfile.lock +1112 -892
  6. {judgeval-0.0.29 → judgeval-0.0.34}/README.md +12 -0
  7. {judgeval-0.0.29 → judgeval-0.0.34}/docs/alerts/notifications.mdx +107 -15
  8. {judgeval-0.0.29 → judgeval-0.0.34}/docs/alerts/rules.mdx +55 -6
  9. judgeval-0.0.34/docs/api_reference/judgment_client.mdx +101 -0
  10. judgeval-0.0.34/docs/api_reference/trace.mdx +173 -0
  11. judgeval-0.0.34/docs/changelog/2025-04-21.mdx +19 -0
  12. judgeval-0.0.34/docs/clustering/clustering.mdx +68 -0
  13. judgeval-0.0.34/docs/evaluation/data_datasets.mdx +288 -0
  14. {judgeval-0.0.29 → judgeval-0.0.34}/docs/evaluation/data_examples.mdx +96 -7
  15. judgeval-0.0.34/docs/evaluation/data_sequences.mdx +80 -0
  16. judgeval-0.0.34/docs/evaluation/introduction.mdx +224 -0
  17. judgeval-0.0.34/docs/evaluation/judges.mdx +209 -0
  18. judgeval-0.0.34/docs/evaluation/scorers/agent/derailment.mdx +54 -0
  19. {judgeval-0.0.29 → judgeval-0.0.34}/docs/evaluation/scorers/custom_scorers.mdx +185 -0
  20. {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/answer_correctness.mdx +31 -1
  21. {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/answer_relevancy.mdx +29 -1
  22. {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/comparison.mdx +44 -4
  23. {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/contextual_precision.mdx +33 -1
  24. {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/contextual_recall.mdx +33 -1
  25. {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/contextual_relevancy.mdx +31 -1
  26. {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/faithfulness.mdx +33 -2
  27. {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/groundedness.mdx +1 -1
  28. {judgeval-0.0.29 → judgeval-0.0.34}/docs/evaluation/scorers/introduction.mdx +48 -25
  29. judgeval-0.0.34/docs/evaluation/unit_testing.mdx +93 -0
  30. {judgeval-0.0.29 → judgeval-0.0.34}/docs/getting_started.mdx +166 -188
  31. judgeval-0.0.34/docs/images/annotation_queue_ui.png +0 -0
  32. judgeval-0.0.34/docs/images/cluster.png +0 -0
  33. judgeval-0.0.34/docs/images/cluster_button.png +0 -0
  34. judgeval-0.0.34/docs/images/dashboard_annotation_queue_button.png +0 -0
  35. judgeval-0.0.34/docs/mcp_server/mcp_server.mdx +586 -0
  36. {judgeval-0.0.29 → judgeval-0.0.34}/docs/mint.json +35 -18
  37. judgeval-0.0.34/docs/monitoring/annotations.mdx +41 -0
  38. judgeval-0.0.34/docs/monitoring/tracing.mdx +443 -0
  39. {judgeval-0.0.29 → judgeval-0.0.34}/pyproject.toml +3 -2
  40. judgeval-0.0.34/src/demo/custom_scorer/main.py +43 -0
  41. judgeval-0.0.34/src/demo/custom_scorer/scorer.py +44 -0
  42. judgeval-0.0.34/src/demo/dataset.py +16 -0
  43. judgeval-0.0.34/src/demo/demo.py +54 -0
  44. judgeval-0.0.34/src/demo/demo2.py +144 -0
  45. judgeval-0.0.34/src/demo/new_bot/basic_bot.py +116 -0
  46. judgeval-0.0.34/src/demo/simple_trace.py +89 -0
  47. {judgeval-0.0.29/src/demo/new_trace → judgeval-0.0.34/src/demo/simplified_tracing}/example_complex_async.py +53 -35
  48. judgeval-0.0.34/src/demo/streaming_anthropic_demo.py +82 -0
  49. judgeval-0.0.34/src/demo/streaming_openai_demo.py +61 -0
  50. judgeval-0.0.34/src/demo/test.py +51 -0
  51. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/__init__.py +3 -1
  52. judgeval-0.0.34/src/judgeval/common/s3_storage.py +93 -0
  53. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/common/tracer.py +901 -177
  54. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/constants.py +5 -3
  55. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/__init__.py +4 -0
  56. judgeval-0.0.34/src/judgeval/data/custom_example.py +18 -0
  57. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/datasets/dataset.py +5 -1
  58. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/datasets/eval_dataset_client.py +64 -5
  59. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/example.py +1 -0
  60. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/result.py +7 -6
  61. judgeval-0.0.34/src/judgeval/data/sequence.py +49 -0
  62. judgeval-0.0.34/src/judgeval/data/sequence_run.py +44 -0
  63. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/evaluation_run.py +12 -7
  64. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/integrations/langgraph.py +89 -72
  65. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judgment_client.py +86 -145
  66. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/rules.py +4 -7
  67. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/run_evaluation.py +87 -13
  68. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/__init__.py +6 -4
  69. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorer.py +3 -0
  70. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -1
  71. judgeval-0.0.34/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +21 -0
  72. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/score.py +6 -5
  73. judgeval-0.0.34/src/judgeval/version_check.py +22 -0
  74. judgeval-0.0.34/src/test.py +143 -0
  75. judgeval-0.0.29/.github/workflows/ci.yaml +0 -44
  76. judgeval-0.0.29/docs/api_reference/judgment_client.mdx +0 -61
  77. judgeval-0.0.29/docs/api_reference/trace.mdx +0 -82
  78. judgeval-0.0.29/docs/evaluation/data_datasets.mdx +0 -159
  79. judgeval-0.0.29/docs/evaluation/introduction.mdx +0 -111
  80. judgeval-0.0.29/docs/evaluation/judges.mdx +0 -88
  81. judgeval-0.0.29/docs/evaluation/scorers/hallucination.mdx +0 -54
  82. judgeval-0.0.29/docs/evaluation/unit_testing.mdx +0 -39
  83. judgeval-0.0.29/docs/judgment/introduction.mdx +0 -11
  84. judgeval-0.0.29/docs/monitoring/tracing.mdx +0 -214
  85. judgeval-0.0.29/src/demo/cookbooks/JNPR_Mist/test.py +0 -21
  86. judgeval-0.0.29/src/demo/cookbooks/linkd/text2sql.py +0 -14
  87. judgeval-0.0.29/src/demo/custom_example_demo/osiris_test.py +0 -22
  88. judgeval-0.0.29/src/demo/custom_example_demo/qodo_scorer.py +0 -78
  89. judgeval-0.0.29/src/demo/demo.py +0 -21
  90. judgeval-0.0.29/src/judgeval/data/custom_api_example.py +0 -91
  91. judgeval-0.0.29/src/judgeval/scorers/base_scorer.py +0 -58
  92. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -169
  93. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -27
  94. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
  95. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -276
  96. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
  97. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
  98. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
  99. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
  100. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py +0 -161
  101. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py +0 -222
  102. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
  103. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
  104. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
  105. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
  106. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
  107. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
  108. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
  109. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
  110. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
  111. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py +0 -3
  112. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/execution_order/execution_order.py +0 -156
  113. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
  114. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -318
  115. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
  116. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
  117. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -264
  118. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
  119. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py +0 -232
  120. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py +0 -102
  121. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
  122. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
  123. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
  124. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
  125. judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -551
  126. judgeval-0.0.29/src/test.py +0 -21
  127. {judgeval-0.0.29 → judgeval-0.0.34}/.gitignore +0 -0
  128. {judgeval-0.0.29 → judgeval-0.0.34}/LICENSE.md +0 -0
  129. {judgeval-0.0.29 → judgeval-0.0.34}/docs/README.md +0 -0
  130. {judgeval-0.0.29 → judgeval-0.0.34}/docs/alerts/platform_notifications.mdx +0 -0
  131. {judgeval-0.0.29 → judgeval-0.0.34}/docs/development.mdx +0 -0
  132. {judgeval-0.0.29 → judgeval-0.0.34}/docs/essentials/code.mdx +0 -0
  133. {judgeval-0.0.29 → judgeval-0.0.34}/docs/essentials/images.mdx +0 -0
  134. {judgeval-0.0.29 → judgeval-0.0.34}/docs/essentials/markdown.mdx +0 -0
  135. {judgeval-0.0.29 → judgeval-0.0.34}/docs/essentials/navigation.mdx +0 -0
  136. {judgeval-0.0.29 → judgeval-0.0.34}/docs/essentials/reusable-snippets.mdx +0 -0
  137. {judgeval-0.0.29 → judgeval-0.0.34}/docs/essentials/settings.mdx +0 -0
  138. {judgeval-0.0.29 → judgeval-0.0.34}/docs/evaluation/scorers/classifier_scorer.mdx +0 -0
  139. {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/execution_order.mdx +0 -0
  140. {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/json_correctness.mdx +0 -0
  141. {judgeval-0.0.29/docs/evaluation/scorers → judgeval-0.0.34/docs/evaluation/scorers/default}/summarization.mdx +0 -0
  142. {judgeval-0.0.29 → judgeval-0.0.34}/docs/favicon.svg +0 -0
  143. {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/basic_trace_example.png +0 -0
  144. {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/checks-passed.png +0 -0
  145. {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/create_aggressive_scorer.png +0 -0
  146. {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/create_scorer.png +0 -0
  147. {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/evaluation_diagram.png +0 -0
  148. {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/hero-dark.svg +0 -0
  149. {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/hero-light.svg +0 -0
  150. {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/notifications_page.png +0 -0
  151. {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/online_eval_fault.png +0 -0
  152. {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/reports_modal.png +0 -0
  153. {judgeval-0.0.29 → judgeval-0.0.34}/docs/images/trace_ss.png +0 -0
  154. {judgeval-0.0.29 → judgeval-0.0.34}/docs/integration/langgraph.mdx +0 -0
  155. {judgeval-0.0.29 → judgeval-0.0.34}/docs/introduction.mdx +0 -0
  156. {judgeval-0.0.29 → judgeval-0.0.34}/docs/logo/dark.svg +0 -0
  157. {judgeval-0.0.29 → judgeval-0.0.34}/docs/logo/light.svg +0 -0
  158. {judgeval-0.0.29 → judgeval-0.0.34}/docs/monitoring/introduction.mdx +0 -0
  159. {judgeval-0.0.29 → judgeval-0.0.34}/docs/monitoring/production_insights.mdx +0 -0
  160. {judgeval-0.0.29 → judgeval-0.0.34}/docs/notebooks/create_dataset.ipynb +0 -0
  161. {judgeval-0.0.29 → judgeval-0.0.34}/docs/notebooks/create_scorer.ipynb +0 -0
  162. {judgeval-0.0.29 → judgeval-0.0.34}/docs/notebooks/demo.ipynb +0 -0
  163. {judgeval-0.0.29 → judgeval-0.0.34}/docs/notebooks/prompt_scorer.ipynb +0 -0
  164. {judgeval-0.0.29 → judgeval-0.0.34}/docs/notebooks/quickstart.ipynb +0 -0
  165. {judgeval-0.0.29 → judgeval-0.0.34}/docs/quickstart.mdx +0 -0
  166. {judgeval-0.0.29 → judgeval-0.0.34}/docs/snippets/snippet-intro.mdx +0 -0
  167. {judgeval-0.0.29 → judgeval-0.0.34}/pytest.ini +0 -0
  168. {judgeval-0.0.29 → judgeval-0.0.34}/src/demo/travel_agent.py +0 -0
  169. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/clients.py +0 -0
  170. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/common/__init__.py +0 -0
  171. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/common/exceptions.py +0 -0
  172. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/common/logger.py +0 -0
  173. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/common/utils.py +0 -0
  174. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/datasets/__init__.py +0 -0
  175. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/data/scorer_data.py +0 -0
  176. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judges/__init__.py +0 -0
  177. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judges/base_judge.py +0 -0
  178. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judges/litellm_judge.py +0 -0
  179. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judges/mixture_of_judges.py +0 -0
  180. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judges/together_judge.py +0 -0
  181. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/judges/utils.py +0 -0
  182. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/api_scorer.py +0 -0
  183. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/exceptions.py +0 -0
  184. {judgeval-0.0.29/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison → judgeval-0.0.34/src/judgeval/scorers/judgeval_scorers}/__init__.py +0 -0
  185. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  186. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  187. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -0
  188. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -0
  189. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -0
  190. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -0
  191. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -0
  192. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  193. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -0
  194. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
  195. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  196. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -0
  197. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -0
  198. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -0
  199. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -0
  200. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -0
  201. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/prompt_scorer.py +0 -0
  202. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/scorers/utils.py +0 -0
  203. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/tracer/__init__.py +0 -0
  204. {judgeval-0.0.29 → judgeval-0.0.34}/src/judgeval/utils/alerts.py +0 -0
@@ -0,0 +1,31 @@
1
+ ## 📝 Summary
2
+
3
+ <!-- Provide a brief description of the changes introduced by this PR -->
4
+
5
+ ## 🎯 Purpose
6
+
7
+ <!-- Explain the motivation behind these changes. Why are they necessary? -->
8
+
9
+ ## 🎥 Demo of Changes
10
+
11
+ <!-- Add a short 1-3 minute video describing/demoing the changes -->
12
+
13
+ ## 🧪 Testing
14
+
15
+ <!-- Describe how the changes were tested (unit/manual) -->
16
+
17
+ ## ✅ Checklist
18
+
19
+ - [ ] Self-review
20
+ - [ ] Video demo of changes
21
+ - [ ] Unit Tests and CI/CD tests are passing
22
+ - [ ] Reviewers assigned
23
+
24
+
25
+ ## 📌 Linear Issue
26
+
27
+ <!-- Reference to associated Linear ticket, e.g., ABC-123 -->
28
+
29
+ ## ✏️ Additional Notes
30
+
31
+ <!-- Any additional information that doesn't fit into the other sections -->
@@ -0,0 +1,91 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request_review:
5
+ types: [submitted]
6
+ branches:
7
+ - main
8
+
9
+ jobs:
10
+ run-tests:
11
+ strategy:
12
+ fail-fast: false
13
+ matrix:
14
+ os: [ubuntu-latest, macos-latest]
15
+ python-version:
16
+ - "3.11"
17
+ name: Test
18
+ runs-on: ${{ matrix.os }}
19
+ env:
20
+ PYTHONPATH: "."
21
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
22
+ TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
23
+
24
+ steps:
25
+ - name: Checkout code
26
+ uses: actions/checkout@v4
27
+
28
+ - name: Set up Python
29
+ uses: actions/setup-python@v4
30
+ with:
31
+ python-version: ${{ matrix.python-version }}
32
+
33
+ - name: Install dependencies
34
+ run: |
35
+ pip install pipenv
36
+ pipenv install --dev
37
+
38
+
39
+ - name: Run tests
40
+ run: |
41
+ cd src
42
+ pipenv run pytest
43
+
44
+ run-e2e-tests:
45
+ if: "!contains(github.actor, '[bot]')" # Exclude if the actor is a bot
46
+ concurrency:
47
+ group: e2e-tests
48
+ strategy:
49
+ fail-fast: false
50
+ matrix:
51
+ os: [ubuntu-latest]
52
+ python-version:
53
+ - "3.11"
54
+ name: E2E Tests
55
+ runs-on: ${{ matrix.os }}
56
+ steps:
57
+ - name: Configure AWS Credentials
58
+ uses: aws-actions/configure-aws-credentials@v4
59
+ with:
60
+ aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
61
+ aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
62
+ aws-region: us-west-1
63
+
64
+ - name: Checkout code
65
+ uses: actions/checkout@v4
66
+
67
+ - name: Set up Python
68
+ uses: actions/setup-python@v4
69
+ with:
70
+ python-version: ${{ matrix.python-version }}
71
+
72
+ - name: Install judgeval dependencies
73
+ run: |
74
+ pip install pipenv
75
+ pipenv install --dev
76
+
77
+ - name: Check if server is running
78
+ run: |
79
+ if ! curl -s http://api.judgmentlabs.ai/health > /dev/null; then
80
+ echo "Production Judgment server is not running properly. Check logs on AWS CloudWatch for more details."
81
+ exit 1
82
+ else
83
+ echo "Server is running."
84
+ fi
85
+
86
+ - name: Run E2E tests
87
+ working-directory: src
88
+ run: |
89
+ SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id gh-actions/api-keys/judgeval --query SecretString --output text)
90
+ export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
91
+ pipenv run pytest ./e2etests
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.0.29
3
+ Version: 0.0.34
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -12,12 +12,13 @@ Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: >=3.11
13
13
  Requires-Dist: anthropic
14
14
  Requires-Dist: fastapi
15
+ Requires-Dist: google-genai
15
16
  Requires-Dist: langchain
16
17
  Requires-Dist: langchain-anthropic
17
18
  Requires-Dist: langchain-core
18
19
  Requires-Dist: langchain-huggingface
19
20
  Requires-Dist: langchain-openai
20
- Requires-Dist: litellm
21
+ Requires-Dist: litellm==1.38.12
21
22
  Requires-Dist: nest-asyncio
22
23
  Requires-Dist: openai
23
24
  Requires-Dist: openpyxl
@@ -94,9 +95,21 @@ Create a file named `traces.py` with the following code:
94
95
  from judgeval.common.tracer import Tracer, wrap
95
96
  from openai import OpenAI
96
97
 
98
+ # Basic initialization
97
99
  client = wrap(OpenAI())
98
100
  judgment = Tracer(project_name="my_project")
99
101
 
102
+ # Or with S3 storage enabled
103
+ # NOTE: Make sure AWS creds correspond to an account with write access to the specified S3 bucket
104
+ judgment = Tracer(
105
+ project_name="my_project",
106
+ use_s3=True,
107
+ s3_bucket_name="my-traces-bucket", # Bucket created automatically if it doesn't exist
108
+ s3_aws_access_key_id="your-access-key", # Optional: defaults to AWS_ACCESS_KEY_ID env var
109
+ s3_aws_secret_access_key="your-secret-key", # Optional: defaults to AWS_SECRET_ACCESS_KEY env var
110
+ s3_region_name="us-west-1" # Optional: defaults to AWS_REGION env var or "us-west-1"
111
+ )
112
+
100
113
  @judgment.observe(span_type="tool")
101
114
  def my_tool():
102
115
  return "Hello world!"
@@ -4,7 +4,7 @@ verify_ssl = true
4
4
  name = "pypi"
5
5
 
6
6
  [packages]
7
- litellm = "*"
7
+ litellm = "==1.38.12"
8
8
  python-dotenv = "==1.0.1"
9
9
  fastapi = "*"
10
10
  uvicorn = "*"
@@ -24,6 +24,9 @@ langchain-openai = "*"
24
24
  langchain-anthropic = "*"
25
25
  langchain-core = "*"
26
26
  langchain-community = "*"
27
+ langgraph = "*"
28
+ google-genai = "*"
29
+ boto3 = "*"
27
30
 
28
31
  [dev-packages]
29
32
  pytest = "*"