judgeval 0.0.30__tar.gz → 0.0.32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. judgeval-0.0.32/.github/workflows/ci.yaml +91 -0
  2. {judgeval-0.0.30 → judgeval-0.0.32}/PKG-INFO +1 -1
  3. {judgeval-0.0.30 → judgeval-0.0.32}/Pipfile +2 -0
  4. {judgeval-0.0.30 → judgeval-0.0.32}/Pipfile.lock +1053 -662
  5. {judgeval-0.0.30 → judgeval-0.0.32}/docs/alerts/notifications.mdx +107 -15
  6. {judgeval-0.0.30 → judgeval-0.0.32}/docs/alerts/rules.mdx +55 -6
  7. judgeval-0.0.32/docs/api_reference/judgment_client.mdx +102 -0
  8. judgeval-0.0.32/docs/api_reference/trace.mdx +144 -0
  9. judgeval-0.0.32/docs/clustering/clustering.mdx +68 -0
  10. judgeval-0.0.32/docs/evaluation/data_datasets.mdx +288 -0
  11. {judgeval-0.0.30 → judgeval-0.0.32}/docs/evaluation/data_examples.mdx +96 -7
  12. judgeval-0.0.32/docs/evaluation/data_sequences.mdx +80 -0
  13. judgeval-0.0.32/docs/evaluation/introduction.mdx +224 -0
  14. judgeval-0.0.32/docs/evaluation/judges.mdx +209 -0
  15. judgeval-0.0.32/docs/evaluation/scorers/agent/derailment.mdx +54 -0
  16. {judgeval-0.0.30 → judgeval-0.0.32}/docs/evaluation/scorers/custom_scorers.mdx +185 -0
  17. {judgeval-0.0.30/docs/evaluation/scorers → judgeval-0.0.32/docs/evaluation/scorers/default}/answer_correctness.mdx +31 -1
  18. {judgeval-0.0.30/docs/evaluation/scorers → judgeval-0.0.32/docs/evaluation/scorers/default}/answer_relevancy.mdx +29 -1
  19. {judgeval-0.0.30/docs/evaluation/scorers → judgeval-0.0.32/docs/evaluation/scorers/default}/comparison.mdx +44 -4
  20. {judgeval-0.0.30/docs/evaluation/scorers → judgeval-0.0.32/docs/evaluation/scorers/default}/contextual_precision.mdx +33 -1
  21. {judgeval-0.0.30/docs/evaluation/scorers → judgeval-0.0.32/docs/evaluation/scorers/default}/contextual_recall.mdx +33 -1
  22. {judgeval-0.0.30/docs/evaluation/scorers → judgeval-0.0.32/docs/evaluation/scorers/default}/contextual_relevancy.mdx +31 -1
  23. {judgeval-0.0.30/docs/evaluation/scorers → judgeval-0.0.32/docs/evaluation/scorers/default}/faithfulness.mdx +33 -2
  24. {judgeval-0.0.30/docs/evaluation/scorers → judgeval-0.0.32/docs/evaluation/scorers/default}/groundedness.mdx +1 -1
  25. {judgeval-0.0.30 → judgeval-0.0.32}/docs/evaluation/scorers/introduction.mdx +29 -17
  26. judgeval-0.0.32/docs/evaluation/unit_testing.mdx +93 -0
  27. {judgeval-0.0.30 → judgeval-0.0.32}/docs/getting_started.mdx +156 -182
  28. judgeval-0.0.32/docs/images/annotation_queue_ui.png +0 -0
  29. judgeval-0.0.32/docs/images/cluster.png +0 -0
  30. judgeval-0.0.32/docs/images/cluster_button.png +0 -0
  31. judgeval-0.0.32/docs/images/dashboard_annotation_queue_button.png +0 -0
  32. judgeval-0.0.32/docs/mcp_server/mcp_server.mdx +563 -0
  33. {judgeval-0.0.30 → judgeval-0.0.32}/docs/mint.json +27 -14
  34. judgeval-0.0.32/docs/monitoring/annotations.mdx +41 -0
  35. judgeval-0.0.32/docs/monitoring/tracing.mdx +374 -0
  36. {judgeval-0.0.30 → judgeval-0.0.32}/pyproject.toml +1 -1
  37. judgeval-0.0.32/src/demo/custom_scorer/main.py +43 -0
  38. judgeval-0.0.32/src/demo/custom_scorer/scorer.py +44 -0
  39. judgeval-0.0.32/src/demo/dataset.py +16 -0
  40. judgeval-0.0.32/src/demo/demo.py +50 -0
  41. judgeval-0.0.32/src/demo/new_bot/basic_bot.py +116 -0
  42. judgeval-0.0.32/src/demo/simple_trace.py +89 -0
  43. {judgeval-0.0.30/src/demo/new_trace → judgeval-0.0.32/src/demo/simplified_tracing}/example_complex_async.py +3 -13
  44. judgeval-0.0.32/src/demo/test.py +51 -0
  45. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/__init__.py +3 -1
  46. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/common/tracer.py +352 -117
  47. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/constants.py +5 -3
  48. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/data/__init__.py +4 -0
  49. judgeval-0.0.32/src/judgeval/data/custom_example.py +18 -0
  50. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/data/datasets/dataset.py +5 -1
  51. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/data/datasets/eval_dataset_client.py +64 -5
  52. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/data/example.py +1 -0
  53. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/data/result.py +7 -6
  54. judgeval-0.0.32/src/judgeval/data/sequence.py +55 -0
  55. judgeval-0.0.32/src/judgeval/data/sequence_run.py +44 -0
  56. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/evaluation_run.py +12 -7
  57. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/integrations/langgraph.py +89 -72
  58. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/judgment_client.py +70 -68
  59. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/run_evaluation.py +87 -13
  60. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/__init__.py +2 -0
  61. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorer.py +3 -0
  62. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/__init__.py +7 -0
  63. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -1
  64. judgeval-0.0.32/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +21 -0
  65. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/score.py +6 -5
  66. judgeval-0.0.32/src/judgeval/version_check.py +22 -0
  67. judgeval-0.0.30/.github/workflows/ci.yaml +0 -44
  68. judgeval-0.0.30/docs/api_reference/judgment_client.mdx +0 -61
  69. judgeval-0.0.30/docs/api_reference/trace.mdx +0 -82
  70. judgeval-0.0.30/docs/evaluation/data_datasets.mdx +0 -159
  71. judgeval-0.0.30/docs/evaluation/introduction.mdx +0 -111
  72. judgeval-0.0.30/docs/evaluation/judges.mdx +0 -88
  73. judgeval-0.0.30/docs/evaluation/scorers/hallucination.mdx +0 -54
  74. judgeval-0.0.30/docs/evaluation/unit_testing.mdx +0 -39
  75. judgeval-0.0.30/docs/judgment/introduction.mdx +0 -11
  76. judgeval-0.0.30/docs/monitoring/tracing.mdx +0 -214
  77. judgeval-0.0.30/src/demo/cookbooks/JNPR_Mist/test.py +0 -21
  78. judgeval-0.0.30/src/demo/cookbooks/linkd/text2sql.py +0 -14
  79. judgeval-0.0.30/src/demo/custom_example_demo/osiris_test.py +0 -22
  80. judgeval-0.0.30/src/demo/custom_example_demo/qodo_scorer.py +0 -78
  81. judgeval-0.0.30/src/demo/demo.py +0 -21
  82. judgeval-0.0.30/src/judgeval/data/custom_api_example.py +0 -91
  83. judgeval-0.0.30/src/test.py +0 -143
  84. {judgeval-0.0.30 → judgeval-0.0.32}/.gitignore +0 -0
  85. {judgeval-0.0.30 → judgeval-0.0.32}/LICENSE.md +0 -0
  86. {judgeval-0.0.30 → judgeval-0.0.32}/README.md +0 -0
  87. {judgeval-0.0.30 → judgeval-0.0.32}/docs/README.md +0 -0
  88. {judgeval-0.0.30 → judgeval-0.0.32}/docs/alerts/platform_notifications.mdx +0 -0
  89. {judgeval-0.0.30 → judgeval-0.0.32}/docs/development.mdx +0 -0
  90. {judgeval-0.0.30 → judgeval-0.0.32}/docs/essentials/code.mdx +0 -0
  91. {judgeval-0.0.30 → judgeval-0.0.32}/docs/essentials/images.mdx +0 -0
  92. {judgeval-0.0.30 → judgeval-0.0.32}/docs/essentials/markdown.mdx +0 -0
  93. {judgeval-0.0.30 → judgeval-0.0.32}/docs/essentials/navigation.mdx +0 -0
  94. {judgeval-0.0.30 → judgeval-0.0.32}/docs/essentials/reusable-snippets.mdx +0 -0
  95. {judgeval-0.0.30 → judgeval-0.0.32}/docs/essentials/settings.mdx +0 -0
  96. {judgeval-0.0.30 → judgeval-0.0.32}/docs/evaluation/scorers/classifier_scorer.mdx +0 -0
  97. {judgeval-0.0.30/docs/evaluation/scorers → judgeval-0.0.32/docs/evaluation/scorers/default}/execution_order.mdx +0 -0
  98. {judgeval-0.0.30/docs/evaluation/scorers → judgeval-0.0.32/docs/evaluation/scorers/default}/json_correctness.mdx +0 -0
  99. {judgeval-0.0.30/docs/evaluation/scorers → judgeval-0.0.32/docs/evaluation/scorers/default}/summarization.mdx +0 -0
  100. {judgeval-0.0.30 → judgeval-0.0.32}/docs/favicon.svg +0 -0
  101. {judgeval-0.0.30 → judgeval-0.0.32}/docs/images/basic_trace_example.png +0 -0
  102. {judgeval-0.0.30 → judgeval-0.0.32}/docs/images/checks-passed.png +0 -0
  103. {judgeval-0.0.30 → judgeval-0.0.32}/docs/images/create_aggressive_scorer.png +0 -0
  104. {judgeval-0.0.30 → judgeval-0.0.32}/docs/images/create_scorer.png +0 -0
  105. {judgeval-0.0.30 → judgeval-0.0.32}/docs/images/evaluation_diagram.png +0 -0
  106. {judgeval-0.0.30 → judgeval-0.0.32}/docs/images/hero-dark.svg +0 -0
  107. {judgeval-0.0.30 → judgeval-0.0.32}/docs/images/hero-light.svg +0 -0
  108. {judgeval-0.0.30 → judgeval-0.0.32}/docs/images/notifications_page.png +0 -0
  109. {judgeval-0.0.30 → judgeval-0.0.32}/docs/images/online_eval_fault.png +0 -0
  110. {judgeval-0.0.30 → judgeval-0.0.32}/docs/images/reports_modal.png +0 -0
  111. {judgeval-0.0.30 → judgeval-0.0.32}/docs/images/trace_ss.png +0 -0
  112. {judgeval-0.0.30 → judgeval-0.0.32}/docs/integration/langgraph.mdx +0 -0
  113. {judgeval-0.0.30 → judgeval-0.0.32}/docs/introduction.mdx +0 -0
  114. {judgeval-0.0.30 → judgeval-0.0.32}/docs/logo/dark.svg +0 -0
  115. {judgeval-0.0.30 → judgeval-0.0.32}/docs/logo/light.svg +0 -0
  116. {judgeval-0.0.30 → judgeval-0.0.32}/docs/monitoring/introduction.mdx +0 -0
  117. {judgeval-0.0.30 → judgeval-0.0.32}/docs/monitoring/production_insights.mdx +0 -0
  118. {judgeval-0.0.30 → judgeval-0.0.32}/docs/notebooks/create_dataset.ipynb +0 -0
  119. {judgeval-0.0.30 → judgeval-0.0.32}/docs/notebooks/create_scorer.ipynb +0 -0
  120. {judgeval-0.0.30 → judgeval-0.0.32}/docs/notebooks/demo.ipynb +0 -0
  121. {judgeval-0.0.30 → judgeval-0.0.32}/docs/notebooks/prompt_scorer.ipynb +0 -0
  122. {judgeval-0.0.30 → judgeval-0.0.32}/docs/notebooks/quickstart.ipynb +0 -0
  123. {judgeval-0.0.30 → judgeval-0.0.32}/docs/quickstart.mdx +0 -0
  124. {judgeval-0.0.30 → judgeval-0.0.32}/docs/snippets/snippet-intro.mdx +0 -0
  125. {judgeval-0.0.30 → judgeval-0.0.32}/pytest.ini +0 -0
  126. {judgeval-0.0.30 → judgeval-0.0.32}/src/demo/travel_agent.py +0 -0
  127. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/clients.py +0 -0
  128. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/common/__init__.py +0 -0
  129. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/common/exceptions.py +0 -0
  130. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/common/logger.py +0 -0
  131. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/common/utils.py +0 -0
  132. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/data/datasets/__init__.py +0 -0
  133. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/data/scorer_data.py +0 -0
  134. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/judges/__init__.py +0 -0
  135. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/judges/base_judge.py +0 -0
  136. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/judges/litellm_judge.py +0 -0
  137. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/judges/mixture_of_judges.py +0 -0
  138. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/judges/together_judge.py +0 -0
  139. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/judges/utils.py +0 -0
  140. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/rules.py +0 -0
  141. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/api_scorer.py +0 -0
  142. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/base_scorer.py +0 -0
  143. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/exceptions.py +0 -0
  144. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  145. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  146. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -0
  147. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -0
  148. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -0
  149. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -0
  150. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -0
  151. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  152. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -0
  153. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
  154. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  155. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -0
  156. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -0
  157. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -0
  158. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -0
  159. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -0
  160. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -0
  161. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -0
  162. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -0
  163. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -0
  164. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -0
  165. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -0
  166. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -0
  167. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/__init__.py +0 -0
  168. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py +0 -0
  169. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py +0 -0
  170. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -0
  171. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -0
  172. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -0
  173. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -0
  174. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -0
  175. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -0
  176. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -0
  177. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -0
  178. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -0
  179. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py +0 -0
  180. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/execution_order/execution_order.py +0 -0
  181. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -0
  182. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -0
  183. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -0
  184. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -0
  185. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -0
  186. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -0
  187. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py +0 -0
  188. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py +0 -0
  189. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -0
  190. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -0
  191. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -0
  192. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -0
  193. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -0
  194. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/prompt_scorer.py +0 -0
  195. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/scorers/utils.py +0 -0
  196. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/tracer/__init__.py +0 -0
  197. {judgeval-0.0.30 → judgeval-0.0.32}/src/judgeval/utils/alerts.py +0 -0
@@ -0,0 +1,91 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request_review:
5
+ types: [submitted]
6
+ branches:
7
+ - main
8
+
9
+ jobs:
10
+ run-tests:
11
+ strategy:
12
+ fail-fast: false
13
+ matrix:
14
+ os: [ubuntu-latest, macos-latest]
15
+ python-version:
16
+ - "3.11"
17
+ name: Test
18
+ runs-on: ${{ matrix.os }}
19
+ env:
20
+ PYTHONPATH: "."
21
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
22
+ TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
23
+
24
+ steps:
25
+ - name: Checkout code
26
+ uses: actions/checkout@v4
27
+
28
+ - name: Set up Python
29
+ uses: actions/setup-python@v4
30
+ with:
31
+ python-version: ${{ matrix.python-version }}
32
+
33
+ - name: Install dependencies
34
+ run: |
35
+ pip install pipenv
36
+ pipenv install --dev
37
+
38
+
39
+ - name: Run tests
40
+ run: |
41
+ cd src
42
+ pipenv run pytest
43
+
44
+ run-e2e-tests:
45
+ if: "!contains(github.actor, '[bot]')" # Exclude if the actor is a bot
46
+ concurrency:
47
+ group: e2e-tests
48
+ strategy:
49
+ fail-fast: false
50
+ matrix:
51
+ os: [ubuntu-latest]
52
+ python-version:
53
+ - "3.11"
54
+ name: E2E Tests
55
+ runs-on: ${{ matrix.os }}
56
+ steps:
57
+ - name: Configure AWS Credentials
58
+ uses: aws-actions/configure-aws-credentials@v4
59
+ with:
60
+ aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
61
+ aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
62
+ aws-region: us-west-1
63
+
64
+ - name: Checkout code
65
+ uses: actions/checkout@v4
66
+
67
+ - name: Set up Python
68
+ uses: actions/setup-python@v4
69
+ with:
70
+ python-version: ${{ matrix.python-version }}
71
+
72
+ - name: Install judgeval dependencies
73
+ run: |
74
+ pip install pipenv
75
+ pipenv install --dev
76
+
77
+ - name: Check if server is running
78
+ run: |
79
+ if ! curl -s http://api.judgmentlabs.ai/health > /dev/null; then
80
+ echo "Production Judgment server is not running properly. Check logs on AWS CloudWatch for more details."
81
+ exit 1
82
+ else
83
+ echo "Server is running."
84
+ fi
85
+
86
+ - name: Run E2E tests
87
+ working-directory: src
88
+ run: |
89
+ SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id gh-actions/api-keys/judgeval --query SecretString --output text)
90
+ export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
91
+ pipenv run pytest ./e2etests
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.0.30
3
+ Version: 0.0.32
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -24,6 +24,8 @@ langchain-openai = "*"
24
24
  langchain-anthropic = "*"
25
25
  langchain-core = "*"
26
26
  langchain-community = "*"
27
+ langgraph = "*"
28
+ google-genai = "*"
27
29
 
28
30
  [dev-packages]
29
31
  pytest = "*"