judgeval 0.0.16__tar.gz → 0.0.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. {judgeval-0.0.16 → judgeval-0.0.18}/.gitignore +2 -0
  2. {judgeval-0.0.16 → judgeval-0.0.18}/PKG-INFO +1 -1
  3. {judgeval-0.0.16 → judgeval-0.0.18}/Pipfile +1 -0
  4. {judgeval-0.0.16 → judgeval-0.0.18}/Pipfile.lock +58 -224
  5. judgeval-0.0.18/docs/evaluation/scorers/comparison.mdx +62 -0
  6. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/introduction.mdx +1 -0
  7. {judgeval-0.0.16 → judgeval-0.0.18}/docs/mint.json +1 -0
  8. {judgeval-0.0.16 → judgeval-0.0.18}/pyproject.toml +1 -1
  9. judgeval-0.0.18/src/demo/cookbooks/jpmorgan/demo.ipynb +211 -0
  10. judgeval-0.0.18/src/demo/cookbooks/jpmorgan/demo.py +262 -0
  11. judgeval-0.0.18/src/demo/cookbooks/jpmorgan/vectordbdocs.py +174 -0
  12. judgeval-0.0.18/src/demo/cookbooks/langgraph_basic/agent.ipynb +107 -0
  13. judgeval-0.0.18/src/demo/cookbooks/langgraph_basic/agent.py +114 -0
  14. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/openai_travel_agent/agent.py +3 -3
  15. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/__init__.py +1 -3
  16. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/clients.py +0 -6
  17. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/common/logger.py +0 -1
  18. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/common/tracer.py +270 -62
  19. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/common/utils.py +9 -5
  20. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/constants.py +7 -2
  21. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/data/__init__.py +2 -0
  22. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/data/api_example.py +2 -2
  23. judgeval-0.0.18/src/judgeval/data/datasets/__init__.py +4 -0
  24. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/data/datasets/dataset.py +4 -5
  25. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/data/datasets/eval_dataset_client.py +11 -7
  26. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/data/datasets/utils.py +1 -2
  27. judgeval-0.0.18/src/judgeval/data/example.py +146 -0
  28. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/data/scorer_data.py +1 -1
  29. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/evaluation_run.py +2 -2
  30. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/judges/__init__.py +0 -1
  31. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/judges/base_judge.py +1 -1
  32. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/judges/mixture_of_judges.py +7 -2
  33. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/judgment_client.py +16 -8
  34. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/rules.py +2 -4
  35. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/run_evaluation.py +8 -8
  36. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/__init__.py +6 -0
  37. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/api_scorer.py +12 -6
  38. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/base_scorer.py +12 -6
  39. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorer.py +7 -3
  40. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/__init__.py +24 -3
  41. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +6 -0
  42. judgeval-0.0.18/src/judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +35 -0
  43. judgeval-0.0.18/src/judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +19 -0
  44. judgeval-0.0.18/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +19 -0
  45. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +4 -1
  46. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -1
  47. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +2 -2
  48. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +7 -6
  49. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +2 -2
  50. judgeval-0.0.18/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py +161 -0
  51. judgeval-0.0.18/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py +222 -0
  52. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +2 -2
  53. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +2 -2
  54. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +2 -2
  55. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +1 -8
  56. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +7 -6
  57. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +2 -2
  58. judgeval-0.0.18/src/judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py +232 -0
  59. judgeval-0.0.18/src/judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py +102 -0
  60. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +7 -7
  61. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +7 -6
  62. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +1 -2
  63. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/prompt_scorer.py +7 -5
  64. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/utils.py +1 -1
  65. judgeval-0.0.16/src/demo/customer_use/cstone/basic_test.py +0 -37
  66. judgeval-0.0.16/src/demo/customer_use/cstone/cstone_data.csv +0 -1225
  67. judgeval-0.0.16/src/demo/customer_use/cstone/data.csv +0 -1227
  68. judgeval-0.0.16/src/demo/customer_use/cstone/faithfulness_testing.py +0 -169
  69. judgeval-0.0.16/src/demo/customer_use/cstone/galen_data.csv +0 -0
  70. judgeval-0.0.16/src/demo/customer_use/cstone/playground.py +0 -152
  71. judgeval-0.0.16/src/judgeval/data/datasets/__init__.py +0 -5
  72. judgeval-0.0.16/src/judgeval/data/example.py +0 -91
  73. {judgeval-0.0.16 → judgeval-0.0.18}/.github/workflows/ci.yaml +0 -0
  74. {judgeval-0.0.16 → judgeval-0.0.18}/LICENSE.md +0 -0
  75. {judgeval-0.0.16 → judgeval-0.0.18}/README.md +0 -0
  76. {judgeval-0.0.16 → judgeval-0.0.18}/docs/README.md +0 -0
  77. {judgeval-0.0.16 → judgeval-0.0.18}/docs/api_reference/judgment_client.mdx +0 -0
  78. {judgeval-0.0.16 → judgeval-0.0.18}/docs/api_reference/trace.mdx +0 -0
  79. {judgeval-0.0.16 → judgeval-0.0.18}/docs/development.mdx +0 -0
  80. {judgeval-0.0.16 → judgeval-0.0.18}/docs/essentials/code.mdx +0 -0
  81. {judgeval-0.0.16 → judgeval-0.0.18}/docs/essentials/images.mdx +0 -0
  82. {judgeval-0.0.16 → judgeval-0.0.18}/docs/essentials/markdown.mdx +0 -0
  83. {judgeval-0.0.16 → judgeval-0.0.18}/docs/essentials/navigation.mdx +0 -0
  84. {judgeval-0.0.16 → judgeval-0.0.18}/docs/essentials/reusable-snippets.mdx +0 -0
  85. {judgeval-0.0.16 → judgeval-0.0.18}/docs/essentials/settings.mdx +0 -0
  86. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/data_datasets.mdx +0 -0
  87. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/data_examples.mdx +0 -0
  88. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/introduction.mdx +0 -0
  89. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/judges.mdx +0 -0
  90. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/answer_correctness.mdx +0 -0
  91. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/answer_relevancy.mdx +0 -0
  92. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/classifier_scorer.mdx +0 -0
  93. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/contextual_precision.mdx +0 -0
  94. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/contextual_recall.mdx +0 -0
  95. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/contextual_relevancy.mdx +0 -0
  96. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/custom_scorers.mdx +0 -0
  97. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/faithfulness.mdx +0 -0
  98. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/hallucination.mdx +0 -0
  99. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/json_correctness.mdx +0 -0
  100. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/summarization.mdx +0 -0
  101. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/scorers/tool_correctness.mdx +0 -0
  102. {judgeval-0.0.16 → judgeval-0.0.18}/docs/evaluation/unit_testing.mdx +0 -0
  103. {judgeval-0.0.16 → judgeval-0.0.18}/docs/favicon.svg +0 -0
  104. {judgeval-0.0.16 → judgeval-0.0.18}/docs/getting_started.mdx +0 -0
  105. {judgeval-0.0.16 → judgeval-0.0.18}/docs/images/basic_trace_example.png +0 -0
  106. {judgeval-0.0.16 → judgeval-0.0.18}/docs/images/checks-passed.png +0 -0
  107. {judgeval-0.0.16 → judgeval-0.0.18}/docs/images/create_aggressive_scorer.png +0 -0
  108. {judgeval-0.0.16 → judgeval-0.0.18}/docs/images/create_scorer.png +0 -0
  109. {judgeval-0.0.16 → judgeval-0.0.18}/docs/images/evaluation_diagram.png +0 -0
  110. {judgeval-0.0.16 → judgeval-0.0.18}/docs/images/hero-dark.svg +0 -0
  111. {judgeval-0.0.16 → judgeval-0.0.18}/docs/images/hero-light.svg +0 -0
  112. {judgeval-0.0.16 → judgeval-0.0.18}/docs/images/trace_screenshot.png +0 -0
  113. {judgeval-0.0.16 → judgeval-0.0.18}/docs/introduction.mdx +0 -0
  114. {judgeval-0.0.16 → judgeval-0.0.18}/docs/judgment/introduction.mdx +0 -0
  115. {judgeval-0.0.16 → judgeval-0.0.18}/docs/logo/dark.svg +0 -0
  116. {judgeval-0.0.16 → judgeval-0.0.18}/docs/logo/light.svg +0 -0
  117. {judgeval-0.0.16 → judgeval-0.0.18}/docs/monitoring/introduction.mdx +0 -0
  118. {judgeval-0.0.16 → judgeval-0.0.18}/docs/monitoring/production_insights.mdx +0 -0
  119. {judgeval-0.0.16 → judgeval-0.0.18}/docs/monitoring/tracing.mdx +0 -0
  120. {judgeval-0.0.16 → judgeval-0.0.18}/docs/notebooks/create_dataset.ipynb +0 -0
  121. {judgeval-0.0.16 → judgeval-0.0.18}/docs/notebooks/create_scorer.ipynb +0 -0
  122. {judgeval-0.0.16 → judgeval-0.0.18}/docs/notebooks/demo.ipynb +0 -0
  123. {judgeval-0.0.16 → judgeval-0.0.18}/docs/notebooks/prompt_scorer.ipynb +0 -0
  124. {judgeval-0.0.16 → judgeval-0.0.18}/docs/notebooks/quickstart.ipynb +0 -0
  125. {judgeval-0.0.16 → judgeval-0.0.18}/docs/quickstart.mdx +0 -0
  126. {judgeval-0.0.16 → judgeval-0.0.18}/docs/snippets/snippet-intro.mdx +0 -0
  127. {judgeval-0.0.16 → judgeval-0.0.18}/pytest.ini +0 -0
  128. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/anime_chatbot_agent/animeChatBot.py +0 -0
  129. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/ci_testing/ci_testing.py +0 -0
  130. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/ci_testing/travel_response.txt +0 -0
  131. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/custom_scorers/competitor_mentions.py +0 -0
  132. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/custom_scorers/text2sql.py +0 -0
  133. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/langchain_basic_rag/basic_agentic_rag.ipynb +0 -0
  134. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/langchain_basic_rag/tesla_q3.pdf +0 -0
  135. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/langchain_sales/example_product_price_id_mapping.json +0 -0
  136. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/langchain_sales/sales_agent_with_context.ipynb +0 -0
  137. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/langchain_sales/sample_product_catalog.txt +0 -0
  138. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/new_bot/basic_bot.py +0 -0
  139. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/openai_travel_agent/populate_db.py +0 -0
  140. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/openai_travel_agent/tools.py +0 -0
  141. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/rules_alerts/rules_bot.py +0 -0
  142. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/rules_alerts/rules_demo.py +0 -0
  143. {judgeval-0.0.16 → judgeval-0.0.18}/src/demo/cookbooks/rules_alerts/utils_helper.py +0 -0
  144. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/common/__init__.py +0 -0
  145. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/common/exceptions.py +0 -0
  146. {judgeval-0.0.16/src/judgeval/data/datasets → judgeval-0.0.18/src/judgeval/data}/ground_truth.py +0 -0
  147. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/data/result.py +0 -0
  148. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/judges/litellm_judge.py +0 -0
  149. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/judges/together_judge.py +0 -0
  150. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/judges/utils.py +0 -0
  151. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/exceptions.py +0 -0
  152. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  153. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  154. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -0
  155. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -0
  156. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -0
  157. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  158. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
  159. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -0
  160. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -0
  161. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py +0 -0
  162. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -0
  163. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -0
  164. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -0
  165. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -0
  166. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -0
  167. /judgeval-0.0.16/src/demo/customer_use/cstone/results.csv → /judgeval-0.0.18/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/__init__.py +0 -0
  168. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -0
  169. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -0
  170. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -0
  171. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -0
  172. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -0
  173. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -0
  174. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -0
  175. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -0
  176. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -0
  177. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -0
  178. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -0
  179. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -0
  180. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +0 -0
  181. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/scorers/score.py +0 -0
  182. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/tracer/__init__.py +0 -0
  183. {judgeval-0.0.16 → judgeval-0.0.18}/src/judgeval/utils/alerts.py +0 -0
@@ -9,6 +9,8 @@ __pycache__/
9
9
  # Testing files for competitor packages
10
10
  demo/test_competitors.py
11
11
  src/e2etests/customer_usecases/
12
+ src/demo/customer_use/cstone/
13
+
12
14
 
13
15
  # Packages
14
16
  *.egg
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.0.16
3
+ Version: 0.0.18
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -17,6 +17,7 @@ anthropic = "*"
17
17
  asyncio = "*"
18
18
  nest-asyncio = "*"
19
19
  pika = "*"
20
+ openpyxl = "*"
20
21
 
21
22
  [dev-packages]
22
23
  pytest = "*"
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "_meta": {
3
3
  "hash": {
4
- "sha256": "679d65229010962cd48fce79a508812a4dd55ba522f0c76cfbcf3a262c9e7e47"
4
+ "sha256": "d7d9cae9e72293d62ac58b7fcdb1823c13bb9081f66003ff4e7f01d26f448631"
5
5
  },
6
6
  "pipfile-spec": 6,
7
7
  "requires": {
@@ -18,11 +18,11 @@
18
18
  "default": {
19
19
  "aiohappyeyeballs": {
20
20
  "hashes": [
21
- "sha256:147ec992cf873d74f5062644332c539fcd42956dc69453fe5204195e560517e1",
22
- "sha256:9b05052f9042985d32ecbe4b59a77ae19c006a78f1344d7fdad69d28ded3d0b0"
21
+ "sha256:0850b580748c7071db98bffff6d4c94028d0d3035acc20fd721a0ce7e8cac35d",
22
+ "sha256:18fde6204a76deeabc97c48bdd01d5801cfda5d6b9c8bbeb1aaaee9d648ca191"
23
23
  ],
24
24
  "markers": "python_version >= '3.9'",
25
- "version": "==2.4.6"
25
+ "version": "==2.5.0"
26
26
  },
27
27
  "aiohttp": {
28
28
  "hashes": [
@@ -129,12 +129,12 @@
129
129
  },
130
130
  "anthropic": {
131
131
  "hashes": [
132
- "sha256:452f4ca0c56ffab8b6ce9928bf8470650f88106a7001b250895eb65c54cfa44c",
133
- "sha256:61b712a56308fce69f04d92ba0230ab2bc187b5bce17811d400843a8976bb67f"
132
+ "sha256:bbc17ad4e7094988d2fa86b87753ded8dce12498f4b85fe5810f208f454a8375",
133
+ "sha256:c09e885b0f674b9119b4f296d8508907f6cff0009bc20d5cf6b35936c40b4398"
134
134
  ],
135
135
  "index": "pypi",
136
136
  "markers": "python_version >= '3.8'",
137
- "version": "==0.47.2"
137
+ "version": "==0.49.0"
138
138
  },
139
139
  "anyio": {
140
140
  "hashes": [
@@ -291,6 +291,14 @@
291
291
  "markers": "python_version >= '3.6'",
292
292
  "version": "==1.9.0"
293
293
  },
294
+ "et-xmlfile": {
295
+ "hashes": [
296
+ "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa",
297
+ "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"
298
+ ],
299
+ "markers": "python_version >= '3.8'",
300
+ "version": "==2.0.0"
301
+ },
294
302
  "eval-type-backport": {
295
303
  "hashes": [
296
304
  "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a",
@@ -301,12 +309,12 @@
301
309
  },
302
310
  "fastapi": {
303
311
  "hashes": [
304
- "sha256:0ce9111231720190473e222cdf0f07f7206ad7e53ea02beb1d2dc36e2f0741e9",
305
- "sha256:753a96dd7e036b34eeef8babdfcfe3f28ff79648f86551eb36bfc1b0bf4a8cbf"
312
+ "sha256:32e1541b7b74602e4ef4a0260ecaf3aadf9d4f19590bba3e1bf2ac4666aa2c64",
313
+ "sha256:cc81f03f688678b92600a65a5e618b93592c65005db37157147204d8924bf94f"
306
314
  ],
307
315
  "index": "pypi",
308
316
  "markers": "python_version >= '3.8'",
309
- "version": "==0.115.8"
317
+ "version": "==0.115.11"
310
318
  },
311
319
  "filelock": {
312
320
  "hashes": [
@@ -443,7 +451,6 @@
443
451
  "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0",
444
452
  "sha256:c8a52129695e88b1a0578d8d2cc6842bbd79128ac685463b887ee278126ad01f"
445
453
  ],
446
- "markers": "python_version >= '3.9'",
447
454
  "version": "==4.2.0"
448
455
  },
449
456
  "hpack": {
@@ -475,11 +482,11 @@
475
482
  },
476
483
  "huggingface-hub": {
477
484
  "hashes": [
478
- "sha256:352f69caf16566c7b6de84b54a822f6238e17ddd8ae3da4f8f2272aea5b198d5",
479
- "sha256:9524eae42077b8ff4fc459ceb7a514eca1c1232b775276b009709fe2a084f250"
485
+ "sha256:590b29c0dcbd0ee4b7b023714dc1ad8563fe4a68a91463438b74e980d28afaf3",
486
+ "sha256:c56f20fca09ef19da84dcde2b76379ecdaddf390b083f59f166715584953307d"
480
487
  ],
481
488
  "markers": "python_full_version >= '3.8.0'",
482
- "version": "==0.29.1"
489
+ "version": "==0.29.2"
483
490
  },
484
491
  "hyperframe": {
485
492
  "hashes": [
@@ -507,11 +514,11 @@
507
514
  },
508
515
  "jinja2": {
509
516
  "hashes": [
510
- "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb",
511
- "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"
517
+ "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d",
518
+ "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"
512
519
  ],
513
520
  "markers": "python_version >= '3.7'",
514
- "version": "==3.1.5"
521
+ "version": "==3.1.6"
515
522
  },
516
523
  "jiter": {
517
524
  "hashes": [
@@ -613,12 +620,12 @@
613
620
  },
614
621
  "litellm": {
615
622
  "hashes": [
616
- "sha256:02df5865f98ea9734a4d27ac7c33aad9a45c4015403d5c0797d3292ade3c5cb5",
617
- "sha256:d241436ac0edf64ec57fb5686f8d84a25998a7e52213d9063adf87df8432701f"
623
+ "sha256:1224e15b351a0f194bd5d908ccf4ff5d0e16b583f120519a5e68158bd44da071",
624
+ "sha256:cf9ab581198a12a5584571e0b2ad83869c7621684936ed26d7bf59015d0a8d2b"
618
625
  ],
619
626
  "index": "pypi",
620
627
  "markers": "python_version not in '2.7, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7' and python_version >= '3.8'",
621
- "version": "==1.61.16"
628
+ "version": "==1.63.2"
622
629
  },
623
630
  "markdown-it-py": {
624
631
  "hashes": [
@@ -868,17 +875,26 @@
868
875
  "sha256:f4ca91d61a4bf61b0f2228f24bbfa6a9facd5f8af03759fe2a655c50ae2c6610",
869
876
  "sha256:f6b3dfc7661f8842babd8ea07e9897fe3d9b69a1d7e5fbb743e4160f9387833b"
870
877
  ],
871
- "markers": "python_version >= '3.10'",
878
+ "markers": "python_version == '3.11'",
872
879
  "version": "==2.2.3"
873
880
  },
874
881
  "openai": {
875
882
  "hashes": [
876
- "sha256:20f85cde9e95e9fbb416e3cb5a6d3119c0b28308afd6e3cc47bf100623dac623",
877
- "sha256:2861053538704d61340da56e2f176853d19f1dc5704bc306b7597155f850d57a"
883
+ "sha256:0b08c58625d556f5c6654701af1023689c173eb0989ce8f73c7fd0eb22203c76",
884
+ "sha256:15566d46574b94eae3d18efc2f9a4ebd1366d1d44bfc1bdafeea7a5cf8271bcb"
878
885
  ],
879
886
  "index": "pypi",
880
887
  "markers": "python_version >= '3.8'",
881
- "version": "==1.64.0"
888
+ "version": "==1.65.4"
889
+ },
890
+ "openpyxl": {
891
+ "hashes": [
892
+ "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2",
893
+ "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"
894
+ ],
895
+ "index": "pypi",
896
+ "markers": "python_version >= '3.8'",
897
+ "version": "==3.1.5"
882
898
  },
883
899
  "packaging": {
884
900
  "hashes": [
@@ -1390,11 +1406,11 @@
1390
1406
  },
1391
1407
  "realtime": {
1392
1408
  "hashes": [
1393
- "sha256:0015219bb398edfdd5e993bc77a42424ed6d6890b7234a0114fe0de4d21e4f8b",
1394
- "sha256:4ffc61a9c0f8dbda7e6a48496254a018d5b2d90569f56d1d89c9618f56616c3b"
1409
+ "sha256:6aacfec1ca3519fbb87219ce250dee3b6797156f5a091eb48d0e19945bc6d103",
1410
+ "sha256:8e77616d8c721f0f17ea0a256f6b5cd6d626b0eb66b305544d5f330c3a6d9a4c"
1395
1411
  ],
1396
1412
  "markers": "python_version >= '3.9' and python_version < '4.0'",
1397
- "version": "==2.4.0"
1413
+ "version": "==2.4.1"
1398
1414
  },
1399
1415
  "referencing": {
1400
1416
  "hashes": [
@@ -1406,100 +1422,9 @@
1406
1422
  },
1407
1423
  "regex": {
1408
1424
  "hashes": [
1409
- "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c",
1410
- "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60",
1411
- "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d",
1412
- "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d",
1413
- "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67",
1414
- "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773",
1415
- "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0",
1416
- "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef",
1417
- "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad",
1418
- "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe",
1419
- "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3",
1420
- "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114",
1421
- "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4",
1422
- "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39",
1423
- "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e",
1424
- "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3",
1425
- "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7",
1426
- "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d",
1427
- "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e",
1428
- "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a",
1429
- "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7",
1430
- "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f",
1431
- "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0",
1432
- "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54",
1433
- "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b",
1434
- "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c",
1435
- "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd",
1436
- "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57",
1437
- "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34",
1438
- "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d",
1439
- "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f",
1440
- "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b",
1441
- "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519",
1442
- "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4",
1443
- "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a",
1444
1425
  "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638",
1445
- "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b",
1446
- "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839",
1447
- "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07",
1448
- "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf",
1449
- "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff",
1450
- "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0",
1451
- "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f",
1452
- "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95",
1453
- "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4",
1454
- "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e",
1455
- "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13",
1456
1426
  "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519",
1457
- "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2",
1458
- "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008",
1459
- "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9",
1460
- "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc",
1461
- "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48",
1462
- "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20",
1463
- "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89",
1464
- "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e",
1465
- "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf",
1466
- "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b",
1467
- "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd",
1468
- "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84",
1469
- "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29",
1470
- "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b",
1471
- "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3",
1472
- "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45",
1473
- "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3",
1474
- "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983",
1475
- "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e",
1476
- "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7",
1477
- "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4",
1478
- "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e",
1479
- "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467",
1480
- "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577",
1481
- "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001",
1482
- "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0",
1483
- "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55",
1484
- "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9",
1485
- "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf",
1486
- "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6",
1487
- "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e",
1488
- "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde",
1489
- "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62",
1490
- "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df",
1491
- "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51",
1492
- "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5",
1493
- "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86",
1494
- "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2",
1495
- "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2",
1496
- "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0",
1497
- "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c",
1498
- "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f",
1499
- "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6",
1500
- "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2",
1501
- "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9",
1502
- "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"
1427
+ "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"
1503
1428
  ],
1504
1429
  "markers": "python_version >= '3.8'",
1505
1430
  "version": "==2024.11.6"
@@ -1656,11 +1581,11 @@
1656
1581
  },
1657
1582
  "starlette": {
1658
1583
  "hashes": [
1659
- "sha256:2cbcba2a75806f8a41c722141486f37c28e30a0921c5f6fe4346cb0dcee1302f",
1660
- "sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d"
1584
+ "sha256:913f0798bd90ba90a9156383bcf1350a17d6259451d0d8ee27fc0cf2db609038",
1585
+ "sha256:b359e4567456b28d473d0193f34c0de0ed49710d75ef183a74a5ce0499324f50"
1661
1586
  ],
1662
1587
  "markers": "python_version >= '3.9'",
1663
- "version": "==0.45.3"
1588
+ "version": "==0.46.0"
1664
1589
  },
1665
1590
  "storage3": {
1666
1591
  "hashes": [
@@ -1779,11 +1704,11 @@
1779
1704
  },
1780
1705
  "typer": {
1781
1706
  "hashes": [
1782
- "sha256:7994fb7b8155b64d3402518560648446072864beefd44aa2dc36972a5972e847",
1783
- "sha256:a0588c0a7fa68a1978a069818657778f86abe6ff5ea6abf472f940a08bfe4f0a"
1707
+ "sha256:46a499c6107d645a9c13f7ee46c5d5096cae6f5fc57dd11eccbbb9ae3e44ddfc",
1708
+ "sha256:ab2fab47533a813c49fe1f16b1a370fd5819099c00b119e0633df65f22144ba5"
1784
1709
  ],
1785
1710
  "markers": "python_version >= '3.7'",
1786
- "version": "==0.15.1"
1711
+ "version": "==0.15.2"
1787
1712
  },
1788
1713
  "typing-extensions": {
1789
1714
  "hashes": [
@@ -2231,7 +2156,7 @@
2231
2156
  "sha256:f4ca91d61a4bf61b0f2228f24bbfa6a9facd5f8af03759fe2a655c50ae2c6610",
2232
2157
  "sha256:f6b3dfc7661f8842babd8ea07e9897fe3d9b69a1d7e5fbb743e4160f9387833b"
2233
2158
  ],
2234
- "markers": "python_version >= '3.10'",
2159
+ "markers": "python_version == '3.11'",
2235
2160
  "version": "==2.2.3"
2236
2161
  },
2237
2162
  "packaging": {
@@ -2424,20 +2349,20 @@
2424
2349
  },
2425
2350
  "pydantic-settings": {
2426
2351
  "hashes": [
2427
- "sha256:88e2ca28f6e68ea102c99c3c401d6c9078e68a5df600e97b43891c34e089500a",
2428
- "sha256:c782c7dc3fb40e97b238e713c25d26f64314aece2e91abcff592fcac15f71820"
2352
+ "sha256:81942d5ac3d905f7f3ee1a70df5dfb62d5569c12f51a5a647defc1c3d9ee2e9c",
2353
+ "sha256:d5c663dfbe9db9d5e1c646b2e161da12f0d734d422ee56f567d0ea2cee4e8585"
2429
2354
  ],
2430
2355
  "markers": "python_version >= '3.8'",
2431
- "version": "==2.8.0"
2356
+ "version": "==2.8.1"
2432
2357
  },
2433
2358
  "pytest": {
2434
2359
  "hashes": [
2435
- "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6",
2436
- "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"
2360
+ "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820",
2361
+ "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"
2437
2362
  ],
2438
2363
  "index": "pypi",
2439
2364
  "markers": "python_version >= '3.8'",
2440
- "version": "==8.3.4"
2365
+ "version": "==8.3.5"
2441
2366
  },
2442
2367
  "pytest-asyncio": {
2443
2368
  "hashes": [
@@ -2542,100 +2467,9 @@
2542
2467
  },
2543
2468
  "regex": {
2544
2469
  "hashes": [
2545
- "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c",
2546
- "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60",
2547
- "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d",
2548
- "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d",
2549
- "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67",
2550
- "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773",
2551
- "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0",
2552
- "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef",
2553
- "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad",
2554
- "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe",
2555
- "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3",
2556
- "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114",
2557
- "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4",
2558
- "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39",
2559
- "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e",
2560
- "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3",
2561
- "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7",
2562
- "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d",
2563
- "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e",
2564
- "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a",
2565
- "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7",
2566
- "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f",
2567
- "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0",
2568
- "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54",
2569
- "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b",
2570
- "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c",
2571
- "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd",
2572
- "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57",
2573
- "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34",
2574
- "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d",
2575
- "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f",
2576
- "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b",
2577
- "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519",
2578
- "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4",
2579
- "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a",
2580
2470
  "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638",
2581
- "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b",
2582
- "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839",
2583
- "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07",
2584
- "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf",
2585
- "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff",
2586
- "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0",
2587
- "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f",
2588
- "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95",
2589
- "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4",
2590
- "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e",
2591
- "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13",
2592
2471
  "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519",
2593
- "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2",
2594
- "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008",
2595
- "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9",
2596
- "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc",
2597
- "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48",
2598
- "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20",
2599
- "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89",
2600
- "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e",
2601
- "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf",
2602
- "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b",
2603
- "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd",
2604
- "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84",
2605
- "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29",
2606
- "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b",
2607
- "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3",
2608
- "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45",
2609
- "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3",
2610
- "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983",
2611
- "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e",
2612
- "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7",
2613
- "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4",
2614
- "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e",
2615
- "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467",
2616
- "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577",
2617
- "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001",
2618
- "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0",
2619
- "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55",
2620
- "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9",
2621
- "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf",
2622
- "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6",
2623
- "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e",
2624
- "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde",
2625
- "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62",
2626
- "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df",
2627
- "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51",
2628
- "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5",
2629
- "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86",
2630
- "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2",
2631
- "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2",
2632
- "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0",
2633
- "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c",
2634
- "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f",
2635
- "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6",
2636
- "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2",
2637
- "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9",
2638
- "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"
2472
+ "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"
2639
2473
  ],
2640
2474
  "markers": "python_version >= '3.8'",
2641
2475
  "version": "==2024.11.6"
@@ -0,0 +1,62 @@
1
+ ---
2
+ title: Comparison
3
+ description: ""
4
+ ---
5
+
6
+ The comparison scorer is a default LLM judge scorer that returns the number of differences between `actual_output` and `expected_output` based on some criteria set by the user.
7
+ In practice, this scorer helps determine whether your LLM application produces **answers that are comparable to the expected output**.
8
+
9
+ ## Required Fields
10
+
11
+ The following are the required fields for your `Example` and `ComparisonScorer`:
12
+
13
+ **Example:**
14
+ - `input`
15
+ - `actual_output` - (the output from your LLM system)
16
+ - `expected_output` - (the gold standard you expect the LLM system to produce)
17
+
18
+ **ComparisonScorer:**
19
+ - `criteria` - (the criteria by which you want to compare the two outputs)
20
+ - `description` - (a description of the criteria)
21
+
22
+ ## Scorer Breakdown
23
+
24
+ The comparison scorer evaluates the `actual_output` against the `expected_output` using the specified `criteria` and `description`. The score is calculated as:
25
+
26
+ $$
27
+ \text{score} = \# \text{ of differences between } \text{actual\_output} \text{ and } \text{expected\_output}
28
+ $$
29
+
30
+ The threshold for the comparison scorer determines the acceptable number of differences between the two outputs. If the number of differences exceeds the threshold, the scorer will indicate failure. Conversely, if the number of differences is less than or equal to the threshold, the scorer will indicate success.
31
+
32
+ ## Sample Implementation
33
+
34
+ ```python comparison.py
35
+ from judgeval import JudgmentClient
36
+ from judgeval.data import Example
37
+ from judgeval.scorers import ComparisonScorer
38
+
39
+ example = Example(
40
+ input="Generate a poem about a field",
41
+ # Replace this with the input to your LLM system
42
+ actual_output="A field, kinda windy, with some flowers, stuff growing, and maybe a nice vibe. Petals do things, I guess? Like, they're there… and light exists, but whatever, it's fine.",
43
+ # Replace this with the output from your LLM system
44
+ expected_output="A sunlit meadow, alive with whispers of wind, where daisies dance and hope begins again. Each petal holds a promise—bright, unbruised— a symphony of light that cannot be refused.",
45
+ # Replace this with the gold standard you expect the LLM system to produce
46
+ )
47
+
48
+ tone_scorer = ComparisonScorer(
49
+ threshold=2,
50
+ # Replace this with your own threshold for the comparison scorer
51
+ criteria="Tone and Style",
52
+ # Replace this with your own criteria for the comparison scorer
53
+ description="Tone is the attitude or emotional quality of language, while style is the structural and linguistic framework shaping how ideas are expressed—together, they define how a message feels and the way it's crafted.",
54
+ # Replace this with the description of the criteria (the more specific, the better)
55
+ )
56
+
57
+ client = JudgmentClient()
+ results = client.run_evaluation(
58
+ examples=[example],
59
+ scorers=[tone_scorer],
60
+ model="gpt-4o",
61
+ )
62
+ ```
@@ -8,6 +8,7 @@ Scorers act as measurement tools for evaluating LLM systems based on specific cr
8
8
  `judgeval` comes with a set of **10+ built-in scorers** that you can easily start with, including:
9
9
  - [Answer Correctness](/evaluation/scorers/answer_correctness)
10
10
  - [Answer Relevancy](/evaluation/scorers/answer_relevancy)
11
+ - [Comparison](/evaluation/scorers/comparison)
11
12
  - [Contextual Precision](/evaluation/scorers/contextual_precision)
12
13
  - [Contextual Recall](/evaluation/scorers/contextual_recall)
13
14
  - [Contextual Relevancy](/evaluation/scorers/contextual_relevancy)
@@ -58,6 +58,7 @@
58
58
  "evaluation/scorers/introduction",
59
59
  "evaluation/scorers/answer_correctness",
60
60
  "evaluation/scorers/answer_relevancy",
61
+ "evaluation/scorers/comparison",
61
62
  "evaluation/scorers/contextual_precision",
62
63
  "evaluation/scorers/contextual_recall",
63
64
  "evaluation/scorers/contextual_relevancy",
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "judgeval"
3
- version = "0.0.16"
3
+ version = "0.0.18"
4
4
  authors = [
5
5
  { name="Andrew Li", email="andrew@judgmentlabs.ai" },
6
6
  { name="Alex Shan", email="alex@judgmentlabs.ai" },