judgeval 0.0.3__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. {judgeval-0.0.3 → judgeval-0.0.5}/.github/workflows/ci.yaml +1 -0
  2. {judgeval-0.0.3 → judgeval-0.0.5}/.gitignore +1 -3
  3. {judgeval-0.0.3 → judgeval-0.0.5}/PKG-INFO +1 -1
  4. {judgeval-0.0.3 → judgeval-0.0.5}/Pipfile +3 -0
  5. judgeval-0.0.5/Pipfile.lock +2855 -0
  6. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/introduction.mdx +18 -20
  7. judgeval-0.0.5/docs/evaluation/scorers/answer_correctness.mdx +56 -0
  8. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/scorers/answer_relevancy.mdx +1 -1
  9. judgeval-0.0.5/docs/evaluation/scorers/classifier_scorer.mdx +90 -0
  10. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/scorers/contextual_precision.mdx +1 -1
  11. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/scorers/contextual_recall.mdx +1 -1
  12. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/scorers/contextual_relevancy.mdx +1 -1
  13. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/scorers/faithfulness.mdx +3 -4
  14. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/scorers/hallucination.mdx +3 -4
  15. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/scorers/introduction.mdx +1 -0
  16. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/scorers/json_correctness.mdx +3 -4
  17. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/scorers/summarization.mdx +3 -4
  18. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/scorers/tool_correctness.mdx +3 -4
  19. {judgeval-0.0.3 → judgeval-0.0.5}/docs/getting_started.mdx +31 -46
  20. judgeval-0.0.5/docs/images/trace_screenshot.png +0 -0
  21. judgeval-0.0.5/docs/judgment/introduction.mdx +7 -0
  22. {judgeval-0.0.3 → judgeval-0.0.5}/docs/mint.json +9 -4
  23. judgeval-0.0.5/docs/monitoring/tracing.mdx +0 -0
  24. {judgeval-0.0.3 → judgeval-0.0.5}/pyproject.toml +1 -1
  25. judgeval-0.0.5/src/demo/cookbooks/langchain_basic_rag/basic_agentic_rag.ipynb +781 -0
  26. judgeval-0.0.5/src/demo/cookbooks/langchain_basic_rag/tesla_q3.pdf +0 -0
  27. judgeval-0.0.5/src/demo/cookbooks/langchain_sales/example_product_price_id_mapping.json +1 -0
  28. judgeval-0.0.5/src/demo/cookbooks/langchain_sales/sales_agent_with_context.ipynb +1375 -0
  29. judgeval-0.0.5/src/demo/cookbooks/langchain_sales/sample_product_catalog.txt +20 -0
  30. judgeval-0.0.5/src/demo/cookbooks/openai_travel_agent/agent.py +208 -0
  31. judgeval-0.0.5/src/demo/cookbooks/openai_travel_agent/populate_db.py +73 -0
  32. judgeval-0.0.5/src/judgeval/__init__.py +12 -0
  33. judgeval-0.0.5/src/judgeval/clients.py +30 -0
  34. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/common/tracer.py +57 -31
  35. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/constants.py +1 -0
  36. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/data/__init__.py +2 -1
  37. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/data/scorer_data.py +2 -2
  38. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/evaluation_run.py +16 -15
  39. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/judges/__init__.py +2 -2
  40. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/judges/base_judge.py +1 -1
  41. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/judges/litellm_judge.py +2 -2
  42. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/judges/mixture_of_judges.py +2 -2
  43. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/judges/together_judge.py +2 -2
  44. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/judges/utils.py +4 -4
  45. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/judgment_client.py +67 -15
  46. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/run_evaluation.py +79 -14
  47. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/scorers/__init__.py +8 -4
  48. judgeval-0.0.5/src/judgeval/scorers/api_scorer.py +64 -0
  49. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/scorers/base_scorer.py +3 -2
  50. judgeval-0.0.5/src/judgeval/scorers/exceptions.py +11 -0
  51. judgeval-0.0.3/src/judgeval/scorers/custom_scorer.py → judgeval-0.0.5/src/judgeval/scorers/judgeval_scorer.py +9 -5
  52. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/__init__.py +144 -0
  53. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +23 -0
  54. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +19 -0
  55. {judgeval-0.0.3/src/judgeval/scorers/judgeval_scorers → judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/api_scorers}/answer_relevancy.py +2 -2
  56. {judgeval-0.0.3/src/judgeval/scorers/judgeval_scorers → judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/api_scorers}/contextual_precision.py +2 -2
  57. {judgeval-0.0.3/src/judgeval/scorers/judgeval_scorers → judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/api_scorers}/contextual_recall.py +2 -2
  58. {judgeval-0.0.3/src/judgeval/scorers/judgeval_scorers → judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/api_scorers}/contextual_relevancy.py +2 -2
  59. {judgeval-0.0.3/src/judgeval/scorers/judgeval_scorers → judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/api_scorers}/faithfulness.py +2 -2
  60. {judgeval-0.0.3/src/judgeval/scorers/judgeval_scorers → judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/api_scorers}/hallucination.py +2 -2
  61. {judgeval-0.0.3/src/judgeval/scorers/judgeval_scorers → judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/api_scorers}/json_correctness.py +7 -7
  62. {judgeval-0.0.3/src/judgeval/scorers/judgeval_scorers → judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/api_scorers}/summarization.py +2 -2
  63. {judgeval-0.0.3/src/judgeval/scorers/judgeval_scorers → judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/api_scorers}/tool_correctness.py +2 -2
  64. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +24 -0
  65. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +4 -0
  66. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +272 -0
  67. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +169 -0
  68. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +4 -0
  69. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +292 -0
  70. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +174 -0
  71. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +3 -0
  72. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +259 -0
  73. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +106 -0
  74. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +3 -0
  75. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +249 -0
  76. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +142 -0
  77. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +3 -0
  78. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +240 -0
  79. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +121 -0
  80. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +3 -0
  81. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +318 -0
  82. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +265 -0
  83. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +3 -0
  84. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +258 -0
  85. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +104 -0
  86. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +127 -0
  87. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +3 -0
  88. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +247 -0
  89. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +541 -0
  90. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +3 -0
  91. judgeval-0.0.5/src/judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +151 -0
  92. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/scorers/prompt_scorer.py +4 -4
  93. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/scorers/score.py +14 -14
  94. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/scorers/utils.py +40 -6
  95. judgeval-0.0.5/src/test.txt +51 -0
  96. judgeval-0.0.5/test.txt +0 -0
  97. judgeval-0.0.3/src/judgeval/__init__.py +0 -83
  98. judgeval-0.0.3/src/judgeval/clients.py +0 -19
  99. judgeval-0.0.3/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -21
  100. {judgeval-0.0.3 → judgeval-0.0.5}/LICENSE.md +0 -0
  101. {judgeval-0.0.3 → judgeval-0.0.5}/README.md +0 -0
  102. {judgeval-0.0.3 → judgeval-0.0.5}/docs/README.md +0 -0
  103. {judgeval-0.0.3 → judgeval-0.0.5}/docs/development.mdx +0 -0
  104. {judgeval-0.0.3 → judgeval-0.0.5}/docs/essentials/code.mdx +0 -0
  105. {judgeval-0.0.3 → judgeval-0.0.5}/docs/essentials/images.mdx +0 -0
  106. {judgeval-0.0.3 → judgeval-0.0.5}/docs/essentials/markdown.mdx +0 -0
  107. {judgeval-0.0.3 → judgeval-0.0.5}/docs/essentials/navigation.mdx +0 -0
  108. {judgeval-0.0.3 → judgeval-0.0.5}/docs/essentials/reusable-snippets.mdx +0 -0
  109. {judgeval-0.0.3 → judgeval-0.0.5}/docs/essentials/settings.mdx +0 -0
  110. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/data_datasets.mdx +0 -0
  111. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/data_examples.mdx +0 -0
  112. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/judges.mdx +0 -0
  113. {judgeval-0.0.3 → judgeval-0.0.5}/docs/evaluation/scorers/custom_scorers.mdx +0 -0
  114. {judgeval-0.0.3 → judgeval-0.0.5}/docs/favicon.svg +0 -0
  115. {judgeval-0.0.3 → judgeval-0.0.5}/docs/images/checks-passed.png +0 -0
  116. {judgeval-0.0.3 → judgeval-0.0.5}/docs/images/create_aggressive_scorer.png +0 -0
  117. {judgeval-0.0.3 → judgeval-0.0.5}/docs/images/create_scorer.png +0 -0
  118. {judgeval-0.0.3 → judgeval-0.0.5}/docs/images/evaluation_diagram.png +0 -0
  119. {judgeval-0.0.3 → judgeval-0.0.5}/docs/images/hero-dark.svg +0 -0
  120. {judgeval-0.0.3 → judgeval-0.0.5}/docs/images/hero-light.svg +0 -0
  121. {judgeval-0.0.3 → judgeval-0.0.5}/docs/introduction.mdx +0 -0
  122. {judgeval-0.0.3 → judgeval-0.0.5}/docs/logo/dark.svg +0 -0
  123. {judgeval-0.0.3 → judgeval-0.0.5}/docs/logo/light.svg +0 -0
  124. {judgeval-0.0.3/docs/judgment → judgeval-0.0.5/docs/monitoring}/introduction.mdx +0 -0
  125. /judgeval-0.0.3/docs/evaluation/scorers/classifier_scorer.mdx → /judgeval-0.0.5/docs/monitoring/production_insights.mdx +0 -0
  126. {judgeval-0.0.3 → judgeval-0.0.5}/docs/notebooks/create_dataset.ipynb +0 -0
  127. {judgeval-0.0.3 → judgeval-0.0.5}/docs/notebooks/create_scorer.ipynb +0 -0
  128. {judgeval-0.0.3 → judgeval-0.0.5}/docs/notebooks/demo.ipynb +0 -0
  129. {judgeval-0.0.3 → judgeval-0.0.5}/docs/notebooks/prompt_scorer.ipynb +0 -0
  130. {judgeval-0.0.3 → judgeval-0.0.5}/docs/notebooks/quickstart.ipynb +0 -0
  131. {judgeval-0.0.3 → judgeval-0.0.5}/docs/quickstart.mdx +0 -0
  132. {judgeval-0.0.3 → judgeval-0.0.5}/docs/snippets/snippet-intro.mdx +0 -0
  133. {judgeval-0.0.3 → judgeval-0.0.5}/pytest.ini +0 -0
  134. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/common/__init__.py +0 -0
  135. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/common/exceptions.py +0 -0
  136. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/common/logger.py +0 -0
  137. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/common/utils.py +0 -0
  138. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/data/api_example.py +0 -0
  139. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/data/datasets/__init__.py +0 -0
  140. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/data/datasets/dataset.py +0 -0
  141. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/data/datasets/ground_truth.py +0 -0
  142. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/data/datasets/utils.py +0 -0
  143. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/data/example.py +0 -0
  144. {judgeval-0.0.3 → judgeval-0.0.5}/src/judgeval/data/result.py +0 -0
@@ -40,4 +40,5 @@ jobs:
40
40
 
41
41
  - name: Run tests
42
42
  run: |
43
+ cd src
43
44
  pipenv run pytest
@@ -8,6 +8,7 @@ __pycache__/
8
8
 
9
9
  # Testing files for competitor packages
10
10
  demo/test_competitors.py
11
+ src/e2etests/customer_usecases/
11
12
 
12
13
  # Packages
13
14
  *.egg
@@ -105,8 +106,5 @@ test-results.xml
105
106
  # Encrypted files
106
107
  *.key
107
108
 
108
- # Custom
109
- Pipfile.lock
110
-
111
109
  # Logs
112
110
  ./logs
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -16,6 +16,9 @@ openai = "*"
16
16
  together = "*"
17
17
  anthropic = "*"
18
18
  patronus = "*"
19
+ asyncio = "*"
20
+ nest-asyncio = "*"
21
+ tavily-python = "*"
19
22
 
20
23
  [dev-packages]
21
24
  pytest = "*"