judgeval 0.0.13__tar.gz → 0.0.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. {judgeval-0.0.13 → judgeval-0.0.15}/PKG-INFO +1 -1
  2. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/introduction.mdx +1 -1
  3. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/judges.mdx +1 -1
  4. {judgeval-0.0.13 → judgeval-0.0.15}/docs/getting_started.mdx +1 -1
  5. {judgeval-0.0.13 → judgeval-0.0.15}/pyproject.toml +1 -1
  6. judgeval-0.0.15/src/demo/cookbooks/anime_chatbot_agent/animeChatBot.py +443 -0
  7. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/openai_travel_agent/agent.py +2 -2
  8. judgeval-0.0.15/src/demo/cookbooks/rules_alerts/rules_bot.py +132 -0
  9. judgeval-0.0.15/src/demo/cookbooks/rules_alerts/rules_demo.py +351 -0
  10. judgeval-0.0.15/src/demo/cookbooks/rules_alerts/utils_helper.py +78 -0
  11. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/common/tracer.py +126 -59
  12. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/common/utils.py +12 -13
  13. judgeval-0.0.15/src/judgeval/constants.py +121 -0
  14. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/data/datasets/dataset.py +3 -2
  15. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/data/datasets/eval_dataset_client.py +25 -14
  16. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/data/example.py +8 -1
  17. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/evaluation_run.py +9 -0
  18. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/judges/together_judge.py +1 -1
  19. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/judges/utils.py +1 -1
  20. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/judgment_client.py +163 -28
  21. judgeval-0.0.15/src/judgeval/rules.py +384 -0
  22. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/run_evaluation.py +32 -14
  23. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/api_scorer.py +11 -12
  24. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/base_scorer.py +1 -1
  25. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -1
  26. judgeval-0.0.15/src/judgeval/utils/alerts.py +43 -0
  27. judgeval-0.0.13/src/judgeval/constants.py +0 -70
  28. {judgeval-0.0.13 → judgeval-0.0.15}/.github/workflows/ci.yaml +0 -0
  29. {judgeval-0.0.13 → judgeval-0.0.15}/.gitignore +0 -0
  30. {judgeval-0.0.13 → judgeval-0.0.15}/LICENSE.md +0 -0
  31. {judgeval-0.0.13 → judgeval-0.0.15}/Pipfile +0 -0
  32. {judgeval-0.0.13 → judgeval-0.0.15}/Pipfile.lock +0 -0
  33. {judgeval-0.0.13 → judgeval-0.0.15}/README.md +0 -0
  34. {judgeval-0.0.13 → judgeval-0.0.15}/docs/README.md +0 -0
  35. {judgeval-0.0.13 → judgeval-0.0.15}/docs/api_reference/judgment_client.mdx +0 -0
  36. {judgeval-0.0.13 → judgeval-0.0.15}/docs/api_reference/trace.mdx +0 -0
  37. {judgeval-0.0.13 → judgeval-0.0.15}/docs/development.mdx +0 -0
  38. {judgeval-0.0.13 → judgeval-0.0.15}/docs/essentials/code.mdx +0 -0
  39. {judgeval-0.0.13 → judgeval-0.0.15}/docs/essentials/images.mdx +0 -0
  40. {judgeval-0.0.13 → judgeval-0.0.15}/docs/essentials/markdown.mdx +0 -0
  41. {judgeval-0.0.13 → judgeval-0.0.15}/docs/essentials/navigation.mdx +0 -0
  42. {judgeval-0.0.13 → judgeval-0.0.15}/docs/essentials/reusable-snippets.mdx +0 -0
  43. {judgeval-0.0.13 → judgeval-0.0.15}/docs/essentials/settings.mdx +0 -0
  44. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/data_datasets.mdx +0 -0
  45. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/data_examples.mdx +0 -0
  46. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/answer_correctness.mdx +0 -0
  47. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/answer_relevancy.mdx +0 -0
  48. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/classifier_scorer.mdx +0 -0
  49. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/contextual_precision.mdx +0 -0
  50. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/contextual_recall.mdx +0 -0
  51. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/contextual_relevancy.mdx +0 -0
  52. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/custom_scorers.mdx +0 -0
  53. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/faithfulness.mdx +0 -0
  54. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/hallucination.mdx +0 -0
  55. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/introduction.mdx +0 -0
  56. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/json_correctness.mdx +0 -0
  57. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/summarization.mdx +0 -0
  58. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/scorers/tool_correctness.mdx +0 -0
  59. {judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/unit_testing.mdx +0 -0
  60. {judgeval-0.0.13 → judgeval-0.0.15}/docs/favicon.svg +0 -0
  61. {judgeval-0.0.13 → judgeval-0.0.15}/docs/images/basic_trace_example.png +0 -0
  62. {judgeval-0.0.13 → judgeval-0.0.15}/docs/images/checks-passed.png +0 -0
  63. {judgeval-0.0.13 → judgeval-0.0.15}/docs/images/create_aggressive_scorer.png +0 -0
  64. {judgeval-0.0.13 → judgeval-0.0.15}/docs/images/create_scorer.png +0 -0
  65. {judgeval-0.0.13 → judgeval-0.0.15}/docs/images/evaluation_diagram.png +0 -0
  66. {judgeval-0.0.13 → judgeval-0.0.15}/docs/images/hero-dark.svg +0 -0
  67. {judgeval-0.0.13 → judgeval-0.0.15}/docs/images/hero-light.svg +0 -0
  68. {judgeval-0.0.13 → judgeval-0.0.15}/docs/images/trace_screenshot.png +0 -0
  69. {judgeval-0.0.13 → judgeval-0.0.15}/docs/introduction.mdx +0 -0
  70. {judgeval-0.0.13 → judgeval-0.0.15}/docs/judgment/introduction.mdx +0 -0
  71. {judgeval-0.0.13 → judgeval-0.0.15}/docs/logo/dark.svg +0 -0
  72. {judgeval-0.0.13 → judgeval-0.0.15}/docs/logo/light.svg +0 -0
  73. {judgeval-0.0.13 → judgeval-0.0.15}/docs/mint.json +0 -0
  74. {judgeval-0.0.13 → judgeval-0.0.15}/docs/monitoring/introduction.mdx +0 -0
  75. {judgeval-0.0.13 → judgeval-0.0.15}/docs/monitoring/production_insights.mdx +0 -0
  76. {judgeval-0.0.13 → judgeval-0.0.15}/docs/monitoring/tracing.mdx +0 -0
  77. {judgeval-0.0.13 → judgeval-0.0.15}/docs/notebooks/create_dataset.ipynb +0 -0
  78. {judgeval-0.0.13 → judgeval-0.0.15}/docs/notebooks/create_scorer.ipynb +0 -0
  79. {judgeval-0.0.13 → judgeval-0.0.15}/docs/notebooks/demo.ipynb +0 -0
  80. {judgeval-0.0.13 → judgeval-0.0.15}/docs/notebooks/prompt_scorer.ipynb +0 -0
  81. {judgeval-0.0.13 → judgeval-0.0.15}/docs/notebooks/quickstart.ipynb +0 -0
  82. {judgeval-0.0.13 → judgeval-0.0.15}/docs/quickstart.mdx +0 -0
  83. {judgeval-0.0.13 → judgeval-0.0.15}/docs/snippets/snippet-intro.mdx +0 -0
  84. {judgeval-0.0.13 → judgeval-0.0.15}/pytest.ini +0 -0
  85. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/ci_testing/ci_testing.py +0 -0
  86. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/ci_testing/travel_response.txt +0 -0
  87. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/custom_scorers/competitor_mentions.py +0 -0
  88. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/custom_scorers/text2sql.py +0 -0
  89. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/langchain_basic_rag/basic_agentic_rag.ipynb +0 -0
  90. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/langchain_basic_rag/tesla_q3.pdf +0 -0
  91. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/langchain_sales/example_product_price_id_mapping.json +0 -0
  92. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/langchain_sales/sales_agent_with_context.ipynb +0 -0
  93. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/langchain_sales/sample_product_catalog.txt +0 -0
  94. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/new_bot/basic_bot.py +0 -0
  95. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/openai_travel_agent/populate_db.py +0 -0
  96. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/openai_travel_agent/tools.py +0 -0
  97. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/customer_use/cstone/basic_test.py +0 -0
  98. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/customer_use/cstone/cstone_data.csv +0 -0
  99. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/customer_use/cstone/data.csv +0 -0
  100. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/customer_use/cstone/faithfulness_testing.py +0 -0
  101. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/customer_use/cstone/galen_data.csv +0 -0
  102. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/customer_use/cstone/playground.py +0 -0
  103. {judgeval-0.0.13 → judgeval-0.0.15}/src/demo/customer_use/cstone/results.csv +0 -0
  104. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/__init__.py +0 -0
  105. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/clients.py +0 -0
  106. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/common/__init__.py +0 -0
  107. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/common/exceptions.py +0 -0
  108. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/common/logger.py +0 -0
  109. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/data/__init__.py +0 -0
  110. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/data/api_example.py +0 -0
  111. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/data/datasets/__init__.py +0 -0
  112. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/data/datasets/ground_truth.py +0 -0
  113. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/data/datasets/utils.py +0 -0
  114. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/data/result.py +0 -0
  115. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/data/scorer_data.py +0 -0
  116. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/judges/__init__.py +0 -0
  117. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/judges/base_judge.py +0 -0
  118. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/judges/litellm_judge.py +0 -0
  119. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/judges/mixture_of_judges.py +0 -0
  120. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/__init__.py +0 -0
  121. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/exceptions.py +0 -0
  122. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorer.py +0 -0
  123. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  124. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  125. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  126. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  127. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -0
  128. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -0
  129. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -0
  130. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  131. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
  132. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -0
  133. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -0
  134. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py +0 -0
  135. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -0
  136. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -0
  137. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -0
  138. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -0
  139. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -0
  140. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -0
  141. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -0
  142. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -0
  143. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -0
  144. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -0
  145. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -0
  146. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -0
  147. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -0
  148. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -0
  149. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -0
  150. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -0
  151. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -0
  152. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -0
  153. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -0
  154. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -0
  155. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -0
  156. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -0
  157. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -0
  158. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -0
  159. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -0
  160. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -0
  161. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -0
  162. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -0
  163. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -0
  164. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +0 -0
  165. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +0 -0
  166. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/prompt_scorer.py +0 -0
  167. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/score.py +0 -0
  168. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/scorers/utils.py +0 -0
  169. {judgeval-0.0.13 → judgeval-0.0.15}/src/judgeval/tracer/__init__.py +0 -0
{judgeval-0.0.13 → judgeval-0.0.15}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.0.13
+Version: 0.0.15
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
{judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/introduction.mdx
@@ -79,7 +79,7 @@ scorer = FaithfulnessScorer(threshold=0.5)
 results = client.evaluate_dataset(
     dataset=dataset,
     scorers=[scorer],
-    model="QWEN",
+    model="Qwen/Qwen2.5-72B-Instruct-Turbo",
 )
 ```
 
{judgeval-0.0.13 → judgeval-0.0.15}/docs/evaluation/judges.mdx
@@ -40,7 +40,7 @@ To use an open-source judge model, you simply pass the model name to the `model`
 results = client.run_evaluation(
     examples=[example1, ...],
     scorers=[AnswerRelevancyScorer(threshold=0.5), ...]
-    model="QWEN" # or any other open-source model name
+    model="Qwen/Qwen2.5-72B-Instruct-Turbo" # or any other open-source model name
 )
 ```
 
{judgeval-0.0.13 → judgeval-0.0.15}/docs/getting_started.mdx
@@ -255,7 +255,7 @@ scorer = FaithfulnessScorer(threshold=0.5)
 results = client.evaluate_dataset(
     dataset=dataset,
     scorers=[scorer],
-    model="QWEN",
+    model="Qwen/Qwen2.5-72B-Instruct-Turbo",
 )
 ```
 
{judgeval-0.0.13 → judgeval-0.0.15}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "judgeval"
-version = "0.0.13"
+version = "0.0.15"
 authors = [
     { name="Andrew Li", email="andrew@judgmentlabs.ai" },
     { name="Alex Shan", email="alex@judgmentlabs.ai" },
judgeval-0.0.15/src/demo/cookbooks/anime_chatbot_agent/animeChatBot.py
@@ -0,0 +1,443 @@
+import os
+import re
+import requests
+import asyncio
+import json
+from typing import TypedDict, List
+from dotenv import load_dotenv
+
+import chromadb
+from chromadb.utils import embedding_functions
+from langgraph.graph import StateGraph, START, END
+from langgraph.checkpoint.memory import MemorySaver
+from langchain_community.tools.tavily_search import TavilySearchResults
+from tavily import TavilyClient
+from langchain.schema import Document
+from openai import OpenAI
+
+from judgeval.common.tracer import Tracer, wrap
+from judgeval import JudgmentClient
+from judgeval.data import Example
+from judgeval.scorers import FaithfulnessScorer, AnswerRelevancyScorer
+
+load_dotenv()
+
+client = wrap(OpenAI(api_key=os.getenv("OPENAI_API_KEY")))
+judgment = Tracer(
+    api_key=os.getenv("JUDGMENT_API_KEY"),
+    organization_id=os.getenv("JUDGMENT_ORG_ID"),
+    project_name="anime_chatbot"
+)
+
+# Setup Chroma and embeddings
+chroma_client = chromadb.Client()
+embedding_fn = embedding_functions.OpenAIEmbeddingFunction(
+    api_key=os.getenv("OPENAI_API_KEY"),
+    model_name="text-embedding-ada-002"
+)
+collection = chroma_client.get_or_create_collection(
+    name="anime_data",
+    embedding_function=embedding_fn
+)
+
+# Define the shape of our state
+class ChatState(TypedDict):
+    query: str
+    refined_query: str
+    retrieved_info: List[str] # Retrieved text from Chroma, Jikan, or web
+    final_answer: str # Final answer to show the user
+    next_node: str # Data source decision for routing
+    attempt_count: int # Number of attempts made
+    retry_flag: bool # Flag indicating whether to retry
+    node_decision: str # Stores the chosen data source
+
+# Node Functions
+
+@judgment.observe(span_type="LLM decision")
+def decision_node(state: ChatState) -> ChatState:
+    """
+    Select the best data source for answering an anime query and refine the query if necessary.
+
+    If a previous attempt failed (retry_flag True), include feedback in the prompt.
+    The LLM returns a JSON with keys 'chosen_node' and 'refined_query'.
+    """
+    state["attempt_count"] += 1
+    query = state["query"]
+    refined_query = state["refined_query"]
+
+    feedback = ""
+    if state.get("retry_flag", False):
+        feedback = (f"Previously, you chose to search {state['node_decision']} using refined_query: {refined_query} failed. "
+                    f"Please do not choose {state['node_decision']} and use a different query. Previous attempt feedback: {state['final_answer']}. "
+                    "Incorporate some of these keywords to refine the query.")
+
+    prompt = (
+        "You have three available data sources:\n"
+        "1. 'vector': A Chroma vector database populated with the top 300 anime information.\n"
+        "2. 'jikan': The Jikan API that returns detailed information about a specific anime (for summarization tasks).\n"
+        "3. 'web': A web search tool that returns recent anime news articles.\n\n"
+        "Based on the user's query, decide which data source is most likely to return useful results. "
+        "If the query might not yield good results from that source, provide a refined version of the query "
+        "that is more specific or likely to produce results.\n\n"
+        "For example, the query 'recommend me an anime that prominently swords' is poor because irrelevant keywords like "
+        "'recommend' may be considered, especially when querying the vector database. "
+        "If a user asks for suggestions or similar anime based on thematic content, choose 'vector'. "
+        "Also, overly detailed queries for the Jikan API may yield poor results due to extraneous wording.\n\n"
+        f"User Query: {query}\n\n"
+        f"{feedback}\n\n"
+        "Return your answer in JSON format with exactly two keys:\n"
+        " - 'chosen_node': one of 'vector', 'jikan', or 'web'\n"
+        " - 'refined_query': the refined version of the user's query\n\n"
+        "Ensure the JSON is valid and contains only these two keys."
+    )
+
+    try:
+        response = client.chat.completions.create(
+            model="gpt-4",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that selects the best data source for answering anime queries and refines queries when necessary."},
+                {"role": "user", "content": prompt}
+            ]
+        )
+        content = response.choices[0].message.content.strip()
+        parsed = json.loads(content)
+        chosen_node = parsed.get("chosen_node", "web").strip().lower()
+        refined_query = parsed.get("refined_query", query).strip()
+    except Exception as e:
+        print(f"DecisionNode: Error parsing LLM response: {e}")
+        chosen_node = "web"
+        refined_query = query
+
+    state["next_node"] = chosen_node
+    state["refined_query"] = refined_query
+    state["node_decision"] = chosen_node
+    print(f"DecisionNode: Chosen node: {chosen_node}, Refined query: {refined_query}")
+
+    judgment.get_current_trace().async_evaluate(
+        scorers=[AnswerRelevancyScorer(threshold=0.5)],
+        input=prompt,
+        actual_output=content,
+        model="gpt-4",
+    )
+
+    return state
+
+@judgment.observe(span_type="retriever")
+def anime_vector_node(state: ChatState) -> ChatState:
+    """
+    Perform a similarity search using the Chroma vector store and populate retrieved_info.
+    """
+    query = state["query"]
+    # print("AnimeRecommendationNode: vector search for recommendations.")
+    try:
+        results = collection.query(query_texts=[query], n_results=3)
+        docs = results.get("documents", [[]])[0]
+        if not docs:
+            state["retrieved_info"] = ["No similar anime found for your request."]
+        else:
+            state["retrieved_info"] = [f"RECOMMEND DOC: {d[:300]}" for d in docs]
+    except Exception as e:
+        state["retrieved_info"] = [f"Error retrieving from DB: {e}"]
+
+
+    judgment.get_current_trace().async_evaluate(
+        scorers=[AnswerRelevancyScorer(threshold=0.5)],
+        input=query,
+        actual_output=state["retrieved_info"],
+        model="gpt-4",
+    )
+    return state
+
+@judgment.observe(span_type="API call")
+def anime_jikan_node(state: ChatState) -> ChatState:
+    """
+    Fetch detailed anime information from the Jikan API using the query.
+    Considers the first 10 results and populates retrieved_info.
+    """
+    query = state["query"]
+    # print("AnimeDetailNode: fetching info from Jikan.")
+    url = f"https://api.jikan.moe/v4/anime?q={query}"
+    try:
+        resp = requests.get(url, timeout=10)
+        resp.raise_for_status()
+        data = resp.json()
+        anime_data = data.get("data", [])
+        if not anime_data:
+            state["retrieved_info"] = [f"No anime details found for '{query}'."]
+            return state
+
+        results = []
+        for anime in anime_data[:10]:
+            title = anime.get("title", "Unknown Title")
+            synopsis = anime.get("synopsis", "No synopsis available.")
+            combined = f"Title: {title}\nSynopsis: {synopsis}"
+            results.append(combined)
+        state["retrieved_info"] = results
+    except Exception as e:
+        state["retrieved_info"] = [f"Error fetching details for '{query}': {str(e)}"]
+
+    judgment.get_current_trace().async_evaluate(
+        scorers=[AnswerRelevancyScorer(threshold=0.5)],
+        input=query,
+        actual_output=state["retrieved_info"],
+        model="gpt-4",
+    )
+    return state
+
+@judgment.observe(span_type="web search")
+def anime_web_node(state: ChatState) -> ChatState:
+    """
+    Fetch recent anime news articles using the Tavily web search tool.
+    """
+    TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
+    os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY
+
+    query = state.get("refined_query", state["query"])
+    try:
+        web_search_tool = TavilySearchResults(k=10, tavily_api_key=TAVILY_API_KEY)
+        results = web_search_tool.invoke({"query": query})
+        docs = []
+        for result in results:
+            content = result.get("content", "")
+            title = result.get("title", "No Title")
+            docs.append(f"{title}: {content[:500]}")
+        if not docs:
+            state["retrieved_info"] = [f"No news articles found for '{query}'."]
+        else:
+            state["retrieved_info"] = docs
+
+        judgment.get_current_trace().async_evaluate(
+            scorers=[AnswerRelevancyScorer(threshold=0.5)],
+            input=query,
+            actual_output=state["retrieved_info"],
+            model="gpt-4",
+        )
+    except Exception as e:
+        state["retrieved_info"] = [f"Error retrieving news: {e}"]
+    return state
+
+@judgment.observe(span_type="LLM evaluation")
+def finalize_answer_node(state: ChatState) -> ChatState:
+    """
+    Evaluate the retrieved information using GPT and determine if it is sufficient.
+    If sufficient, return a comprehensive answer. Otherwise, return suggested keywords.
+
+    Expects a JSON output with keys:
+    - "status": "sufficient" or "insufficient"
+    - "final_answer": (if sufficient)
+    - "keywords": (if insufficient)
+    """
+    MAX_ATTEMPTS = 2
+    query = state["query"]
+    retrieved_info = state["retrieved_info"]
+
+    prompt = (
+        "You are a helpful assistant tasked with evaluating retrieved information for an anime query. "
+        "Please provide a comprehensive final answer to the query using the information provided. "
+        "If the retrieved information is insufficient, please also provide a comma-separated list of keywords "
+        "that, if added to the query, would yield better results.\n\n"
+        f"User Query: {query}\n\n"
+        "Retrieved Information:\n" + "\n".join(retrieved_info) + "\n\n"
+        "Return your answer in JSON format with exactly the following keys:\n"
+        ' "status": either "sufficient" or "insufficient",\n'
+        ' "final_answer": "your comprehensive answer using the retrieved information",\n'
+        ' "keywords": "a comma-separated list of suggested keywords if the information is insufficient" (optional).\n'
+        "Ensure the JSON is valid and contains only these keys."
+    )
+
+    try:
+        response = client.chat.completions.create(
+            model="gpt-4",
+            messages=[
+                {
+                    "role": "system",
+                    "content": "You are a helpful assistant that evaluates retrieved information and provides comprehensive answers."
+                },
+                {"role": "user", "content": prompt}
+            ]
+        )
+        content = response.choices[0].message.content.strip()
+
+        parsed = json.loads(content)
+        status = parsed.get("status", "sufficient").lower()
+        final_answer = parsed.get("final_answer", "")
+        if status == "insufficient" and state.get("attempt_count", 0) < MAX_ATTEMPTS:
+            state["final_answer"] = f"Retrieved information insufficient. Suggested keywords: {parsed.get('keywords', '')}\n"
+            state["retry_flag"] = True
+        else:
+            state["final_answer"] = final_answer
+            state["retry_flag"] = False
+    except Exception as e:
+        state["final_answer"] = f"Error generating final answer: {e}"
+        state["retry_flag"] = True
+
+    judgment.get_current_trace().async_evaluate(
+        scorers=[FaithfulnessScorer(threshold=0.5), AnswerRelevancyScorer(threshold=0.5)],
+        input=prompt,
+        actual_output=content,
+        retrieval_context=retrieved_info,
+        model="gpt-4",
+    )
+
+    return state
+
+# Build the Graph
+graph_builder = StateGraph(ChatState)
+graph_builder.add_node("decision", decision_node)
+graph_builder.add_node("vector", anime_vector_node)
+graph_builder.add_node("jikan", anime_jikan_node)
+graph_builder.add_node("web", anime_web_node)
+graph_builder.add_node("finalize", finalize_answer_node)
+
+# Graph edges
+graph_builder.add_edge(START, "decision")
+
+def route_from_decision(state: ChatState) -> str:
+    return state["next_node"]
+
+graph_builder.add_conditional_edges(
+    "decision",
+    route_from_decision,
+    {"vector": "vector", "jikan": "jikan", "web": "web"}
+)
+
+def route_from_finalize(state: ChatState) -> str:
+    max_attempts = 2
+    if state.get("retry_flag", False) and state.get("attempt_count", 0) < max_attempts:
+        print("Final answer unsatisfactory. Retrying with updated query...")
+        return "decision"
+    else:
+        return END
+
+graph_builder.add_conditional_edges(
+    "finalize",
+    route_from_finalize,
+    {"decision": "decision", END: END}
+)
+
+graph_builder.add_edge("vector", "finalize")
+graph_builder.add_edge("jikan", "finalize")
+graph_builder.add_edge("web", "finalize")
+graph_builder.add_edge("finalize", END)
+
+memory_saver = MemorySaver()
+graph = graph_builder.compile(checkpointer=memory_saver)
+
+def fetch_top_anime(total=350):
+    """
+    Fetch top anime from the Jikan API.
+    """
+    base_url = "https://api.jikan.moe/v4/top/anime"
+    per_page = 25
+    pages = (total + per_page - 1) // per_page
+    all_data = []
+    for p in range(1, pages + 1):
+        try:
+            params = {"page": p, "limit": per_page}
+            r = requests.get(base_url, params=params, timeout=10)
+            r.raise_for_status()
+            data = r.json()
+            anime_list = data.get("data", [])
+            if not anime_list:
+                break
+            all_data.extend(anime_list)
+        except Exception as e:
+            # print(f"Error fetching top anime page {p}: {e}")
+            pass
+    return all_data
+
+def populate_vector_db(coll, anime_list):
+    """
+    Populate the Chroma collection with detailed anime information.
+    """
+    docs, metas, ids = [], [], []
+    seen_ids = set()
+    for item in anime_list:
+        mal_id = item.get("mal_id")
+        if mal_id in seen_ids:
+            continue
+        seen_ids.add(mal_id)
+        title = item.get("title", "")
+        synopsis = item.get("synopsis", "")
+        type_ = item.get("type", "Unknown")
+        episodes = item.get("episodes", "N/A")
+        score = item.get("score", "N/A")
+        rank = item.get("rank", "N/A")
+        popularity = item.get("popularity", "N/A")
+
+        if rank is None:
+            rank = "N/A"
+        if episodes is None:
+            episodes = "N/A"
+
+        genres = []
+        if "genres" in item and isinstance(item["genres"], list):
+            genres = [g.get("name", "") for g in item["genres"] if g.get("name")]
+        genres_str = ", ".join(genres) if genres else "N/A"
+
+        combined = (
+            f"Title: {title}\n"
+            f"Synopsis: {synopsis}\n"
+            f"Type: {type_}\n"
+            f"Episodes: {episodes}\n"
+            f"Score: {score}\n"
+            f"Rank: {rank}\n"
+            f"Popularity: {popularity}\n"
+            f"Genres: {genres_str}"
+        )
+        docs.append(combined)
+        meta = {
+            "title": title,
+            "mal_id": mal_id,
+            "type": type_,
+            "episodes": episodes,
+            "score": score,
+            "rank": rank,
+            "popularity": popularity,
+            "genres": genres_str,
+        }
+        metas.append(meta)
+        ids.append(str(mal_id))
+    if docs:
+        try:
+            coll.add(documents=docs, metadatas=metas, ids=ids)
+            print(f"Populated {len(docs)} anime items into Chroma collection.")
+        except Exception as e:
+            print("Error inserting into Chroma:", e)
+    else:
+        print("No anime records to add.")
+
+
+@judgment.observe(span_type="Main Function", overwrite=True)
+async def main():
+    top_anime = fetch_top_anime(total=350)
+    populate_vector_db(collection, top_anime)
+
+    print("=== Basic LangGraph Anime Chatbot ===")
+    print("Type 'quit' or 'exit' to stop.\n")
+
+    while True:
+        user_input = input("You: ")
+        if user_input.lower() in ["quit", "exit"]:
+            break
+
+        init_state: ChatState = {
+            "query": user_input,
+            "refined_query": user_input,
+            "retrieved_info": [],
+            "final_answer": "",
+            "next_node": "",
+            "attempt_count": 0,
+            "retry_flag": False,
+            "node_decision": ""
+        }
+
+        results = graph.invoke(
+            init_state,
+            config={"configurable": {"thread_id": "my_unique_conversation_id"}}
+        )
+        final_answer = results["final_answer"]
+        print("Assistant:", final_answer, "\n")
+
+if __name__ == "__main__":
+    asyncio.run(main())
{judgeval-0.0.13 → judgeval-0.0.15}/src/demo/cookbooks/openai_travel_agent/agent.py
@@ -50,7 +50,7 @@ async def get_flights(destination):
     judgment.get_current_trace().async_evaluate(
         scorers=[AnswerRelevancyScorer(threshold=0.5)],
         input=prompt,
-        actual_output=flights_search,
+        actual_output=flights_search["results"],
         model="gpt-4",
     )
     return flights_search
@@ -63,7 +63,7 @@ async def get_weather(destination, start_date, end_date):
     judgment.get_current_trace().async_evaluate(
         scorers=[AnswerRelevancyScorer(threshold=0.5)],
        input=prompt,
-        actual_output=weather_search,
+        actual_output=weather_search["results"],
         model="gpt-4",
     )
     return weather_search
judgeval-0.0.15/src/demo/cookbooks/rules_alerts/rules_bot.py
@@ -0,0 +1,132 @@
+import os
+import asyncio
+from typing import Dict, List
+from openai import OpenAI
+from uuid import uuid4
+from dotenv import load_dotenv
+
+from judgeval.tracer import Tracer, wrap
+from judgeval.scorers import AnswerRelevancyScorer, FaithfulnessScorer, AnswerCorrectnessScorer
+from judgeval.rules import Rule, Condition, Operator
+
+# Initialize clients
+load_dotenv()
+rules = [
+    Rule(
+        name="All Conditions Check",
+        description="Check if all conditions are met",
+        conditions=[
+            # Use scorer objects instead of strings
+            Condition(metric=FaithfulnessScorer(threshold=0.7), operator=Operator.GTE, threshold=0.7),
+            Condition(metric=AnswerRelevancyScorer(threshold=0.8), operator=Operator.GTE, threshold=0.8),
+            Condition(metric=AnswerCorrectnessScorer(threshold=0.9), operator=Operator.GTE, threshold=0.9)
+        ],
+        combine_type="all" # Require all conditions to trigger
+    ),
+    Rule(
+        name="Any Condition Check",
+        description="Check if any condition is met",
+        conditions=[
+            Condition(metric=FaithfulnessScorer(threshold=0.7), operator=Operator.GTE, threshold=0.7),
+            Condition(metric=AnswerRelevancyScorer(threshold=0.8), operator=Operator.GTE, threshold=0.8),
+            Condition(metric=AnswerCorrectnessScorer(threshold=0.9), operator=Operator.GTE, threshold=0.9)
+        ],
+        combine_type="any" # Require any condition to trigger
+    )
+    # Removed rules that used SimpleKeywordScorer
+]
+
+judgment = Tracer(api_key=os.getenv("JUDGMENT_API_KEY"), project_name="restaurant_bot", rules=rules)
+client = wrap(OpenAI())
+
+@judgment.observe(span_type="Research")
+async def search_restaurants(cuisine: str, location: str = "nearby") -> List[Dict]:
+    """Search for restaurants matching the cuisine type."""
+    # Simulate API call to restaurant database
+    prompt = f"Find 3 popular {cuisine} restaurants {location}. Return ONLY a JSON array of objects with 'name', 'rating', and 'price_range' fields. No other text."
+
+    response = client.chat.completions.create(
+        model="gpt-4",
+        messages=[
+            {"role": "system", "content": """You are a restaurant search expert.
+            Return ONLY valid JSON arrays containing restaurant objects.
+            Example format: [{"name": "Restaurant Name", "rating": 4.5, "price_range": "$$"}]
+            Do not include any other text or explanations."""},
+            {"role": "user", "content": prompt}
+        ]
+    )
+
+    try:
+        import json
+        return json.loads(response.choices[0].message.content)
+    except json.JSONDecodeError as e:
+        print(f"Error parsing JSON response: {response.choices[0].message.content}")
+        return [{"name": "Error fetching restaurants", "rating": 0, "price_range": "N/A"}]
+
+@judgment.observe(span_type="Research")
+async def get_menu_highlights(restaurant_name: str) -> List[str]:
+    """Get popular menu items for a restaurant."""
+    prompt = f"What are 3 must-try dishes at {restaurant_name}?"
+
+    response = client.chat.completions.create(
+        model="gpt-4",
+        messages=[
+            {"role": "system", "content": "You are a food critic. List only the dish names."},
+            {"role": "user", "content": prompt}
+        ]
+    )
+
+    judgment.get_current_trace().async_evaluate(
+        scorers=[AnswerRelevancyScorer(threshold=0.5)],
+        input=prompt,
+        actual_output=response.choices[0].message.content,
+        model="gpt-4",
+    )
+
+    return response.choices[0].message.content.split("\n")
+
+@judgment.observe(span_type="function")
+async def generate_recommendation(cuisine: str, restaurants: List[Dict], menu_items: Dict[str, List[str]]) -> str:
+    """Generate a natural language recommendation."""
+    context = f"""
+    Cuisine: {cuisine}
+    Restaurants: {restaurants}
+    Popular Items: {menu_items}
+    """
+
+    response = client.chat.completions.create(
+        model="gpt-4",
+        messages=[
+            {"role": "system", "content": "You are a helpful food recommendation bot. Provide a natural recommendation based on the data."},
+            {"role": "user", "content": context}
+        ]
+    )
+    return response.choices[0].message.content
+
+@judgment.observe(span_type="Research")
+async def get_food_recommendations(cuisine: str) -> str:
+    """Main function to get restaurant recommendations."""
+    # Search for restaurants
+    restaurants = await search_restaurants(cuisine)
+
+    # Get menu highlights for each restaurant
+    menu_items = {}
+    for restaurant in restaurants:
+        menu_items[restaurant['name']] = await get_menu_highlights(restaurant['name'])
+
+    # Generate final recommendation
+    recommendation = await generate_recommendation(cuisine, restaurants, menu_items)
+    judgment.get_current_trace().async_evaluate(
+        scorers=[AnswerRelevancyScorer(threshold=0.5), FaithfulnessScorer(threshold=1.0)],
+        input=f"Create a recommendation for a restaurant and dishes based on the desired cuisine: {cuisine}",
+        actual_output=recommendation,
+        retrieval_context=[str(restaurants), str(menu_items)],
+        model="gpt-4",
+    )
+    return recommendation
+
+if __name__ == "__main__":
+    cuisine = input("What kind of food would you like to eat? ")
+    recommendation = asyncio.run(get_food_recommendations(cuisine))
+    print("\nHere are my recommendations:\n")
+    print(recommendation)
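
Note on the new rules API: `src/judgeval/rules.py` (added in 0.0.15) is exercised by the `rules_bot.py` demo above. For orientation, here is a minimal sketch of the same wiring reduced to a single rule; the `Rule`/`Condition`/`Operator` signatures and the `rules=` parameter of `Tracer` are taken directly from the diff, while the project name and the specific rule are illustrative only:

```python
import os
from openai import OpenAI

from judgeval.tracer import Tracer, wrap
from judgeval.rules import Rule, Condition, Operator
from judgeval.scorers import FaithfulnessScorer

# One rule with one condition: per the demo's semantics, the condition is
# met when the faithfulness score is >= 0.9 (Operator.GTE).
faithfulness_gate = Rule(
    name="Faithfulness Gate",
    description="Check if faithfulness meets the 0.9 threshold",
    conditions=[
        Condition(metric=FaithfulnessScorer(threshold=0.9),
                  operator=Operator.GTE, threshold=0.9),
    ],
    combine_type="all",  # with a single condition, "all" and "any" coincide
)

# Rules are attached at Tracer construction time; async_evaluate() calls made
# inside @judgment.observe spans are then checked against every rule.
judgment = Tracer(api_key=os.getenv("JUDGMENT_API_KEY"),
                  project_name="rules_sketch", rules=[faithfulness_gate])
client = wrap(OpenAI())  # wrapped so LLM calls land in the trace
```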