judgeval 0.0.28__tar.gz → 0.0.30__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. {judgeval-0.0.28 → judgeval-0.0.30}/PKG-INFO +1 -1
  2. {judgeval-0.0.28 → judgeval-0.0.30}/pyproject.toml +1 -1
  3. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/common/tracer.py +1 -1
  4. judgeval-0.0.30/src/test.py +143 -0
  5. judgeval-0.0.28/src/test.py +0 -21
  6. {judgeval-0.0.28 → judgeval-0.0.30}/.github/workflows/ci.yaml +0 -0
  7. {judgeval-0.0.28 → judgeval-0.0.30}/.gitignore +0 -0
  8. {judgeval-0.0.28 → judgeval-0.0.30}/LICENSE.md +0 -0
  9. {judgeval-0.0.28 → judgeval-0.0.30}/Pipfile +0 -0
  10. {judgeval-0.0.28 → judgeval-0.0.30}/Pipfile.lock +0 -0
  11. {judgeval-0.0.28 → judgeval-0.0.30}/README.md +0 -0
  12. {judgeval-0.0.28 → judgeval-0.0.30}/docs/README.md +0 -0
  13. {judgeval-0.0.28 → judgeval-0.0.30}/docs/alerts/notifications.mdx +0 -0
  14. {judgeval-0.0.28 → judgeval-0.0.30}/docs/alerts/platform_notifications.mdx +0 -0
  15. {judgeval-0.0.28 → judgeval-0.0.30}/docs/alerts/rules.mdx +0 -0
  16. {judgeval-0.0.28 → judgeval-0.0.30}/docs/api_reference/judgment_client.mdx +0 -0
  17. {judgeval-0.0.28 → judgeval-0.0.30}/docs/api_reference/trace.mdx +0 -0
  18. {judgeval-0.0.28 → judgeval-0.0.30}/docs/development.mdx +0 -0
  19. {judgeval-0.0.28 → judgeval-0.0.30}/docs/essentials/code.mdx +0 -0
  20. {judgeval-0.0.28 → judgeval-0.0.30}/docs/essentials/images.mdx +0 -0
  21. {judgeval-0.0.28 → judgeval-0.0.30}/docs/essentials/markdown.mdx +0 -0
  22. {judgeval-0.0.28 → judgeval-0.0.30}/docs/essentials/navigation.mdx +0 -0
  23. {judgeval-0.0.28 → judgeval-0.0.30}/docs/essentials/reusable-snippets.mdx +0 -0
  24. {judgeval-0.0.28 → judgeval-0.0.30}/docs/essentials/settings.mdx +0 -0
  25. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/data_datasets.mdx +0 -0
  26. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/data_examples.mdx +0 -0
  27. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/introduction.mdx +0 -0
  28. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/judges.mdx +0 -0
  29. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/answer_correctness.mdx +0 -0
  30. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/answer_relevancy.mdx +0 -0
  31. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/classifier_scorer.mdx +0 -0
  32. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/comparison.mdx +0 -0
  33. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/contextual_precision.mdx +0 -0
  34. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/contextual_recall.mdx +0 -0
  35. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/contextual_relevancy.mdx +0 -0
  36. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/custom_scorers.mdx +0 -0
  37. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/execution_order.mdx +0 -0
  38. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/faithfulness.mdx +0 -0
  39. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/groundedness.mdx +0 -0
  40. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/hallucination.mdx +0 -0
  41. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/introduction.mdx +0 -0
  42. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/json_correctness.mdx +0 -0
  43. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/scorers/summarization.mdx +0 -0
  44. {judgeval-0.0.28 → judgeval-0.0.30}/docs/evaluation/unit_testing.mdx +0 -0
  45. {judgeval-0.0.28 → judgeval-0.0.30}/docs/favicon.svg +0 -0
  46. {judgeval-0.0.28 → judgeval-0.0.30}/docs/getting_started.mdx +0 -0
  47. {judgeval-0.0.28 → judgeval-0.0.30}/docs/images/basic_trace_example.png +0 -0
  48. {judgeval-0.0.28 → judgeval-0.0.30}/docs/images/checks-passed.png +0 -0
  49. {judgeval-0.0.28 → judgeval-0.0.30}/docs/images/create_aggressive_scorer.png +0 -0
  50. {judgeval-0.0.28 → judgeval-0.0.30}/docs/images/create_scorer.png +0 -0
  51. {judgeval-0.0.28 → judgeval-0.0.30}/docs/images/evaluation_diagram.png +0 -0
  52. {judgeval-0.0.28 → judgeval-0.0.30}/docs/images/hero-dark.svg +0 -0
  53. {judgeval-0.0.28 → judgeval-0.0.30}/docs/images/hero-light.svg +0 -0
  54. {judgeval-0.0.28 → judgeval-0.0.30}/docs/images/notifications_page.png +0 -0
  55. {judgeval-0.0.28 → judgeval-0.0.30}/docs/images/online_eval_fault.png +0 -0
  56. {judgeval-0.0.28 → judgeval-0.0.30}/docs/images/reports_modal.png +0 -0
  57. {judgeval-0.0.28 → judgeval-0.0.30}/docs/images/trace_ss.png +0 -0
  58. {judgeval-0.0.28 → judgeval-0.0.30}/docs/integration/langgraph.mdx +0 -0
  59. {judgeval-0.0.28 → judgeval-0.0.30}/docs/introduction.mdx +0 -0
  60. {judgeval-0.0.28 → judgeval-0.0.30}/docs/judgment/introduction.mdx +0 -0
  61. {judgeval-0.0.28 → judgeval-0.0.30}/docs/logo/dark.svg +0 -0
  62. {judgeval-0.0.28 → judgeval-0.0.30}/docs/logo/light.svg +0 -0
  63. {judgeval-0.0.28 → judgeval-0.0.30}/docs/mint.json +0 -0
  64. {judgeval-0.0.28 → judgeval-0.0.30}/docs/monitoring/introduction.mdx +0 -0
  65. {judgeval-0.0.28 → judgeval-0.0.30}/docs/monitoring/production_insights.mdx +0 -0
  66. {judgeval-0.0.28 → judgeval-0.0.30}/docs/monitoring/tracing.mdx +0 -0
  67. {judgeval-0.0.28 → judgeval-0.0.30}/docs/notebooks/create_dataset.ipynb +0 -0
  68. {judgeval-0.0.28 → judgeval-0.0.30}/docs/notebooks/create_scorer.ipynb +0 -0
  69. {judgeval-0.0.28 → judgeval-0.0.30}/docs/notebooks/demo.ipynb +0 -0
  70. {judgeval-0.0.28 → judgeval-0.0.30}/docs/notebooks/prompt_scorer.ipynb +0 -0
  71. {judgeval-0.0.28 → judgeval-0.0.30}/docs/notebooks/quickstart.ipynb +0 -0
  72. {judgeval-0.0.28 → judgeval-0.0.30}/docs/quickstart.mdx +0 -0
  73. {judgeval-0.0.28 → judgeval-0.0.30}/docs/snippets/snippet-intro.mdx +0 -0
  74. {judgeval-0.0.28 → judgeval-0.0.30}/pytest.ini +0 -0
  75. {judgeval-0.0.28 → judgeval-0.0.30}/src/demo/cookbooks/JNPR_Mist/test.py +0 -0
  76. {judgeval-0.0.28 → judgeval-0.0.30}/src/demo/cookbooks/linkd/text2sql.py +0 -0
  77. {judgeval-0.0.28 → judgeval-0.0.30}/src/demo/custom_example_demo/osiris_test.py +0 -0
  78. {judgeval-0.0.28 → judgeval-0.0.30}/src/demo/custom_example_demo/qodo_scorer.py +0 -0
  79. {judgeval-0.0.28 → judgeval-0.0.30}/src/demo/demo.py +0 -0
  80. {judgeval-0.0.28 → judgeval-0.0.30}/src/demo/new_trace/example_complex_async.py +0 -0
  81. {judgeval-0.0.28 → judgeval-0.0.30}/src/demo/travel_agent.py +0 -0
  82. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/__init__.py +0 -0
  83. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/clients.py +0 -0
  84. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/common/__init__.py +0 -0
  85. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/common/exceptions.py +0 -0
  86. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/common/logger.py +0 -0
  87. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/common/utils.py +0 -0
  88. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/constants.py +0 -0
  89. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/data/__init__.py +0 -0
  90. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/data/custom_api_example.py +0 -0
  91. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/data/datasets/__init__.py +0 -0
  92. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/data/datasets/dataset.py +0 -0
  93. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/data/datasets/eval_dataset_client.py +0 -0
  94. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/data/example.py +0 -0
  95. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/data/result.py +0 -0
  96. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/data/scorer_data.py +0 -0
  97. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/evaluation_run.py +0 -0
  98. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/integrations/langgraph.py +0 -0
  99. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/judges/__init__.py +0 -0
  100. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/judges/base_judge.py +0 -0
  101. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/judges/litellm_judge.py +0 -0
  102. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/judges/mixture_of_judges.py +0 -0
  103. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/judges/together_judge.py +0 -0
  104. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/judges/utils.py +0 -0
  105. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/judgment_client.py +0 -0
  106. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/rules.py +0 -0
  107. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/run_evaluation.py +0 -0
  108. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/__init__.py +0 -0
  109. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/api_scorer.py +0 -0
  110. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/base_scorer.py +0 -0
  111. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/exceptions.py +0 -0
  112. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorer.py +0 -0
  113. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  114. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  115. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  116. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  117. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -0
  118. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -0
  119. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -0
  120. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -0
  121. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -0
  122. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  123. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -0
  124. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
  125. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  126. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -0
  127. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -0
  128. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -0
  129. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -0
  130. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -0
  131. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -0
  132. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -0
  133. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -0
  134. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -0
  135. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -0
  136. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -0
  137. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -0
  138. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/__init__.py +0 -0
  139. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py +0 -0
  140. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py +0 -0
  141. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -0
  142. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -0
  143. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -0
  144. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -0
  145. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -0
  146. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -0
  147. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -0
  148. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -0
  149. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -0
  150. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py +0 -0
  151. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/execution_order/execution_order.py +0 -0
  152. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -0
  153. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -0
  154. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -0
  155. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -0
  156. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -0
  157. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -0
  158. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py +0 -0
  159. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py +0 -0
  160. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -0
  161. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -0
  162. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -0
  163. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -0
  164. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -0
  165. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/prompt_scorer.py +0 -0
  166. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/score.py +0 -0
  167. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/scorers/utils.py +0 -0
  168. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/tracer/__init__.py +0 -0
  169. {judgeval-0.0.28 → judgeval-0.0.30}/src/judgeval/utils/alerts.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.0.28
3
+ Version: 0.0.30
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "judgeval"
3
- version = "0.0.28"
3
+ version = "0.0.30"
4
4
  authors = [
5
5
  { name="Andrew Li", email="andrew@judgmentlabs.ai" },
6
6
  { name="Alex Shan", email="alex@judgmentlabs.ai" },
@@ -73,7 +73,7 @@ class TraceEntry:
73
73
  span_id: str # Unique ID for this specific span instance
74
74
  depth: int # Indentation level for nested calls
75
75
  message: str # Human-readable description
76
- # created_at: Unix timestamp when entry was created, replacing the deprecated 'timestamp' field
76
+ created_at: float # Unix timestamp when entry was created, replacing the deprecated 'timestamp' field
77
77
  duration: Optional[float] = None # Time taken (for exit/evaluation entries)
78
78
  trace_id: str = None # ID of the trace this entry belongs to
79
79
  output: Any = None # Function output value
@@ -0,0 +1,143 @@
1
+ import os
2
+ import asyncio
3
+ from openai import OpenAI
4
+ from dotenv import load_dotenv
5
+ from judgeval.common.tracer import Tracer, wrap
6
+ from judgeval.scorers import AnswerRelevancyScorer, FaithfulnessScorer, GroundednessScorer
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+
11
+ # Initialize OpenAI client and Judgment tracer
12
+ client = wrap(OpenAI())
13
+ judgment = Tracer(project_name="music-bot-demo")
14
+
15
+ @judgment.observe(span_type="tool")
16
+ async def search_tavily(query):
17
+ """Search for information using Tavily."""
18
+ from tavily import TavilyClient
19
+
20
+ tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
21
+ search_result = tavily_client.search(
22
+ query=query,
23
+ search_depth="advanced",
24
+ max_results=5
25
+ )
26
+
27
+ return search_result
28
+
29
+ @judgment.observe(span_type="function")
30
+ async def ask_user_preferences():
31
+ """Ask the user a series of questions about their music preferences."""
32
+ questions = [
33
+ "What are some of your favorite artists or bands?",
34
+ "What genres of music do you enjoy the most?",
35
+ "Do you have any favorite songs currently?",
36
+ "Are there any moods or themes you're looking for in new music?",
37
+ "Do you prefer newer releases or classic songs?"
38
+ ]
39
+
40
+ preferences = {}
41
+ for question in questions:
42
+ print(f"\n{question}")
43
+ answer = input("> ")
44
+ preferences[question] = answer
45
+
46
+ return preferences
47
+
48
+ @judgment.observe(span_type="function")
49
+ async def search_music_recommendations(preferences):
50
+ """Search for music recommendations based on user preferences."""
51
+ # Construct search queries based on preferences
52
+ search_results = {}
53
+
54
+ # Search for artist recommendations
55
+ if preferences.get("What are some of your favorite artists or bands?"):
56
+ artists_query = f"Music similar to {preferences['What are some of your favorite artists or bands?']}"
57
+ search_results["artist_based"] = await search_tavily(artists_query)
58
+
59
+ # Search for genre recommendations
60
+ if preferences.get("What genres of music do you enjoy the most?"):
61
+ genre_query = f"Best {preferences['What genres of music do you enjoy the most?']} songs"
62
+ search_results["genre_based"] = await search_tavily(genre_query)
63
+
64
+ # Search for mood-based recommendations
65
+ if preferences.get("Are there any moods or themes you're looking for in new music?"):
66
+ mood_query = f"""{preferences["Are there any moods or themes you're looking for in new music?"]} music recommendations"""
67
+ search_results["mood_based"] = await search_tavily(mood_query)
68
+
69
+ return search_results
70
+
71
+ @judgment.observe(span_type="function")
72
+ async def generate_recommendations(preferences, search_results):
73
+ """Generate personalized music recommendations using the search results."""
74
+ # Prepare context from search results
75
+ context = ""
76
+ for category, results in search_results.items():
77
+ context += f"\n{category.replace('_', ' ').title()} Search Results:\n"
78
+ for result in results.get("results", []):
79
+ context += f"- {result.get('title')}: {result.get('content')[:200]}...\n"
80
+
81
+ # Create a prompt for the LLM
82
+ prompt = f"""
83
+ Suggest 5-7 songs they could enjoy. Be creative and suggest whatever feels right. You should only recommend songs that are from the user's favorite artists/bands.
84
+ For each song, include the artist name, song title, and a brief explanation of why they might like it.
85
+
86
+ User Preferences:
87
+ {preferences}
88
+
89
+ Search Results:
90
+ {context}
91
+
92
+ Provide recommendations in a clear, organized format. Focus on specific songs rather than just artists.
93
+ """
94
+
95
+
96
+ # Generate recommendations using OpenAI
97
+ response = client.chat.completions.create(
98
+ model="gpt-4o-mini",
99
+ messages=[
100
+ {"role": "system", "content": "You are a music recommendation expert with deep knowledge of various genres, artists, and songs. Your goal is to suggest songs that match the user's preferences; recommend songs from their favorite artists/bands."},
101
+ {"role": "user", "content": prompt}
102
+ ]
103
+ )
104
+
105
+ recommendations = response.choices[0].message.content
106
+
107
+ # Evaluate the recommendations
108
+ judgment.get_current_trace().async_evaluate(
109
+ scorers=[
110
+ AnswerRelevancyScorer(threshold=1.0),
111
+ GroundednessScorer(threshold=1.0)
112
+ ],
113
+ input=prompt,
114
+ actual_output=recommendations,
115
+ retrieval_context=[str(search_results)],
116
+ model="gpt-4o"
117
+ )
118
+
119
+ return recommendations
120
+
121
+ @judgment.observe(span_type="Main Function")
122
+ async def music_recommendation_bot():
123
+ """Main function to run the music recommendation bot."""
124
+ print("🎵 Welcome to the Music Recommendation Bot! 🎵")
125
+ print("I'll ask you a few questions to understand your music taste, then suggest some songs you might enjoy.")
126
+
127
+ # Get user preferences
128
+ preferences = await ask_user_preferences()
129
+
130
+ print("\nSearching for music recommendations based on your preferences...")
131
+ search_results = await search_music_recommendations(preferences)
132
+
133
+ print("\nGenerating personalized recommendations...")
134
+ recommendations = await generate_recommendations(preferences, search_results)
135
+
136
+ print("\n🎧 Your Personalized Music Recommendations 🎧")
137
+ print(recommendations)
138
+
139
+ return recommendations
140
+
141
+ if __name__ == "__main__":
142
+ asyncio.run(music_recommendation_bot())
143
+
@@ -1,21 +0,0 @@
1
- from judgeval import JudgmentClient
2
- from judgeval.data import Example
3
- from judgeval.scorers import FaithfulnessScorer
4
-
5
- client = JudgmentClient()
6
-
7
- example = Example(
8
- input="What if these shoes don't fit?",
9
- actual_output="We offer a 30-day full refund at no extra cost.",
10
- retrieval_context=["All customers are eligible for a 30 day full refund at no extra cost."],
11
- )
12
-
13
- scorer = FaithfulnessScorer(threshold=0.5)
14
- results = client.run_evaluation(
15
- examples=[example],
16
- scorers=[scorer],
17
- model="gpt-4o",
18
- project_name="fdsafdsafdstest",
19
- eval_run_name="fdsafadstest",
20
- )
21
- print(results)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes