ragaai-catalyst 2.0.7.2b0__py3-none-any.whl → 2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. ragaai_catalyst/dataset.py +0 -3
  2. ragaai_catalyst/evaluation.py +1 -2
  3. ragaai_catalyst/tracers/__init__.py +1 -1
  4. ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +231 -74
  5. ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +32 -42
  6. ragaai_catalyst/tracers/agentic_tracing/base.py +132 -30
  7. ragaai_catalyst/tracers/agentic_tracing/data_structure.py +91 -79
  8. ragaai_catalyst/tracers/agentic_tracing/examples/FinancialAnalysisSystem.ipynb +536 -0
  9. ragaai_catalyst/tracers/agentic_tracing/examples/GameActivityEventPlanner.ipynb +134 -0
  10. ragaai_catalyst/tracers/agentic_tracing/examples/TravelPlanner.ipynb +563 -0
  11. ragaai_catalyst/tracers/agentic_tracing/file_name_tracker.py +46 -0
  12. ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +262 -356
  13. ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +31 -19
  14. ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +61 -117
  15. ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py +187 -0
  16. ragaai_catalyst/tracers/agentic_tracing/upload_code.py +115 -0
  17. ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +35 -59
  18. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +0 -4
  19. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2201 -324
  20. ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py +186 -0
  21. ragaai_catalyst/tracers/exporters/raga_exporter.py +1 -7
  22. ragaai_catalyst/tracers/tracer.py +6 -2
  23. {ragaai_catalyst-2.0.7.2b0.dist-info → ragaai_catalyst-2.1.dist-info}/METADATA +8 -4
  24. {ragaai_catalyst-2.0.7.2b0.dist-info → ragaai_catalyst-2.1.dist-info}/RECORD +26 -20
  25. {ragaai_catalyst-2.0.7.2b0.dist-info → ragaai_catalyst-2.1.dist-info}/WHEEL +1 -1
  26. ragaai_catalyst/tracers/agentic_tracing/Untitled-1.json +0 -660
  27. {ragaai_catalyst-2.0.7.2b0.dist-info → ragaai_catalyst-2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,536 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Financial Analysis System with AgentNeo Integration\n",
8
+ "\n",
9
+ "This Jupyter notebook demonstrates the integration of AgentNeo, a powerful tracing and monitoring tool, with a financial analysis system. AgentNeo provides seamless tracing capabilities for both function calls and AI model interactions, allowing for comprehensive analysis and debugging of complex systems."
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "markdown",
14
+ "metadata": {},
15
+ "source": [
16
+ "## Setup and Imports\n",
17
+ "\n",
18
+ "First, let's import the necessary libraries and set up our environment."
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 1,
24
+ "metadata": {},
25
+ "outputs": [
26
+ {
27
+ "name": "stderr",
28
+ "output_type": "stream",
29
+ "text": [
30
+ "INFO:httpx:HTTP Request: GET https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json \"HTTP/1.1 200 OK\"\n"
31
+ ]
32
+ }
33
+ ],
34
+ "source": [
35
+ "import os\n",
36
+ "os.chdir('..')\n",
37
+ "\n",
38
+ "import random\n",
39
+ "from textblob import TextBlob\n",
40
+ "import openai\n",
41
+ "from dotenv import load_dotenv\n",
42
+ "from agentneo import AgentNeo, Tracer, Evaluation\n",
43
+ "\n",
44
+ "# Load environment variables\n",
45
+ "load_dotenv(\"YOUR_ENV_FILE\")\n",
46
+ "\n",
47
+ "# Initialize OpenAI API\n",
48
+ "openai.api_key = os.getenv(\"OPENAI_API_KEY\")"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "markdown",
53
+ "metadata": {},
54
+ "source": [
55
+ "## FinancialAnalysisSystem Class\n",
56
+ "\n",
57
+ "Now, let's define our `FinancialAnalysisSystem` class with AgentNeo integration."
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "execution_count": 2,
63
+ "metadata": {},
64
+ "outputs": [
65
+ {
66
+ "name": "stdout",
67
+ "output_type": "stream",
68
+ "text": [
69
+ "Project 'financial_analysis_project2' found.\n",
70
+ "Tracing Started.\n"
71
+ ]
72
+ }
73
+ ],
74
+ "source": [
75
+ "# Initialize AgentNeo session\n",
76
+ "neo_session = AgentNeo(session_name=\"financial_analysis_session2\")\n",
77
+ "\n",
78
+ "# Create project\n",
79
+ "neo_session.create_project(project_name=\"financial_analysis_project2\")\n",
80
+ "\n",
81
+ "# Start tracing\n",
82
+ "tracer = Tracer(session=neo_session)\n",
83
+ "tracer.start()"
84
+ ]
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": 3,
89
+ "metadata": {},
90
+ "outputs": [],
91
+ "source": [
92
+ "class FinancialAnalysisSystem:\n",
93
+ " def __init__(self):\n",
94
+ " self.stock_data = {}\n",
95
+ " self.news_sentiment = {}\n",
96
+ " self.economic_indicators = {}\n",
97
+ "\n",
98
+ " @tracer.trace_tool(name=\"fetch_stock_data\")\n",
99
+ " def fetch_stock_data(self, symbol):\n",
100
+ " return {\n",
101
+ " \"symbol\": symbol,\n",
102
+ " \"price\": round(random.uniform(50, 500), 2),\n",
103
+ " \"change\": round(random.uniform(-5, 5), 2),\n",
104
+ " }\n",
105
+ "\n",
106
+ " @tracer.trace_tool(name=\"fetch_news_articles\")\n",
107
+ " def fetch_news_articles(self, company):\n",
108
+ " return [\n",
109
+ " f\"{company} announces new product line\",\n",
110
+ " f\"{company} reports quarterly earnings\",\n",
111
+ " f\"{company} faces regulatory scrutiny\",\n",
112
+ " ]\n",
113
+ "\n",
114
+ " @tracer.trace_tool(name=\"analyze_sentiment\")\n",
115
+ " def analyze_sentiment(self, text):\n",
116
+ " return TextBlob(text).sentiment.polarity\n",
117
+ "\n",
118
+ " @tracer.trace_tool(name=\"fetch_economic_indicators\")\n",
119
+ " def fetch_economic_indicators(self):\n",
120
+ " return {\n",
121
+ " \"gdp_growth\": round(random.uniform(-2, 5), 2),\n",
122
+ " \"unemployment_rate\": round(random.uniform(3, 10), 2),\n",
123
+ " \"inflation_rate\": round(random.uniform(0, 5), 2),\n",
124
+ " }\n",
125
+ "\n",
126
+ " @tracer.trace_llm(name=\"analyze_market_conditions\")\n",
127
+ " def analyze_market_conditions(self, stock_data, sentiment, economic_indicators):\n",
128
+ " prompt = f\"\"\"\n",
129
+ " Analyze the following market conditions and provide a brief market outlook:\n",
130
+ " Stock: {stock_data['symbol']} at ${stock_data['price']} (change: {stock_data['change']}%)\n",
131
+ " News Sentiment: {sentiment}\n",
132
+ " Economic Indicators:\n",
133
+ " - GDP Growth: {economic_indicators['gdp_growth']}%\n",
134
+ " - Unemployment Rate: {economic_indicators['unemployment_rate']}%\n",
135
+ " - Inflation Rate: {economic_indicators['inflation_rate']}%\n",
136
+ " \"\"\"\n",
137
+ " response = openai.chat.completions.create(\n",
138
+ " model=\"gpt-4-0125-preview\",\n",
139
+ " messages=[{\"role\": \"user\", \"content\": prompt}],\n",
140
+ " max_tokens=150,\n",
141
+ " )\n",
142
+ " return response.choices[0].message.content.strip()\n",
143
+ "\n",
144
+ " @tracer.trace_llm(name=\"generate_investment_recommendation\")\n",
145
+ " def generate_investment_recommendation(self, market_outlook, risk_tolerance):\n",
146
+ " prompt = f\"\"\"\n",
147
+ " Based on the following market outlook and investor risk tolerance,\n",
148
+ " provide a specific investment recommendation:\n",
149
+ " Market Outlook: {market_outlook}\n",
150
+ " Investor Risk Tolerance: {risk_tolerance}\n",
151
+ " \"\"\"\n",
152
+ " response = openai.chat.completions.create(\n",
153
+ " model=\"gpt-4-0125-preview\",\n",
154
+ " messages=[{\"role\": \"user\", \"content\": prompt}],\n",
155
+ " max_tokens=200,\n",
156
+ " )\n",
157
+ " return response.choices[0].message.content.strip()\n",
158
+ "\n",
159
+ " @tracer.trace_agent(name=\"FinancialAdvisorAgent\")\n",
160
+ " def financial_advisor_agent(self, stock_symbol, risk_tolerance):\n",
161
+ " self.stock_data = self.fetch_stock_data(stock_symbol)\n",
162
+ " news_articles = self.fetch_news_articles(stock_symbol)\n",
163
+ " sentiment_scores = [self.analyze_sentiment(article) for article in news_articles]\n",
164
+ " self.news_sentiment = sum(sentiment_scores) / len(sentiment_scores)\n",
165
+ " self.economic_indicators = self.fetch_economic_indicators()\n",
166
+ " market_outlook = self.analyze_market_conditions(\n",
167
+ " self.stock_data, self.news_sentiment, self.economic_indicators\n",
168
+ " )\n",
169
+ " recommendation = self.generate_investment_recommendation(market_outlook, risk_tolerance)\n",
170
+ " return recommendation\n",
171
+ "\n",
172
+ " def run_analysis(self, stock_symbol, risk_tolerance):\n",
173
+ " recommendation = self.financial_advisor_agent(stock_symbol, risk_tolerance)\n",
174
+ " print(f\"\\nAnalysis for {stock_symbol}:\")\n",
175
+ " print(f\"Stock Data: {self.stock_data}\")\n",
176
+ " print(f\"News Sentiment: {self.news_sentiment}\")\n",
177
+ " print(f\"Economic Indicators: {self.economic_indicators}\")\n",
178
+ " print(f\"\\nInvestment Recommendation:\\n{recommendation}\")\n",
179
+ " if \"buy\" in recommendation.lower():\n",
180
+ " self.execute_buy_order(stock_symbol)\n",
181
+ " elif \"sell\" in recommendation.lower():\n",
182
+ " self.execute_sell_order(stock_symbol)\n",
183
+ " else:\n",
184
+ " print(\"No action taken based on the current recommendation.\")\n",
185
+ "\n",
186
+ " @tracer.trace_tool(name=\"execute_buy_order\")\n",
187
+ " def execute_buy_order(self, symbol):\n",
188
+ " print(f\"Executing buy order for {symbol}\")\n",
189
+ "\n",
190
+ " @tracer.trace_tool(name=\"execute_sell_order\")\n",
191
+ " def execute_sell_order(self, symbol):\n",
192
+ " print(f\"Executing sell order for {symbol}\")"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "markdown",
197
+ "metadata": {},
198
+ "source": [
199
+ "## Running the Analysis\n",
200
+ "\n",
201
+ "Now let's create an instance of our `FinancialAnalysisSystem` and run an analysis."
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": 4,
207
+ "metadata": {},
208
+ "outputs": [
209
+ {
210
+ "name": "stderr",
211
+ "output_type": "stream",
212
+ "text": [
213
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
214
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
215
+ "DEBUG:agentneo.tracing.agent_tracer:Successfully updated and committed AgentCallModel with id 39\n"
216
+ ]
217
+ },
218
+ {
219
+ "name": "stdout",
220
+ "output_type": "stream",
221
+ "text": [
222
+ "\n",
223
+ "Analysis for AAPL:\n",
224
+ "Stock Data: {'symbol': 'AAPL', 'price': 215.96, 'change': -1.66}\n",
225
+ "News Sentiment: 0.04545454545454545\n",
226
+ "Economic Indicators: {'gdp_growth': 4.75, 'unemployment_rate': 4.82, 'inflation_rate': 0.87}\n",
227
+ "\n",
228
+ "Investment Recommendation:\n",
229
+ "Given the information provided on AAPL and the broader economic indicators, and considering an investor with moderate risk tolerance, a specific investment recommendation would be as follows:\n",
230
+ "\n",
231
+ "### Investment Recommendation: Diversified Approach with Focus on AAPL\n",
232
+ "\n",
233
+ "### 1. Partial Investment in AAPL:\n",
234
+ "Given AAPL's current trading price and the slight dip it has experienced, it could be an opportune moment to buy the stock for those with a moderate risk tolerance. The decrease in AAPL’s share price might not necessarily be due to company-specific negatives as it's within normal market fluctuations. Additionally, the marginally positive news sentiment suggests there isn't a prevailing negative view on the company, which could be seen as a positive indicator. It would be prudent to allocate a portion of the investment portfolio to AAPL shares, capitalizing on the current lower price with the expectation of long-term growth. AAPL's historical performance, its strong product ecosystem, and continuous innovation could be viewed as catalysts for future growth.\n",
235
+ "Executing buy order for AAPL\n",
236
+ "Tracing Completed.\n",
237
+ "Data saved to the database and JSON file.\n",
238
+ "\n"
239
+ ]
240
+ }
241
+ ],
242
+ "source": [
243
+ "# Create an instance of FinancialAnalysisSystem\n",
244
+ "analysis_system = FinancialAnalysisSystem()\n",
245
+ "\n",
246
+ "# Run an analysis for Apple stock with moderate risk tolerance\n",
247
+ "analysis_system.run_analysis(\"AAPL\", \"moderate\")\n",
248
+ "\n",
249
+ "# Stop the tracer when analysis is complete\n",
250
+ "tracer.stop()"
251
+ ]
252
+ },
253
+ {
254
+ "cell_type": "markdown",
255
+ "metadata": {},
256
+ "source": [
257
+ "## Evaluation using Metrics"
258
+ ]
259
+ },
260
+ {
261
+ "cell_type": "code",
262
+ "execution_count": 5,
263
+ "metadata": {},
264
+ "outputs": [
265
+ {
266
+ "name": "stderr",
267
+ "output_type": "stream",
268
+ "text": [
269
+ "\u001b[92m17:21:46 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
270
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
271
+ "INFO:LiteLLM:\n",
272
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n"
273
+ ]
274
+ },
275
+ {
276
+ "name": "stderr",
277
+ "output_type": "stream",
278
+ "text": [
279
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
280
+ "\u001b[92m17:21:48 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
281
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
282
+ "\u001b[92m17:21:48 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
283
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
284
+ "INFO:LiteLLM:\n",
285
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
286
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
287
+ "\u001b[92m17:21:51 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
288
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
289
+ "\u001b[92m17:21:51 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
290
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
291
+ "INFO:LiteLLM:\n",
292
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
293
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
294
+ "\u001b[92m17:21:55 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
295
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
296
+ "\u001b[92m17:21:55 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
297
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
298
+ "INFO:LiteLLM:\n",
299
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
300
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
301
+ "\u001b[92m17:21:57 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
302
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
303
+ "\u001b[92m17:21:57 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
304
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
305
+ "INFO:LiteLLM:\n",
306
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
307
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
308
+ "\u001b[92m17:22:01 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
309
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
310
+ "\u001b[92m17:22:01 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
311
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
312
+ "INFO:LiteLLM:\n",
313
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
314
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
315
+ "\u001b[92m17:22:02 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
316
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
317
+ "\u001b[92m17:22:02 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
318
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
319
+ "INFO:LiteLLM:\n",
320
+ "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
321
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
322
+ "\u001b[92m17:22:07 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
323
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
324
+ "\u001b[92m17:22:07 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
325
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
326
+ "INFO:LiteLLM:\n",
327
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
328
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
329
+ "\u001b[92m17:22:08 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
330
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
331
+ "\u001b[92m17:22:08 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
332
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
333
+ "INFO:LiteLLM:\n",
334
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
335
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
336
+ "\u001b[92m17:22:09 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
337
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
338
+ "\u001b[92m17:22:09 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
339
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
340
+ "INFO:LiteLLM:\n",
341
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
342
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
343
+ "\u001b[92m17:22:10 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
344
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
345
+ "\u001b[92m17:22:10 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
346
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
347
+ "INFO:LiteLLM:\n",
348
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
349
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
350
+ "\u001b[92m17:22:11 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
351
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
352
+ "\u001b[92m17:22:11 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
353
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
354
+ "INFO:LiteLLM:\n",
355
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
356
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
357
+ "\u001b[92m17:22:13 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
358
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
359
+ "\u001b[92m17:22:13 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
360
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
361
+ "INFO:LiteLLM:\n",
362
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
363
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
364
+ "\u001b[92m17:22:14 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
365
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
366
+ "\u001b[92m17:22:14 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
367
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
368
+ "INFO:LiteLLM:\n",
369
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
370
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
371
+ "\u001b[92m17:22:15 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
372
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n",
373
+ "\u001b[92m17:22:15 - LiteLLM:INFO\u001b[0m: utils.py:2740 - \n",
374
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
375
+ "INFO:LiteLLM:\n",
376
+ "LiteLLM completion() model= gpt-3.5-turbo; provider = openai\n",
377
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
378
+ "\u001b[92m17:22:17 - LiteLLM:INFO\u001b[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler\n",
379
+ "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n"
380
+ ]
381
+ }
382
+ ],
383
+ "source": [
384
+ "exe = Evaluation(session=neo_session, trace_id=tracer.trace_id)\n",
385
+ "\n",
386
+ "# run multiple metrics in one call\n",
387
+ "exe.evaluate(metric_list=['goal_decomposition_efficiency', \n",
388
+ " 'goal_fulfillment_rate', \n",
389
+ " 'tool_call_correctness_rate', \n",
390
+ " 'tool_call_success_rate'])"
391
+ ]
392
+ },
393
+ {
394
+ "cell_type": "code",
395
+ "execution_count": 7,
396
+ "metadata": {},
397
+ "outputs": [
398
+ {
399
+ "data": {
400
+ "text/plain": [
401
+ "[{'metric_name': 'goal_decomposition_efficiency',\n",
402
+ " 'score': 0.85,\n",
403
+ " 'reason': 'The decomposition of the original goal into sub-tasks is largely effective, with each sub-task logically contributing to the overall objective of making an informed investment decision. The tools are appropriately assigned to each sub-task, and the sequence of tasks follows a logical progression from data gathering to analysis and finally to execution. However, while the sub-tasks cover most aspects of the original goal, the sentiment analysis could be more granular by providing a more detailed breakdown of sentiment scores for each article. Overall, the decomposition is efficient and would likely scale well for similar investment analysis tasks.',\n",
404
+ " 'result_detail': {'metric_name': 'goal_fulfillment_rate',\n",
405
+ " 'config': {},\n",
406
+ " 'result': {'originalGoal': 'Make informed investment decisions regarding Apple Inc. (AAPL) stock by understanding current market conditions, including stock performance, news sentiment, and economic indicators.',\n",
407
+ " 'subtasks': ['Fetch stock data for AAPL.',\n",
408
+ " 'Fetch news articles related to AAPL.',\n",
409
+ " 'Analyze sentiment for each news article.',\n",
410
+ " 'Fetch economic indicators.',\n",
411
+ " 'Analyze market conditions based on stock performance, news sentiment, and economic indicators.',\n",
412
+ " 'Generate an investment recommendation based on the market outlook and investor risk tolerance.',\n",
413
+ " 'Execute a buy order for AAPL shares.'],\n",
414
+ " 'score': 0.85,\n",
415
+ " 'reason': 'The decomposition of the original goal into sub-tasks is largely effective, with each sub-task logically contributing to the overall objective of making an informed investment decision. The tools are appropriately assigned to each sub-task, and the sequence of tasks follows a logical progression from data gathering to analysis and finally to execution. However, while the sub-tasks cover most aspects of the original goal, the sentiment analysis could be more granular by providing a more detailed breakdown of sentiment scores for each article. Overall, the decomposition is efficient and would likely scale well for similar investment analysis tasks.'}},\n",
416
+ " 'config': {},\n",
417
+ " 'start_time': '2024-10-22T17:21:46.507620',\n",
418
+ " 'end_time': '2024-10-22T17:21:55.573536',\n",
419
+ " 'duration': 9.065916},\n",
420
+ " {'metric_name': 'goal_fulfillment_rate',\n",
421
+ " 'score': 0.8,\n",
422
+ " 'reason': \"The system responses effectively address the user's intent to make informed investment decisions regarding AAPL stock. The responses provide relevant stock data, news articles, sentiment analysis, and economic indicators, which are crucial for evaluating the investment potential. The stock performance data indicates a slight dip, which aligns with the user's interest in understanding current market conditions. The news sentiment score, although low, suggests a marginally positive outlook, which is a relevant factor for investment decisions. The analysis of market conditions offers a balanced view, indicating that the price drop may be part of normal fluctuations rather than a significant negative event. Furthermore, the investment recommendation is tailored to the user's moderate risk tolerance, suggesting a partial investment in AAPL, which is a proactive strategy. However, the execution of the buy order is missing, which is a critical step in fulfilling the user's intent to invest. This omission prevents a perfect score, as the user expressed a clear intent to execute a buy order based on the insights provided. Overall, the responses are comprehensive and align well with the user's goals, warranting a score of 0.8.\",\n",
423
+ " 'result_detail': {'metric_name': 'goal_fulfillment_rate',\n",
424
+ " 'config': {},\n",
425
+ " 'result': {'inputGoal': \"The user aims to make informed investment decisions regarding Apple Inc. (AAPL) stock. They seek to understand the current market conditions, including stock performance, news sentiment, and economic indicators, to evaluate the potential for investment. After analyzing the data, the user expresses a moderate risk tolerance and ultimately decides to execute a buy order for AAPL shares, indicating a proactive approach to capitalizing on the stock's current price dip. This reflects a clear intent to invest strategically based on the gathered insights.\",\n",
426
+ " 'relevantResponses': \"fetch_stock_data: {'symbol': 'AAPL', 'price': 215.96, 'change': -1.66}\\n\\nfetch_news_articles: ['AAPL announces new product line', 'AAPL reports quarterly earnings', 'AAPL faces regulatory scrutiny']\\n\\nanalyze_sentiment: 0.13636363636363635\\n\\nanalyze_sentiment: 0.0\\n\\nanalyze_sentiment: 0.0\\n\\nfetch_economic_indicators: {'gdp_growth': 4.75, 'unemployment_rate': 4.82, 'inflation_rate': 0.87}\\n\\nanalyze_market_conditions: Given the information you've provided about AAPL and the broader economic indicators, here's a brief market outlook:\\n\\n### **Stock Performance: AAPL**\\nAAPL is currently trading at $215.96, which represents a 1.66% decrease. Such a dip could be the result of various factors including market sentiment, company-specific news, or sector-wide shifts. Without more context, it’s difficult to pinpoint the cause, but the drop isn't drastic, suggesting it could be part of normal market fluctuations rather than a response to negative company-specific news.\\n\\n### **News Sentiment**\\nThe news sentiment score is 0.04545454545454545. This score, hovering slightly above zero, indicates a marginally positive sentiment in the news\\n\\ngenerate_investment_recommendation: Given the information provided on AAPL and the broader economic indicators, and considering an investor with moderate risk tolerance, a specific investment recommendation would be as follows:\\n\\n### Investment Recommendation: Diversified Approach with Focus on AAPL\\n\\n### 1. Partial Investment in AAPL:\\nGiven AAPL's current trading price and the slight dip it has experienced, it could be an opportune moment to buy the stock for those with a moderate risk tolerance. The decrease in AAPL’s share price might not necessarily be due to company-specific negatives as it's within normal market fluctuations. 
Additionally, the marginally positive news sentiment suggests there isn't a prevailing negative view on the company, which could be seen as a positive indicator. It would be prudent to allocate a portion of the investment portfolio to AAPL shares, capitalizing on the current lower price with the expectation of long-term growth. AAPL's historical performance, its strong product ecosystem, and continuous innovation could be viewed as catalysts for future growth. \\n\\n\\n\\nexecute_buy_order: None\",\n",
427
+ " 'score': 0.8,\n",
428
+ " 'reason': \"The system responses effectively address the user's intent to make informed investment decisions regarding AAPL stock. The responses provide relevant stock data, news articles, sentiment analysis, and economic indicators, which are crucial for evaluating the investment potential. The stock performance data indicates a slight dip, which aligns with the user's interest in understanding current market conditions. The news sentiment score, although low, suggests a marginally positive outlook, which is a relevant factor for investment decisions. The analysis of market conditions offers a balanced view, indicating that the price drop may be part of normal fluctuations rather than a significant negative event. Furthermore, the investment recommendation is tailored to the user's moderate risk tolerance, suggesting a partial investment in AAPL, which is a proactive strategy. However, the execution of the buy order is missing, which is a critical step in fulfilling the user's intent to invest. This omission prevents a perfect score, as the user expressed a clear intent to execute a buy order based on the insights provided. Overall, the responses are comprehensive and align well with the user's goals, warranting a score of 0.8.\"}},\n",
429
+ " 'config': {},\n",
430
+ " 'start_time': '2024-10-22T17:21:55.573671',\n",
431
+ " 'end_time': '2024-10-22T17:22:01.774679',\n",
432
+ " 'duration': 6.201008},\n",
433
+ " {'metric_name': 'tool_call_correctness_rate',\n",
434
+ " 'score': 0.7142857142857143,\n",
435
+ " 'reason': 'The correctness rate of 0.71 (or 71%) indicates that out of the total 7 tool calls made, 5 were appropriate and aligned with the intended tools for the query. \\n\\nIn this interaction, the user requested an analysis of market conditions for AAPL stock, which required fetching stock data, analyzing sentiment, and reviewing economic indicators. The intended tools were correctly identified as `fetch_stock_data`, `analyze_sentiment`, and `fetch_economic_indicators`. \\n\\nHowever, the total calls made included 2 additional calls that were not necessary for fulfilling the query. These could have been calls to `fetch_news_articles` or `execute_buy_order`, which do not directly contribute to the analysis requested. \\n\\nThe discrepancy between the intended tools and the actual tool usage led to the 2 incorrect calls, resulting in a correctness rate of 5 correct calls out of 7 total calls. This highlights the importance of using only the relevant tools for a given query to improve efficiency and accuracy in tool usage.',\n",
436
+ " 'result_detail': {'metric_name': 'tool_correctness',\n",
437
+ " 'config': {},\n",
438
+ " 'result': {'score': 0.7142857142857143,\n",
439
+ " 'reason': 'The correctness rate of 0.71 (or 71%) indicates that out of the total 7 tool calls made, 5 were appropriate and aligned with the intended tools for the query. \\n\\nIn this interaction, the user requested an analysis of market conditions for AAPL stock, which required fetching stock data, analyzing sentiment, and reviewing economic indicators. The intended tools were correctly identified as `fetch_stock_data`, `analyze_sentiment`, and `fetch_economic_indicators`. \\n\\nHowever, the total calls made included 2 additional calls that were not necessary for fulfilling the query. These could have been calls to `fetch_news_articles` or `execute_buy_order`, which do not directly contribute to the analysis requested. \\n\\nThe discrepancy between the intended tools and the actual tool usage led to the 2 incorrect calls, resulting in a correctness rate of 5 correct calls out of 7 total calls. This highlights the importance of using only the relevant tools for a given query to improve efficiency and accuracy in tool usage.',\n",
440
+ " 'details': {'correct_calls': 5,\n",
441
+ " 'total_calls': 7,\n",
442
+ " 'intended_tools': ['fetch_stock_data',\n",
443
+ " 'analyze_sentiment',\n",
444
+ " 'fetch_economic_indicators'],\n",
445
+ " 'available_tools': ['fetch_stock_data',\n",
446
+ " 'analyze_sentiment',\n",
447
+ " 'fetch_news_articles',\n",
448
+ " 'execute_buy_order',\n",
449
+ " 'fetch_economic_indicators']}}},\n",
450
+ " 'config': {},\n",
451
+ " 'start_time': '2024-10-22T17:22:01.774778',\n",
452
+ " 'end_time': '2024-10-22T17:22:07.358702',\n",
453
+ " 'duration': 5.583924},\n",
454
+ " {'metric_name': 'tool_call_success_rate',\n",
455
+ " 'score': 0.8571428571428571,\n",
456
+ " 'reason': \"The tool call had an overall success rate of 0.86, with the majority of tool calls (6 out of 7) being successful. The successful tool calls returned the expected output without any visible errors, indicating that the tool was able to retrieve and process the requested information effectively. However, there was one failed tool call where the output was 'None', suggesting that there may have been an issue with generating the output. In general, the tool's performance was good, but there may be room for improvement in handling certain edge cases or errors.\",\n",
457
+ " 'result_detail': {'metric_name': 'tool_call_success_rate',\n",
458
+ " 'config': {},\n",
459
+ " 'result': {'score': 0.8571428571428571,\n",
460
+ " 'reason': \"The tool call had an overall success rate of 0.86, with the majority of tool calls (6 out of 7) being successful. The successful tool calls returned the expected output without any visible errors, indicating that the tool was able to retrieve and process the requested information effectively. However, there was one failed tool call where the output was 'None', suggesting that there may have been an issue with generating the output. In general, the tool's performance was good, but there may be room for improvement in handling certain edge cases or errors.\"}},\n",
461
+ " 'config': {},\n",
462
+ " 'start_time': '2024-10-22T17:22:07.358790',\n",
463
+ " 'end_time': '2024-10-22T17:22:17.297285',\n",
464
+ " 'duration': 9.938495}]"
465
+ ]
466
+ },
467
+ "execution_count": 7,
468
+ "metadata": {},
469
+ "output_type": "execute_result"
470
+ }
471
+ ],
472
+ "source": [
473
+ "results = exe.get_results()\n",
474
+ "results"
475
+ ]
476
+ },
477
+ {
478
+ "cell_type": "markdown",
479
+ "metadata": {},
480
+ "source": [
481
+ "## Analyzing the Results\n",
482
+ "\n",
483
+ "After running the analysis, you can examine the output to see the stock data, news sentiment, economic indicators, and the investment recommendation. The AgentNeo tracer will have logged all the steps of the process, which you can later analyze using the AgentNeo dashboard.\n",
484
+ "\n",
485
+ "To launch the AgentNeo dashboard and analyze the traces, you can use:"
486
+ ]
487
+ },
488
+ {
489
+ "cell_type": "code",
490
+ "execution_count": 6,
491
+ "metadata": {},
492
+ "outputs": [
493
+ {
494
+ "name": "stderr",
495
+ "output_type": "stream",
496
+ "text": [
497
+ "INFO:root:Port 3000 is busy. Finding an available port...\n",
498
+ "INFO:root:Using port 3002\n",
499
+ "INFO:root:Dashboard launched successfully. Access it at: http://localhost:3002\n"
500
+ ]
501
+ }
502
+ ],
503
+ "source": [
504
+ "neo_session.launch_dashboard(port=3000)"
505
+ ]
506
+ },
507
+ {
508
+ "cell_type": "markdown",
509
+ "metadata": {},
510
+ "source": [
511
+ "Once the dashboard is running, you can visualize the execution flow, identify any bottlenecks, and gain insights into the decision-making process of your financial analysis system."
512
+ ]
513
+ }
514
+ ],
515
+ "metadata": {
516
+ "kernelspec": {
517
+ "display_name": "base",
518
+ "language": "python",
519
+ "name": "python3"
520
+ },
521
+ "language_info": {
522
+ "codemirror_mode": {
523
+ "name": "ipython",
524
+ "version": 3
525
+ },
526
+ "file_extension": ".py",
527
+ "mimetype": "text/x-python",
528
+ "name": "python",
529
+ "nbconvert_exporter": "python",
530
+ "pygments_lexer": "ipython3",
531
+ "version": "3.11.4"
532
+ }
533
+ },
534
+ "nbformat": 4,
535
+ "nbformat_minor": 2
536
+ }