dhisana 0.0.1.dev5__tar.gz → 0.0.1.dev7__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (31)
  1. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/PKG-INFO +2 -1
  2. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/setup.py +3 -2
  3. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/ui/components.py +44 -5
  4. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/agent_tools.py +306 -76
  5. dhisana-0.0.1.dev7/src/dhisana/utils/linkedin_crawler.py +177 -0
  6. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/openai_helpers.py +283 -71
  7. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +14 -9
  8. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/tools_json.py +29 -0
  9. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana.egg-info/PKG-INFO +2 -1
  10. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana.egg-info/SOURCES.txt +1 -0
  11. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana.egg-info/requires.txt +1 -0
  12. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/tests/test_agent_tools.py +45 -13
  13. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/README.md +0 -0
  14. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/pyproject.toml +0 -0
  15. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/setup.cfg +0 -0
  16. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/__init__.py +0 -0
  17. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/cli/__init__.py +0 -0
  18. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/cli/cli.py +0 -0
  19. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/cli/datasets.py +0 -0
  20. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/cli/models.py +0 -0
  21. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/cli/predictions.py +0 -0
  22. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/ui/__init__.py +0 -0
  23. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/__init__.py +0 -0
  24. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/assistant_tool_tag.py +0 -0
  25. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/openapi_spec_to_tools.py +0 -0
  26. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/openapi_tool/__init__.py +0 -0
  27. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/openapi_tool/api_models.py +0 -0
  28. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/openapi_tool/openapi_tool.py +0 -0
  29. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana.egg-info/dependency_links.txt +0 -0
  30. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana.egg-info/entry_points.txt +0 -0
  31. {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dhisana
3
- Version: 0.0.1.dev5
3
+ Version: 0.0.1.dev7
4
4
  Summary: A Python SDK for Dhisana AI Platform
5
5
  Home-page: https://github.com/dhisana-ai/dhisana-python-sdk
6
6
  Author: Admin
@@ -22,3 +22,4 @@ Requires-Dist: requests
22
22
  Requires-Dist: uvicorn[standard]
23
23
  Requires-Dist: aiohttp
24
24
  Requires-Dist: openapi_pydantic
25
+ Requires-Dist: pandas
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='dhisana',
5
- version='0.0.1-dev5',
5
+ version='0.0.1-dev7',
6
6
  description='A Python SDK for Dhisana AI Platform',
7
7
  author='Admin',
8
8
  author_email='contact@dhisana.ai',
@@ -21,7 +21,8 @@ setup(
21
21
  'requests',
22
22
  'uvicorn[standard]',
23
23
  'aiohttp',
24
- 'openapi_pydantic'
24
+ 'openapi_pydantic',
25
+ 'pandas'
25
26
  ],
26
27
  entry_points={
27
28
  'console_scripts': [
@@ -48,6 +48,18 @@ class Sidebar(Component):
48
48
  },
49
49
  }
50
50
 
51
+ class Text(Component):
52
+ def __init__(self, content: str):
53
+ self.content = content
54
+
55
+ def to_dict(self):
56
+ return {
57
+ 'type': 'text',
58
+ 'properties': {
59
+ 'content': self.content,
60
+ },
61
+ }
62
+
51
63
 
52
64
  class MainContent(Component):
53
65
  def __init__(self, children: List[Component]):
@@ -78,16 +90,23 @@ class ChatWindow(Component):
78
90
 
79
91
 
80
92
  class DataTable(Component):
81
- def __init__(self, columns: List[Dict[str, Any]], data_source: str):
93
+ def __init__(
94
+ self,
95
+ columns: List[Dict[str, Any]],
96
+ data_source: str,
97
+ actions: Optional[List[Dict[str, Any]]] = None,
98
+ ):
82
99
  self.columns = columns
83
- self.data_source = data_source # Should be a reference to data in dataContext
100
+ self.data_source = data_source
101
+ self.actions = actions or []
84
102
 
85
103
  def to_dict(self):
86
104
  return {
87
105
  'type': 'data-table',
88
106
  'properties': {
89
107
  'columns': self.columns,
90
- 'dataSource': self.data_source, # Should be in the form '{{dataKey}}'
108
+ 'dataSource': self.data_source,
109
+ 'actions': self.actions,
91
110
  },
92
111
  }
93
112
 
@@ -172,14 +191,16 @@ class TextArea(Component):
172
191
 
173
192
 
174
193
  class Upload(Component):
175
- def __init__(self, name: str):
194
+ def __init__(self, name: str, required: bool = False):
176
195
  self.name = name
196
+ self.required = required
177
197
 
178
198
  def to_dict(self):
179
199
  return {
180
200
  'type': 'upload',
181
201
  'properties': {
182
202
  'name': self.name,
203
+ 'required': self.required,
183
204
  },
184
205
  }
185
206
 
@@ -279,7 +300,7 @@ class Action:
279
300
  self,
280
301
  action_type: str,
281
302
  method: str,
282
- url: str,
303
+ url: Optional[str] = None,
283
304
  data: Optional[Any] = None,
284
305
  state: Optional[str] = None,
285
306
  on_success: Optional[str] = None,
@@ -302,6 +323,24 @@ class Action:
302
323
  }
303
324
 
304
325
 
326
+ class CustomInputOutputContent(Component):
327
+ def __init__(
328
+ self,
329
+ data_source: str,
330
+ actions: Optional[List[Dict[str, Any]]] = None,
331
+ ):
332
+ self.data_source = data_source
333
+ self.actions = actions or []
334
+
335
+ def to_dict(self):
336
+ return {
337
+ 'type': 'custom-input-output-content',
338
+ 'properties': {
339
+ 'dataSource': self.data_source,
340
+ 'actions': self.actions,
341
+ },
342
+ }
343
+
305
344
  def render(
306
345
  layout: str,
307
346
  components: List[Component],
@@ -7,22 +7,36 @@ import json
7
7
  import uuid
8
8
  import io
9
9
  import base64
10
+ import csv
11
+ import logging
12
+ from typing import List, Dict, Any, Optional
13
+
14
+ import pandas as pd
15
+ import httpx
10
16
  from bs4 import BeautifulSoup
11
17
  from playwright.async_api import async_playwright
12
18
  from email.mime.text import MIMEText
13
- from typing import List, Dict, Any
14
- from .assistant_tool_tag import assistant_tool
15
19
  from google.oauth2 import service_account
16
20
  from googleapiclient.discovery import build
17
21
  from googleapiclient.http import MediaIoBaseDownload, MediaFileUpload
18
- import httpx
19
22
  from google.auth.transport.requests import Request
20
- from typing import List
21
23
  from googleapiclient.errors import HttpError
24
+ from pydantic import BaseModel
25
+ from fastapi import HTTPException
26
+ from openai import LengthFinishReasonError, OpenAI, OpenAIError, AsyncOpenAI
27
+ from typing import List, Optional
28
+ import tempfile
29
+ import pandas as pd
30
+ from typing import List, Optional
31
+ import time
32
+
22
33
 
34
+
35
+ from dhisana.utils.assistant_tool_tag import assistant_tool
23
36
  GLOBAL_DATA_MODELS = []
24
37
  GLOBAL_TOOLS_FUNCTIONS = {}
25
38
 
39
+ OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
26
40
 
27
41
  @assistant_tool
28
42
  async def get_html_content_from_url(url):
@@ -31,13 +45,13 @@ async def get_html_content_from_url(url):
31
45
  context = await browser.new_context()
32
46
  page = await context.new_page()
33
47
 
34
- print(f"Requesting {url}")
48
+ logging.info(f"Requesting {url}")
35
49
  try:
36
50
  await page.goto(url, timeout=10000)
37
51
  html_content = await page.content()
38
52
  return await parse_html_content(html_content)
39
53
  except Exception as e:
40
- print(f"Failed to fetch {url}: {e}")
54
+ logging.info(f"Failed to fetch {url}: {e}")
41
55
  return ""
42
56
  finally:
43
57
  await browser.close()
@@ -130,7 +144,7 @@ async def get_file_content_from_googledrive_by_name(file_name: str = None) -> st
130
144
  done = False
131
145
  while not done:
132
146
  status, done = downloader.next_chunk()
133
- print(f"Download {int(status.progress() * 100)}%.")
147
+ logging.info(f"{file_name} Download {int(status.progress() * 100)}%.")
134
148
 
135
149
  # Close the file handle
136
150
  fh.close()
@@ -141,83 +155,86 @@ async def get_file_content_from_googledrive_by_name(file_name: str = None) -> st
141
155
 
142
156
  @assistant_tool
143
157
  async def write_content_to_googledrive(cloud_file_path: str, local_file_path: str) -> str:
144
- """
145
- Writes content from a local file to a file in Google Drive using a service account.
146
- If the file does not exist in Google Drive, it creates it along with any necessary intermediate directories.
147
-
148
- :param cloud_file_path: The path of the file to create or update on Google Drive.
149
- :param local_file_path: The path to the local file whose content will be uploaded.
150
- :return: The file ID of the uploaded or updated file.
151
- """
158
+ try:
159
+ """
160
+ Writes content from a local file to a file in Google Drive using a service account.
161
+ If the file does not exist in Google Drive, it creates it along with any necessary intermediate directories.
162
+
163
+ :param cloud_file_path: The path of the file to create or update on Google Drive.
164
+ :param local_file_path: The path to the local file whose content will be uploaded.
165
+ :return: The file ID of the uploaded or updated file.
166
+ """
152
167
 
153
- # Retrieve the service account JSON and email for automation from environment variables
154
- email_for_automation = os.getenv('EMAIL_FOR_AUTOMATION')
155
- service_account_base64 = os.getenv('GOOGLE_SERVICE_KEY')
156
- service_account_json = convert_base_64_json(service_account_base64)
168
+ # Retrieve the service account JSON and email for automation from environment variables
169
+ email_for_automation = os.getenv('EMAIL_FOR_AUTOMATION')
170
+ service_account_base64 = os.getenv('GOOGLE_SERVICE_KEY')
171
+ service_account_json = convert_base_64_json(service_account_base64)
157
172
 
158
- # Parse the JSON string into a dictionary
159
- service_account_info = json.loads(service_account_json)
173
+ # Parse the JSON string into a dictionary
174
+ service_account_info = json.loads(service_account_json)
160
175
 
161
- # Define the required scope for Google Drive API access
162
- SCOPES = ['https://www.googleapis.com/auth/drive']
176
+ # Define the required scope for Google Drive API access
177
+ SCOPES = ['https://www.googleapis.com/auth/drive']
163
178
 
164
- # Authenticate using the service account info and impersonate the specific email
165
- credentials = service_account.Credentials.from_service_account_info(
166
- service_account_info, scopes=SCOPES
167
- ).with_subject(email_for_automation)
179
+ # Authenticate using the service account info and impersonate the specific email
180
+ credentials = service_account.Credentials.from_service_account_info(
181
+ service_account_info, scopes=SCOPES
182
+ ).with_subject(email_for_automation)
168
183
 
169
- # Build the Google Drive service object
170
- service = build('drive', 'v3', credentials=credentials)
184
+ # Build the Google Drive service object
185
+ service = build('drive', 'v3', credentials=credentials)
171
186
 
172
- # Split the cloud file path into components
173
- path_components = cloud_file_path.split('/')
174
- parent_id = 'root'
175
-
176
- # Create intermediate directories if they don't exist
177
- for component in path_components[:-1]:
178
- query = f"'{parent_id}' in parents and name = '{component}' and mimeType = 'application/vnd.google-apps.folder'"
187
+ # Split the cloud file path into components
188
+ path_components = cloud_file_path.split('/')
189
+ parent_id = 'root'
190
+
191
+ # Create intermediate directories if they don't exist
192
+ for component in path_components[:-1]:
193
+ query = f"'{parent_id}' in parents and name = '{component}' and mimeType = 'application/vnd.google-apps.folder'"
194
+ results = service.files().list(q=query, pageSize=1, fields="files(id, name)").execute()
195
+ items = results.get('files', [])
196
+
197
+ if items:
198
+ parent_id = items[0]['id']
199
+ else:
200
+ file_metadata = {
201
+ 'name': component,
202
+ 'mimeType': 'application/vnd.google-apps.folder',
203
+ 'parents': [parent_id]
204
+ }
205
+ folder = service.files().create(body=file_metadata, fields='id').execute()
206
+ parent_id = folder.get('id')
207
+
208
+ # Prepare the file for upload
209
+ media_body = MediaFileUpload(local_file_path, resumable=True)
210
+ file_name = path_components[-1]
211
+
212
+ # Check if the file exists in the specified directory
213
+ query = f"'{parent_id}' in parents and name = '{file_name}'"
179
214
  results = service.files().list(q=query, pageSize=1, fields="files(id, name)").execute()
180
215
  items = results.get('files', [])
181
-
216
+
182
217
  if items:
183
- parent_id = items[0]['id']
218
+ # File exists, update its content
219
+ file_id = items[0]['id']
220
+ updated_file = service.files().update(
221
+ fileId=file_id,
222
+ media_body=media_body
223
+ ).execute()
184
224
  else:
225
+ # File does not exist, create a new one
185
226
  file_metadata = {
186
- 'name': component,
187
- 'mimeType': 'application/vnd.google-apps.folder',
227
+ 'name': file_name,
188
228
  'parents': [parent_id]
189
229
  }
190
- folder = service.files().create(body=file_metadata, fields='id').execute()
191
- parent_id = folder.get('id')
192
-
193
- # Prepare the file for upload
194
- media_body = MediaFileUpload(local_file_path, resumable=True)
195
- file_name = path_components[-1]
196
-
197
- # Check if the file exists in the specified directory
198
- query = f"'{parent_id}' in parents and name = '{file_name}'"
199
- results = service.files().list(q=query, pageSize=1, fields="files(id, name)").execute()
200
- items = results.get('files', [])
201
-
202
- if items:
203
- # File exists, update its content
204
- file_id = items[0]['id']
205
- updated_file = service.files().update(
206
- fileId=file_id,
207
- media_body=media_body
208
- ).execute()
209
- else:
210
- # File does not exist, create a new one
211
- file_metadata = {
212
- 'name': file_name,
213
- 'parents': [parent_id]
214
- }
215
- created_file = service.files().create(
216
- body=file_metadata,
217
- media_body=media_body,
218
- fields='id'
219
- ).execute()
220
- file_id = created_file.get('id')
230
+ created_file = service.files().create(
231
+ body=file_metadata,
232
+ media_body=media_body,
233
+ fields='id'
234
+ ).execute()
235
+ file_id = created_file.get('id')
236
+ except HttpError as error:
237
+ raise Exception(f"list_files_in_drive_folder_by_name An error occurred: {error}")
221
238
 
222
239
  return file_id
223
240
 
@@ -278,7 +295,7 @@ async def list_files_in_drive_folder_by_name(folder_path: str = None) -> List[st
278
295
  # Update folder_id to the ID of the found folder
279
296
  folder_id = items[0]['id']
280
297
  except HttpError as error:
281
- raise Exception(f"An error occurred: {error}")
298
+ raise Exception(f"list_files_in_drive_folder_by_name An error occurred: {error}")
282
299
 
283
300
  # Now folder_id is the ID of the desired folder
284
301
  # List all files in the specified folder
@@ -294,7 +311,7 @@ async def list_files_in_drive_folder_by_name(folder_path: str = None) -> List[st
294
311
  file_names = [item['name'] for item in items]
295
312
  return file_names
296
313
  except HttpError as error:
297
- raise Exception(f"An error occurred while listing files: {error}")
314
+ raise Exception(f"list_files_in_drive_folder_by_name An error occurred while listing files: {error}")
298
315
 
299
316
 
300
317
  @assistant_tool
@@ -442,14 +459,227 @@ async def get_calendar_events_using_service_account_async(
442
459
  events = events_result.get('items', [])
443
460
 
444
461
  if not events:
445
- print('No upcoming events found within the specified range.')
462
+ logging.info('No upcoming events found within the specified range.')
446
463
  else:
447
- print('Upcoming events:')
464
+ logging.info('Upcoming events:')
448
465
  for event in events:
449
466
  start = event['start'].get('dateTime', event['start'].get('date'))
450
- print(f"{start} - {event.get('summary', 'No Title')}")
467
+ logging.info(f"{start} - {event.get('summary', 'No Title')}")
451
468
 
452
469
  return events
453
470
 
471
+ class FileItem:
472
+ def __init__(self, file_path: str):
473
+ self.file_path = file_path
474
+
475
+ class FileList:
476
+ def __init__(self, files: List[FileItem]):
477
+ self.files = files
478
+
479
+ class PandasQuery(BaseModel):
480
+ pandas_query: str
481
+
482
+
483
+
484
+ @assistant_tool
485
+ async def query_dataframes(user_query: str, input_files: Optional[List[str]], output_file_path: Optional[str] = None) -> str:
486
+ """
487
+ Query multiple dataframes based on a user query and write the output dataframe to a specified output file path.
488
+
489
+ Args:
490
+ user_query (str): User query in natural language.
491
+ input_files (List[str]): List of paths to CSV files to be loaded into dataframes.
492
+ output_file_path (Optional[str]): Path to the output file where the resulting dataframe will be saved.
493
+ If not specified, a unique file path will be generated in '/tmp/run_interim_outputs/'.
494
+
495
+ Returns:
496
+ str: A JSON string representing the FileList containing the path to the output file if created, otherwise an empty list.
497
+ """
498
+ max_retries = 3
499
+ # Check if the list of CSV files or the user query is empty
500
+ if not input_files or not user_query:
501
+ # Return an empty FileList as JSON
502
+ return json.dumps({"files": []})
503
+
504
+ # If output_file_path is not specified, generate one
505
+ if not output_file_path:
506
+ output_folder = '/tmp/run_interim_outputs/'
507
+ # Ensure output_folder exists
508
+ os.makedirs(output_folder, exist_ok=True)
509
+ # Generate a unique filename
510
+ unique_number = int(time.time() * 1000) # milliseconds since epoch
511
+ output_file_name = f'query_dataframe_{unique_number}.csv'
512
+ output_file_path = os.path.join(output_folder, output_file_name)
513
+ else:
514
+ # Ensure the directory exists
515
+ output_folder = os.path.dirname(output_file_path)
516
+ if output_folder:
517
+ os.makedirs(output_folder, exist_ok=True)
518
+
519
+ # Load CSV files into dataframes, skipping empty files
520
+ data_frames = []
521
+ df_names = []
522
+ for idx, file in enumerate(input_files):
523
+ # Check if the file is empty
524
+ if os.path.getsize(file) == 0:
525
+ # Skip empty files
526
+ continue
527
+ df = pd.read_csv(file)
528
+ data_frames.append(df)
529
+ df_name = f'df{idx+1}'
530
+ df_names.append(df_name)
531
+
532
+ # Check if any dataframes were loaded
533
+ if not data_frames:
534
+ # Return an empty FileList as JSON
535
+ return json.dumps({"files": []})
536
+
537
+ # Create a context with the dataframes and their schemas
538
+ schema_info = ""
539
+ for df_name, df in zip(df_names, data_frames):
540
+ schema_info += f"DataFrame '{df_name}' columns: {', '.join(df.columns)}\n"
541
+
542
+ # Initialize the error message as empty
543
+ error_message = ""
544
+
545
+ for attempt in range(max_retries):
546
+ # Prepare the message
547
+ message = f"""
548
+ You are an expert data analyst. Given the following DataFrames and their schemas:
549
+
550
+ {schema_info}
551
+
552
+ Write a pandas query to answer the following question:
553
+
554
+ \"\"\"{user_query}\"\"\"
555
+
556
+ Your query should use the provided DataFrames ({', '.join(df_names)}) and produce a DataFrame named 'result_df'. Do not include any imports or explanations; only provide the pandas query code that assigns the result to 'result_df'.
557
+ """
558
+ if error_message:
559
+ message += f"\nThe previous query returned the following error:\n{error_message}\nPlease fix the query."
560
+
561
+ # Get structured output
562
+ pandas_query_result, status = await get_structured_output(message, PandasQuery)
563
+ if status == 'SUCCESS' and pandas_query_result and pandas_query_result.pandas_query:
564
+ pandas_query = pandas_query_result.pandas_query
565
+ # Execute the query safely
566
+ local_vars = {name: df for name, df in zip(df_names, data_frames)}
567
+ global_vars = {}
568
+ try:
569
+ exec(pandas_query, global_vars, local_vars)
570
+ result_df = local_vars.get('result_df')
571
+ if result_df is None:
572
+ raise ValueError("The query did not produce a DataFrame named 'result_df'.")
573
+ # If execution is successful, break out of the loop
574
+ break
575
+ except Exception as e:
576
+ # Capture the error message
577
+ error_message = str(e)
578
+ # If this was the last attempt, raise the error
579
+ if attempt == max_retries - 1:
580
+ raise RuntimeError(f"Error executing generated query after {max_retries} attempts: {error_message}")
581
+ # Otherwise, continue to the next iteration
582
+ continue
583
+ else:
584
+ # If unable to get a valid response, raise an error
585
+ if attempt == max_retries - 1:
586
+ raise RuntimeError("Failed to get a valid pandas query after multiple attempts.")
587
+ continue
588
+
589
+ # Write the resulting DataFrame to the output file
590
+ result_df.to_csv(output_file_path, index=False)
591
+
592
+ # Create FileList object
593
+ file_list = FileList(files=[FileItem(file_path=output_file_path)])
594
+
595
+ # Convert FileList to JSON
596
+ def file_item_to_dict(file_item):
597
+ return {"file_path": file_item.file_path}
598
+
599
+ file_list_dict = {
600
+ "files": [file_item_to_dict(file_item) for file_item in file_list.files]
601
+ }
602
+ file_list_json = json.dumps(file_list_dict, indent=2)
603
+ return file_list_json
604
+
605
+ @assistant_tool
606
+ async def load_csv_file(input_file_path: str):
607
+ with open(input_file_path, newline='') as csvfile:
608
+ reader = csv.DictReader(csvfile)
609
+ return [row for row in reader]
610
+
611
+ async def get_structured_output(message: str, response_type):
612
+ try:
613
+ client = AsyncOpenAI()
614
+ completion = await client.beta.chat.completions.parse(
615
+ model="gpt-4o-2024-08-06",
616
+ messages=[
617
+ {"role": "system", "content": "Extract structured content from input. Output is in JSON Format."},
618
+ {"role": "user", "content": message},
619
+ ],
620
+ response_format=response_type,
621
+ )
622
+
623
+ response = completion.choices[0].message
624
+ if response.parsed:
625
+ return response.parsed, 'SUCCESS'
626
+ elif response.refusal:
627
+ logging.warning("ERROR: Refusal response: %s", response.refusal)
628
+ return response.refusal, 'FAIL'
629
+
630
+ except LengthFinishReasonError as e:
631
+ logging.error(f"Too many tokens: {e}")
632
+ raise HTTPException(status_code=502, detail="The request exceeded the maximum token limit.")
633
+ except OpenAIError as e:
634
+ logging.error(f"OpenAI API error: {e}")
635
+ raise HTTPException(status_code=502, detail="Error communicating with the OpenAI API.")
636
+ except Exception as e:
637
+ logging.error(f"Unexpected error: {e}")
638
+ raise HTTPException(status_code=500, detail="An unexpected error occurred while processing your request.")
639
+
454
640
  GLOBAL_TOOLS_FUNCTIONS = {name: func for name, func in globals().items(
455
641
  ) if callable(func) and getattr(func, 'is_assistant_tool', False)}
642
+
643
+
644
+ # import asyncio
645
+ # import os
646
+ # import pandas as pd
647
+
648
+ # async def test_query_dataframes():
649
+ # # Setup: Create a temporary CSV file with sample data
650
+ # input_csv_path = '/tmp/leads_gtm/scored_leads_test.csv'
651
+ # output_csv_path = '/tmp/leads_gtm/totalled_score.csv'
652
+ # sample_data = {
653
+ # 'job_title_match_score': [1, 2, 3],
654
+ # 'skill_relevance_match_score': [1, 2, 3],
655
+ # 'location_match_score': [1, 2, 3],
656
+ # 'education_history_match_score': [1, 2, 3],
657
+ # 'job_history_match_score': [1, 2, 3],
658
+ # 'company_match_score': [1, 2, 3],
659
+ # 'industry_match_score': [1, 2, 3],
660
+ # 'keywords_match_score': [1, 2, 3]
661
+ # }
662
+ # df = pd.DataFrame(sample_data)
663
+ # df.to_csv(input_csv_path, index=False)
664
+
665
+ # # Define the input parameters
666
+ # input_csv_files = [input_csv_path]
667
+ # user_query = "Sum the columns 'job_title_match_score', 'skill_relevance_match_score', 'location_match_score', 'education_history_match_score', 'job_history_match_score', 'company_match_score', 'industry_match_score', 'keywords_match_score' to create a new column 'aggregate_score'. Save the output to '/tmp/totalled_score.csv'."
668
+ # output_file = output_csv_path
669
+
670
+ # # Call the function
671
+ # result = await query_dataframes(input_csv_files, user_query, output_file)
672
+
673
+ # # Verify the output
674
+ # assert os.path.exists(output_csv_path), "Output file was not created."
675
+ # result_df = pd.read_csv(output_csv_path)
676
+ # expected_aggregate_score = [8, 16, 24]
677
+ # assert 'aggregate_score' in result_df.columns, "Column 'aggregate_score' not found in the output."
678
+ # assert result_df['aggregate_score'].tolist() == expected_aggregate_score, "Aggregate scores do not match the expected values."
679
+
680
+ # async def main():
681
+ # await test_query_dataframes()
682
+
683
+ # if __name__ == '__main__':
684
+ # asyncio.run(main())
685
+