dhisana 0.0.1.dev5__tar.gz → 0.0.1.dev7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/PKG-INFO +2 -1
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/setup.py +3 -2
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/ui/components.py +44 -5
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/agent_tools.py +306 -76
- dhisana-0.0.1.dev7/src/dhisana/utils/linkedin_crawler.py +177 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/openai_helpers.py +283 -71
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +14 -9
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/tools_json.py +29 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana.egg-info/PKG-INFO +2 -1
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana.egg-info/SOURCES.txt +1 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana.egg-info/requires.txt +1 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/tests/test_agent_tools.py +45 -13
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/README.md +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/pyproject.toml +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/setup.cfg +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/__init__.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/cli/__init__.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/cli/cli.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/cli/datasets.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/cli/models.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/cli/predictions.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/ui/__init__.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/__init__.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/assistant_tool_tag.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/openapi_spec_to_tools.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/openapi_tool/__init__.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/openapi_tool/api_models.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/openapi_tool/openapi_tool.py +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana.egg-info/dependency_links.txt +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana.egg-info/entry_points.txt +0 -0
- {dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dhisana
|
|
3
|
-
Version: 0.0.1.dev5
|
|
3
|
+
Version: 0.0.1.dev7
|
|
4
4
|
Summary: A Python SDK for Dhisana AI Platform
|
|
5
5
|
Home-page: https://github.com/dhisana-ai/dhisana-python-sdk
|
|
6
6
|
Author: Admin
|
|
@@ -22,3 +22,4 @@ Requires-Dist: requests
|
|
|
22
22
|
Requires-Dist: uvicorn[standard]
|
|
23
23
|
Requires-Dist: aiohttp
|
|
24
24
|
Requires-Dist: openapi_pydantic
|
|
25
|
+
Requires-Dist: pandas
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name='dhisana',
|
|
5
|
-
version='0.0.1-dev5',
|
|
5
|
+
version='0.0.1-dev7',
|
|
6
6
|
description='A Python SDK for Dhisana AI Platform',
|
|
7
7
|
author='Admin',
|
|
8
8
|
author_email='contact@dhisana.ai',
|
|
@@ -21,7 +21,8 @@ setup(
|
|
|
21
21
|
'requests',
|
|
22
22
|
'uvicorn[standard]',
|
|
23
23
|
'aiohttp',
|
|
24
|
-
'openapi_pydantic'
|
|
24
|
+
'openapi_pydantic',
|
|
25
|
+
'pandas'
|
|
25
26
|
],
|
|
26
27
|
entry_points={
|
|
27
28
|
'console_scripts': [
|
|
@@ -48,6 +48,18 @@ class Sidebar(Component):
|
|
|
48
48
|
},
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
+
class Text(Component):
|
|
52
|
+
def __init__(self, content: str):
|
|
53
|
+
self.content = content
|
|
54
|
+
|
|
55
|
+
def to_dict(self):
|
|
56
|
+
return {
|
|
57
|
+
'type': 'text',
|
|
58
|
+
'properties': {
|
|
59
|
+
'content': self.content,
|
|
60
|
+
},
|
|
61
|
+
}
|
|
62
|
+
|
|
51
63
|
|
|
52
64
|
class MainContent(Component):
|
|
53
65
|
def __init__(self, children: List[Component]):
|
|
@@ -78,16 +90,23 @@ class ChatWindow(Component):
|
|
|
78
90
|
|
|
79
91
|
|
|
80
92
|
class DataTable(Component):
|
|
81
|
-
def __init__(
|
|
93
|
+
def __init__(
|
|
94
|
+
self,
|
|
95
|
+
columns: List[Dict[str, Any]],
|
|
96
|
+
data_source: str,
|
|
97
|
+
actions: Optional[List[Dict[str, Any]]] = None,
|
|
98
|
+
):
|
|
82
99
|
self.columns = columns
|
|
83
|
-
self.data_source = data_source
|
|
100
|
+
self.data_source = data_source
|
|
101
|
+
self.actions = actions or []
|
|
84
102
|
|
|
85
103
|
def to_dict(self):
|
|
86
104
|
return {
|
|
87
105
|
'type': 'data-table',
|
|
88
106
|
'properties': {
|
|
89
107
|
'columns': self.columns,
|
|
90
|
-
'dataSource': self.data_source,
|
|
108
|
+
'dataSource': self.data_source,
|
|
109
|
+
'actions': self.actions,
|
|
91
110
|
},
|
|
92
111
|
}
|
|
93
112
|
|
|
@@ -172,14 +191,16 @@ class TextArea(Component):
|
|
|
172
191
|
|
|
173
192
|
|
|
174
193
|
class Upload(Component):
|
|
175
|
-
def __init__(self, name: str):
|
|
194
|
+
def __init__(self, name: str, required: bool = False):
|
|
176
195
|
self.name = name
|
|
196
|
+
self.required = required
|
|
177
197
|
|
|
178
198
|
def to_dict(self):
|
|
179
199
|
return {
|
|
180
200
|
'type': 'upload',
|
|
181
201
|
'properties': {
|
|
182
202
|
'name': self.name,
|
|
203
|
+
'required': self.required,
|
|
183
204
|
},
|
|
184
205
|
}
|
|
185
206
|
|
|
@@ -279,7 +300,7 @@ class Action:
|
|
|
279
300
|
self,
|
|
280
301
|
action_type: str,
|
|
281
302
|
method: str,
|
|
282
|
-
url: str,
|
|
303
|
+
url: Optional[str] = None,
|
|
283
304
|
data: Optional[Any] = None,
|
|
284
305
|
state: Optional[str] = None,
|
|
285
306
|
on_success: Optional[str] = None,
|
|
@@ -302,6 +323,24 @@ class Action:
|
|
|
302
323
|
}
|
|
303
324
|
|
|
304
325
|
|
|
326
|
+
class CustomInputOutputContent(Component):
|
|
327
|
+
def __init__(
|
|
328
|
+
self,
|
|
329
|
+
data_source: str,
|
|
330
|
+
actions: Optional[List[Dict[str, Any]]] = None,
|
|
331
|
+
):
|
|
332
|
+
self.data_source = data_source
|
|
333
|
+
self.actions = actions or []
|
|
334
|
+
|
|
335
|
+
def to_dict(self):
|
|
336
|
+
return {
|
|
337
|
+
'type': 'custom-input-output-content',
|
|
338
|
+
'properties': {
|
|
339
|
+
'dataSource': self.data_source,
|
|
340
|
+
'actions': self.actions,
|
|
341
|
+
},
|
|
342
|
+
}
|
|
343
|
+
|
|
305
344
|
def render(
|
|
306
345
|
layout: str,
|
|
307
346
|
components: List[Component],
|
|
@@ -7,22 +7,36 @@ import json
|
|
|
7
7
|
import uuid
|
|
8
8
|
import io
|
|
9
9
|
import base64
|
|
10
|
+
import csv
|
|
11
|
+
import logging
|
|
12
|
+
from typing import List, Dict, Any, Optional
|
|
13
|
+
|
|
14
|
+
import pandas as pd
|
|
15
|
+
import httpx
|
|
10
16
|
from bs4 import BeautifulSoup
|
|
11
17
|
from playwright.async_api import async_playwright
|
|
12
18
|
from email.mime.text import MIMEText
|
|
13
|
-
from typing import List, Dict, Any
|
|
14
|
-
from .assistant_tool_tag import assistant_tool
|
|
15
19
|
from google.oauth2 import service_account
|
|
16
20
|
from googleapiclient.discovery import build
|
|
17
21
|
from googleapiclient.http import MediaIoBaseDownload, MediaFileUpload
|
|
18
|
-
import httpx
|
|
19
22
|
from google.auth.transport.requests import Request
|
|
20
|
-
from typing import List
|
|
21
23
|
from googleapiclient.errors import HttpError
|
|
24
|
+
from pydantic import BaseModel
|
|
25
|
+
from fastapi import HTTPException
|
|
26
|
+
from openai import LengthFinishReasonError, OpenAI, OpenAIError, AsyncOpenAI
|
|
27
|
+
from typing import List, Optional
|
|
28
|
+
import tempfile
|
|
29
|
+
import pandas as pd
|
|
30
|
+
from typing import List, Optional
|
|
31
|
+
import time
|
|
32
|
+
|
|
22
33
|
|
|
34
|
+
|
|
35
|
+
from dhisana.utils.assistant_tool_tag import assistant_tool
|
|
23
36
|
GLOBAL_DATA_MODELS = []
|
|
24
37
|
GLOBAL_TOOLS_FUNCTIONS = {}
|
|
25
38
|
|
|
39
|
+
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
|
|
26
40
|
|
|
27
41
|
@assistant_tool
|
|
28
42
|
async def get_html_content_from_url(url):
|
|
@@ -31,13 +45,13 @@ async def get_html_content_from_url(url):
|
|
|
31
45
|
context = await browser.new_context()
|
|
32
46
|
page = await context.new_page()
|
|
33
47
|
|
|
34
|
-
|
|
48
|
+
logging.info(f"Requesting {url}")
|
|
35
49
|
try:
|
|
36
50
|
await page.goto(url, timeout=10000)
|
|
37
51
|
html_content = await page.content()
|
|
38
52
|
return await parse_html_content(html_content)
|
|
39
53
|
except Exception as e:
|
|
40
|
-
|
|
54
|
+
logging.info(f"Failed to fetch {url}: {e}")
|
|
41
55
|
return ""
|
|
42
56
|
finally:
|
|
43
57
|
await browser.close()
|
|
@@ -130,7 +144,7 @@ async def get_file_content_from_googledrive_by_name(file_name: str = None) -> st
|
|
|
130
144
|
done = False
|
|
131
145
|
while not done:
|
|
132
146
|
status, done = downloader.next_chunk()
|
|
133
|
-
|
|
147
|
+
logging.info(f"{file_name} Download {int(status.progress() * 100)}%.")
|
|
134
148
|
|
|
135
149
|
# Close the file handle
|
|
136
150
|
fh.close()
|
|
@@ -141,83 +155,86 @@ async def get_file_content_from_googledrive_by_name(file_name: str = None) -> st
|
|
|
141
155
|
|
|
142
156
|
@assistant_tool
|
|
143
157
|
async def write_content_to_googledrive(cloud_file_path: str, local_file_path: str) -> str:
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
158
|
+
try:
|
|
159
|
+
"""
|
|
160
|
+
Writes content from a local file to a file in Google Drive using a service account.
|
|
161
|
+
If the file does not exist in Google Drive, it creates it along with any necessary intermediate directories.
|
|
162
|
+
|
|
163
|
+
:param cloud_file_path: The path of the file to create or update on Google Drive.
|
|
164
|
+
:param local_file_path: The path to the local file whose content will be uploaded.
|
|
165
|
+
:return: The file ID of the uploaded or updated file.
|
|
166
|
+
"""
|
|
152
167
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
168
|
+
# Retrieve the service account JSON and email for automation from environment variables
|
|
169
|
+
email_for_automation = os.getenv('EMAIL_FOR_AUTOMATION')
|
|
170
|
+
service_account_base64 = os.getenv('GOOGLE_SERVICE_KEY')
|
|
171
|
+
service_account_json = convert_base_64_json(service_account_base64)
|
|
157
172
|
|
|
158
|
-
|
|
159
|
-
|
|
173
|
+
# Parse the JSON string into a dictionary
|
|
174
|
+
service_account_info = json.loads(service_account_json)
|
|
160
175
|
|
|
161
|
-
|
|
162
|
-
|
|
176
|
+
# Define the required scope for Google Drive API access
|
|
177
|
+
SCOPES = ['https://www.googleapis.com/auth/drive']
|
|
163
178
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
179
|
+
# Authenticate using the service account info and impersonate the specific email
|
|
180
|
+
credentials = service_account.Credentials.from_service_account_info(
|
|
181
|
+
service_account_info, scopes=SCOPES
|
|
182
|
+
).with_subject(email_for_automation)
|
|
168
183
|
|
|
169
|
-
|
|
170
|
-
|
|
184
|
+
# Build the Google Drive service object
|
|
185
|
+
service = build('drive', 'v3', credentials=credentials)
|
|
171
186
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
187
|
+
# Split the cloud file path into components
|
|
188
|
+
path_components = cloud_file_path.split('/')
|
|
189
|
+
parent_id = 'root'
|
|
190
|
+
|
|
191
|
+
# Create intermediate directories if they don't exist
|
|
192
|
+
for component in path_components[:-1]:
|
|
193
|
+
query = f"'{parent_id}' in parents and name = '{component}' and mimeType = 'application/vnd.google-apps.folder'"
|
|
194
|
+
results = service.files().list(q=query, pageSize=1, fields="files(id, name)").execute()
|
|
195
|
+
items = results.get('files', [])
|
|
196
|
+
|
|
197
|
+
if items:
|
|
198
|
+
parent_id = items[0]['id']
|
|
199
|
+
else:
|
|
200
|
+
file_metadata = {
|
|
201
|
+
'name': component,
|
|
202
|
+
'mimeType': 'application/vnd.google-apps.folder',
|
|
203
|
+
'parents': [parent_id]
|
|
204
|
+
}
|
|
205
|
+
folder = service.files().create(body=file_metadata, fields='id').execute()
|
|
206
|
+
parent_id = folder.get('id')
|
|
207
|
+
|
|
208
|
+
# Prepare the file for upload
|
|
209
|
+
media_body = MediaFileUpload(local_file_path, resumable=True)
|
|
210
|
+
file_name = path_components[-1]
|
|
211
|
+
|
|
212
|
+
# Check if the file exists in the specified directory
|
|
213
|
+
query = f"'{parent_id}' in parents and name = '{file_name}'"
|
|
179
214
|
results = service.files().list(q=query, pageSize=1, fields="files(id, name)").execute()
|
|
180
215
|
items = results.get('files', [])
|
|
181
|
-
|
|
216
|
+
|
|
182
217
|
if items:
|
|
183
|
-
|
|
218
|
+
# File exists, update its content
|
|
219
|
+
file_id = items[0]['id']
|
|
220
|
+
updated_file = service.files().update(
|
|
221
|
+
fileId=file_id,
|
|
222
|
+
media_body=media_body
|
|
223
|
+
).execute()
|
|
184
224
|
else:
|
|
225
|
+
# File does not exist, create a new one
|
|
185
226
|
file_metadata = {
|
|
186
|
-
'name':
|
|
187
|
-
'mimeType': 'application/vnd.google-apps.folder',
|
|
227
|
+
'name': file_name,
|
|
188
228
|
'parents': [parent_id]
|
|
189
229
|
}
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
query = f"'{parent_id}' in parents and name = '{file_name}'"
|
|
199
|
-
results = service.files().list(q=query, pageSize=1, fields="files(id, name)").execute()
|
|
200
|
-
items = results.get('files', [])
|
|
201
|
-
|
|
202
|
-
if items:
|
|
203
|
-
# File exists, update its content
|
|
204
|
-
file_id = items[0]['id']
|
|
205
|
-
updated_file = service.files().update(
|
|
206
|
-
fileId=file_id,
|
|
207
|
-
media_body=media_body
|
|
208
|
-
).execute()
|
|
209
|
-
else:
|
|
210
|
-
# File does not exist, create a new one
|
|
211
|
-
file_metadata = {
|
|
212
|
-
'name': file_name,
|
|
213
|
-
'parents': [parent_id]
|
|
214
|
-
}
|
|
215
|
-
created_file = service.files().create(
|
|
216
|
-
body=file_metadata,
|
|
217
|
-
media_body=media_body,
|
|
218
|
-
fields='id'
|
|
219
|
-
).execute()
|
|
220
|
-
file_id = created_file.get('id')
|
|
230
|
+
created_file = service.files().create(
|
|
231
|
+
body=file_metadata,
|
|
232
|
+
media_body=media_body,
|
|
233
|
+
fields='id'
|
|
234
|
+
).execute()
|
|
235
|
+
file_id = created_file.get('id')
|
|
236
|
+
except HttpError as error:
|
|
237
|
+
raise Exception(f"list_files_in_drive_folder_by_name An error occurred: {error}")
|
|
221
238
|
|
|
222
239
|
return file_id
|
|
223
240
|
|
|
@@ -278,7 +295,7 @@ async def list_files_in_drive_folder_by_name(folder_path: str = None) -> List[st
|
|
|
278
295
|
# Update folder_id to the ID of the found folder
|
|
279
296
|
folder_id = items[0]['id']
|
|
280
297
|
except HttpError as error:
|
|
281
|
-
raise Exception(f"An error occurred: {error}")
|
|
298
|
+
raise Exception(f"list_files_in_drive_folder_by_name An error occurred: {error}")
|
|
282
299
|
|
|
283
300
|
# Now folder_id is the ID of the desired folder
|
|
284
301
|
# List all files in the specified folder
|
|
@@ -294,7 +311,7 @@ async def list_files_in_drive_folder_by_name(folder_path: str = None) -> List[st
|
|
|
294
311
|
file_names = [item['name'] for item in items]
|
|
295
312
|
return file_names
|
|
296
313
|
except HttpError as error:
|
|
297
|
-
raise Exception(f"An error occurred while listing files: {error}")
|
|
314
|
+
raise Exception(f"list_files_in_drive_folder_by_name An error occurred while listing files: {error}")
|
|
298
315
|
|
|
299
316
|
|
|
300
317
|
@assistant_tool
|
|
@@ -442,14 +459,227 @@ async def get_calendar_events_using_service_account_async(
|
|
|
442
459
|
events = events_result.get('items', [])
|
|
443
460
|
|
|
444
461
|
if not events:
|
|
445
|
-
|
|
462
|
+
logging.info('No upcoming events found within the specified range.')
|
|
446
463
|
else:
|
|
447
|
-
|
|
464
|
+
logging.info('Upcoming events:')
|
|
448
465
|
for event in events:
|
|
449
466
|
start = event['start'].get('dateTime', event['start'].get('date'))
|
|
450
|
-
|
|
467
|
+
logging.info(f"{start} - {event.get('summary', 'No Title')}")
|
|
451
468
|
|
|
452
469
|
return events
|
|
453
470
|
|
|
471
|
+
class FileItem:
|
|
472
|
+
def __init__(self, file_path: str):
|
|
473
|
+
self.file_path = file_path
|
|
474
|
+
|
|
475
|
+
class FileList:
|
|
476
|
+
def __init__(self, files: List[FileItem]):
|
|
477
|
+
self.files = files
|
|
478
|
+
|
|
479
|
+
class PandasQuery(BaseModel):
|
|
480
|
+
pandas_query: str
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
@assistant_tool
|
|
485
|
+
async def query_dataframes(user_query: str, input_files: Optional[List[str]], output_file_path: Optional[str] = None) -> str:
|
|
486
|
+
"""
|
|
487
|
+
Query multiple dataframes based on a user query and write the output dataframe to a specified output file path.
|
|
488
|
+
|
|
489
|
+
Args:
|
|
490
|
+
user_query (str): User query in natural language.
|
|
491
|
+
input_files (List[str]): List of paths to CSV files to be loaded into dataframes.
|
|
492
|
+
output_file_path (Optional[str]): Path to the output file where the resulting dataframe will be saved.
|
|
493
|
+
If not specified, a unique file path will be generated in '/tmp/run_interim_outputs/'.
|
|
494
|
+
|
|
495
|
+
Returns:
|
|
496
|
+
str: A JSON string representing the FileList containing the path to the output file if created, otherwise an empty list.
|
|
497
|
+
"""
|
|
498
|
+
max_retries = 3
|
|
499
|
+
# Check if the list of CSV files or the user query is empty
|
|
500
|
+
if not input_files or not user_query:
|
|
501
|
+
# Return an empty FileList as JSON
|
|
502
|
+
return json.dumps({"files": []})
|
|
503
|
+
|
|
504
|
+
# If output_file_path is not specified, generate one
|
|
505
|
+
if not output_file_path:
|
|
506
|
+
output_folder = '/tmp/run_interim_outputs/'
|
|
507
|
+
# Ensure output_folder exists
|
|
508
|
+
os.makedirs(output_folder, exist_ok=True)
|
|
509
|
+
# Generate a unique filename
|
|
510
|
+
unique_number = int(time.time() * 1000) # milliseconds since epoch
|
|
511
|
+
output_file_name = f'query_dataframe_{unique_number}.csv'
|
|
512
|
+
output_file_path = os.path.join(output_folder, output_file_name)
|
|
513
|
+
else:
|
|
514
|
+
# Ensure the directory exists
|
|
515
|
+
output_folder = os.path.dirname(output_file_path)
|
|
516
|
+
if output_folder:
|
|
517
|
+
os.makedirs(output_folder, exist_ok=True)
|
|
518
|
+
|
|
519
|
+
# Load CSV files into dataframes, skipping empty files
|
|
520
|
+
data_frames = []
|
|
521
|
+
df_names = []
|
|
522
|
+
for idx, file in enumerate(input_files):
|
|
523
|
+
# Check if the file is empty
|
|
524
|
+
if os.path.getsize(file) == 0:
|
|
525
|
+
# Skip empty files
|
|
526
|
+
continue
|
|
527
|
+
df = pd.read_csv(file)
|
|
528
|
+
data_frames.append(df)
|
|
529
|
+
df_name = f'df{idx+1}'
|
|
530
|
+
df_names.append(df_name)
|
|
531
|
+
|
|
532
|
+
# Check if any dataframes were loaded
|
|
533
|
+
if not data_frames:
|
|
534
|
+
# Return an empty FileList as JSON
|
|
535
|
+
return json.dumps({"files": []})
|
|
536
|
+
|
|
537
|
+
# Create a context with the dataframes and their schemas
|
|
538
|
+
schema_info = ""
|
|
539
|
+
for df_name, df in zip(df_names, data_frames):
|
|
540
|
+
schema_info += f"DataFrame '{df_name}' columns: {', '.join(df.columns)}\n"
|
|
541
|
+
|
|
542
|
+
# Initialize the error message as empty
|
|
543
|
+
error_message = ""
|
|
544
|
+
|
|
545
|
+
for attempt in range(max_retries):
|
|
546
|
+
# Prepare the message
|
|
547
|
+
message = f"""
|
|
548
|
+
You are an expert data analyst. Given the following DataFrames and their schemas:
|
|
549
|
+
|
|
550
|
+
{schema_info}
|
|
551
|
+
|
|
552
|
+
Write a pandas query to answer the following question:
|
|
553
|
+
|
|
554
|
+
\"\"\"{user_query}\"\"\"
|
|
555
|
+
|
|
556
|
+
Your query should use the provided DataFrames ({', '.join(df_names)}) and produce a DataFrame named 'result_df'. Do not include any imports or explanations; only provide the pandas query code that assigns the result to 'result_df'.
|
|
557
|
+
"""
|
|
558
|
+
if error_message:
|
|
559
|
+
message += f"\nThe previous query returned the following error:\n{error_message}\nPlease fix the query."
|
|
560
|
+
|
|
561
|
+
# Get structured output
|
|
562
|
+
pandas_query_result, status = await get_structured_output(message, PandasQuery)
|
|
563
|
+
if status == 'SUCCESS' and pandas_query_result and pandas_query_result.pandas_query:
|
|
564
|
+
pandas_query = pandas_query_result.pandas_query
|
|
565
|
+
# Execute the query safely
|
|
566
|
+
local_vars = {name: df for name, df in zip(df_names, data_frames)}
|
|
567
|
+
global_vars = {}
|
|
568
|
+
try:
|
|
569
|
+
exec(pandas_query, global_vars, local_vars)
|
|
570
|
+
result_df = local_vars.get('result_df')
|
|
571
|
+
if result_df is None:
|
|
572
|
+
raise ValueError("The query did not produce a DataFrame named 'result_df'.")
|
|
573
|
+
# If execution is successful, break out of the loop
|
|
574
|
+
break
|
|
575
|
+
except Exception as e:
|
|
576
|
+
# Capture the error message
|
|
577
|
+
error_message = str(e)
|
|
578
|
+
# If this was the last attempt, raise the error
|
|
579
|
+
if attempt == max_retries - 1:
|
|
580
|
+
raise RuntimeError(f"Error executing generated query after {max_retries} attempts: {error_message}")
|
|
581
|
+
# Otherwise, continue to the next iteration
|
|
582
|
+
continue
|
|
583
|
+
else:
|
|
584
|
+
# If unable to get a valid response, raise an error
|
|
585
|
+
if attempt == max_retries - 1:
|
|
586
|
+
raise RuntimeError("Failed to get a valid pandas query after multiple attempts.")
|
|
587
|
+
continue
|
|
588
|
+
|
|
589
|
+
# Write the resulting DataFrame to the output file
|
|
590
|
+
result_df.to_csv(output_file_path, index=False)
|
|
591
|
+
|
|
592
|
+
# Create FileList object
|
|
593
|
+
file_list = FileList(files=[FileItem(file_path=output_file_path)])
|
|
594
|
+
|
|
595
|
+
# Convert FileList to JSON
|
|
596
|
+
def file_item_to_dict(file_item):
|
|
597
|
+
return {"file_path": file_item.file_path}
|
|
598
|
+
|
|
599
|
+
file_list_dict = {
|
|
600
|
+
"files": [file_item_to_dict(file_item) for file_item in file_list.files]
|
|
601
|
+
}
|
|
602
|
+
file_list_json = json.dumps(file_list_dict, indent=2)
|
|
603
|
+
return file_list_json
|
|
604
|
+
|
|
605
|
+
@assistant_tool
|
|
606
|
+
async def load_csv_file(input_file_path: str):
|
|
607
|
+
with open(input_file_path, newline='') as csvfile:
|
|
608
|
+
reader = csv.DictReader(csvfile)
|
|
609
|
+
return [row for row in reader]
|
|
610
|
+
|
|
611
|
+
async def get_structured_output(message: str, response_type):
|
|
612
|
+
try:
|
|
613
|
+
client = AsyncOpenAI()
|
|
614
|
+
completion = await client.beta.chat.completions.parse(
|
|
615
|
+
model="gpt-4o-2024-08-06",
|
|
616
|
+
messages=[
|
|
617
|
+
{"role": "system", "content": "Extract structured content from input. Output is in JSON Format."},
|
|
618
|
+
{"role": "user", "content": message},
|
|
619
|
+
],
|
|
620
|
+
response_format=response_type,
|
|
621
|
+
)
|
|
622
|
+
|
|
623
|
+
response = completion.choices[0].message
|
|
624
|
+
if response.parsed:
|
|
625
|
+
return response.parsed, 'SUCCESS'
|
|
626
|
+
elif response.refusal:
|
|
627
|
+
logging.warning("ERROR: Refusal response: %s", response.refusal)
|
|
628
|
+
return response.refusal, 'FAIL'
|
|
629
|
+
|
|
630
|
+
except LengthFinishReasonError as e:
|
|
631
|
+
logging.error(f"Too many tokens: {e}")
|
|
632
|
+
raise HTTPException(status_code=502, detail="The request exceeded the maximum token limit.")
|
|
633
|
+
except OpenAIError as e:
|
|
634
|
+
logging.error(f"OpenAI API error: {e}")
|
|
635
|
+
raise HTTPException(status_code=502, detail="Error communicating with the OpenAI API.")
|
|
636
|
+
except Exception as e:
|
|
637
|
+
logging.error(f"Unexpected error: {e}")
|
|
638
|
+
raise HTTPException(status_code=500, detail="An unexpected error occurred while processing your request.")
|
|
639
|
+
|
|
454
640
|
GLOBAL_TOOLS_FUNCTIONS = {name: func for name, func in globals().items(
|
|
455
641
|
) if callable(func) and getattr(func, 'is_assistant_tool', False)}
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
# import asyncio
|
|
645
|
+
# import os
|
|
646
|
+
# import pandas as pd
|
|
647
|
+
|
|
648
|
+
# async def test_query_dataframes():
|
|
649
|
+
# # Setup: Create a temporary CSV file with sample data
|
|
650
|
+
# input_csv_path = '/tmp/leads_gtm/scored_leads_test.csv'
|
|
651
|
+
# output_csv_path = '/tmp/leads_gtm/totalled_score.csv'
|
|
652
|
+
# sample_data = {
|
|
653
|
+
# 'job_title_match_score': [1, 2, 3],
|
|
654
|
+
# 'skill_relevance_match_score': [1, 2, 3],
|
|
655
|
+
# 'location_match_score': [1, 2, 3],
|
|
656
|
+
# 'education_history_match_score': [1, 2, 3],
|
|
657
|
+
# 'job_history_match_score': [1, 2, 3],
|
|
658
|
+
# 'company_match_score': [1, 2, 3],
|
|
659
|
+
# 'industry_match_score': [1, 2, 3],
|
|
660
|
+
# 'keywords_match_score': [1, 2, 3]
|
|
661
|
+
# }
|
|
662
|
+
# df = pd.DataFrame(sample_data)
|
|
663
|
+
# df.to_csv(input_csv_path, index=False)
|
|
664
|
+
|
|
665
|
+
# # Define the input parameters
|
|
666
|
+
# input_csv_files = [input_csv_path]
|
|
667
|
+
# user_query = "Sum the columns 'job_title_match_score', 'skill_relevance_match_score', 'location_match_score', 'education_history_match_score', 'job_history_match_score', 'company_match_score', 'industry_match_score', 'keywords_match_score' to create a new column 'aggregate_score'. Save the output to '/tmp/totalled_score.csv'."
|
|
668
|
+
# output_file = output_csv_path
|
|
669
|
+
|
|
670
|
+
# # Call the function
|
|
671
|
+
# result = await query_dataframes(input_csv_files, user_query, output_file)
|
|
672
|
+
|
|
673
|
+
# # Verify the output
|
|
674
|
+
# assert os.path.exists(output_csv_path), "Output file was not created."
|
|
675
|
+
# result_df = pd.read_csv(output_csv_path)
|
|
676
|
+
# expected_aggregate_score = [8, 16, 24]
|
|
677
|
+
# assert 'aggregate_score' in result_df.columns, "Column 'aggregate_score' not found in the output."
|
|
678
|
+
# assert result_df['aggregate_score'].tolist() == expected_aggregate_score, "Aggregate scores do not match the expected values."
|
|
679
|
+
|
|
680
|
+
# async def main():
|
|
681
|
+
# await test_query_dataframes()
|
|
682
|
+
|
|
683
|
+
# if __name__ == '__main__':
|
|
684
|
+
# asyncio.run(main())
|
|
685
|
+
|