PyPI - dhisana - Versions diffs - 0.0.1.dev5__tar.gz → 0.0.1.dev7__tar.gz - Mend

dhisana 0.0.1.dev5__tar.gz → 0.0.1.dev7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

{dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dhisana
-Version: 0.0.1.dev5
+Version: 0.0.1.dev7
 Summary: A Python SDK for Dhisana AI Platform
 Home-page: https://github.com/dhisana-ai/dhisana-python-sdk
 Author: Admin
@@ -22,3 +22,4 @@ Requires-Dist: requests
 Requires-Dist: uvicorn[standard]
 Requires-Dist: aiohttp
 Requires-Dist: openapi_pydantic
+Requires-Dist: pandas

{dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 setup(
     name='dhisana',
-    version='0.0.1-dev5',
+    version='0.0.1-dev7',
     description='A Python SDK for Dhisana AI Platform',
     author='Admin',
     author_email='contact@dhisana.ai',
@@ -21,7 +21,8 @@ setup(
         'requests',
         'uvicorn[standard]',
         'aiohttp',
-        'openapi_pydantic'
+        'openapi_pydantic',
+        'pandas'
     ],
     entry_points={
         'console_scripts': [

{dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/ui/components.py RENAMED Viewed

@@ -48,6 +48,18 @@ class Sidebar(Component):
             },
         }
+class Text(Component):
+    def __init__(self, content: str):
+        self.content = content
+    def to_dict(self):
+        return {
+            'type': 'text',
+            'properties': {
+                'content': self.content,
+            },
+        }
 class MainContent(Component):
     def __init__(self, children: List[Component]):
@@ -78,16 +90,23 @@ class ChatWindow(Component):
 class DataTable(Component):
-    def __init__(self, columns: List[Dict[str, Any]], data_source: str):
+    def __init__(
+        self,
+        columns: List[Dict[str, Any]],
+        data_source: str,
+        actions: Optional[List[Dict[str, Any]]] = None,
+    ):
         self.columns = columns
-        self.data_source = data_source  # Should be a reference to data in dataContext
+        self.data_source = data_source
+        self.actions = actions or []
     def to_dict(self):
         return {
             'type': 'data-table',
             'properties': {
                 'columns': self.columns,
-                'dataSource': self.data_source,  # Should be in the form '{{dataKey}}'
+                'dataSource': self.data_source,
+                'actions': self.actions,
             },
         }
@@ -172,14 +191,16 @@ class TextArea(Component):
 class Upload(Component):
-    def __init__(self, name: str):
+    def __init__(self, name: str, required: bool = False):
         self.name = name
+        self.required = required
     def to_dict(self):
         return {
             'type': 'upload',
             'properties': {
                 'name': self.name,
+                'required': self.required,
             },
         }
@@ -279,7 +300,7 @@ class Action:
         self,
         action_type: str,
         method: str,
-        url: str,
+        url: Optional[str] = None,
         data: Optional[Any] = None,
         state: Optional[str] = None,
         on_success: Optional[str] = None,
@@ -302,6 +323,24 @@ class Action:
         }
+class CustomInputOutputContent(Component):
+    def __init__(
+        self,
+        data_source: str,
+        actions: Optional[List[Dict[str, Any]]] = None,
+    ):
+        self.data_source = data_source
+        self.actions = actions or []
+    def to_dict(self):
+        return {
+            'type': 'custom-input-output-content',
+            'properties': {
+                'dataSource': self.data_source,
+                'actions': self.actions,
+            },
+        }
 def render(
     layout: str,
     components: List[Component],

{dhisana-0.0.1.dev5 → dhisana-0.0.1.dev7}/src/dhisana/utils/agent_tools.py RENAMED Viewed

@@ -7,22 +7,36 @@ import json
 import uuid
 import io
 import base64
+import csv
+import logging
+from typing import List, Dict, Any, Optional
+import pandas as pd
+import httpx
 from bs4 import BeautifulSoup
 from playwright.async_api import async_playwright
 from email.mime.text import MIMEText
-from typing import List, Dict, Any
-from .assistant_tool_tag import assistant_tool
 from google.oauth2 import service_account
 from googleapiclient.discovery import build
 from googleapiclient.http import MediaIoBaseDownload, MediaFileUpload
-import httpx
 from google.auth.transport.requests import Request
-from typing import List
 from googleapiclient.errors import HttpError
+from pydantic import BaseModel
+from fastapi import HTTPException
+from openai import LengthFinishReasonError, OpenAI, OpenAIError, AsyncOpenAI
+from typing import List, Optional
+import tempfile
+import pandas as pd
+from typing import List, Optional
+import time
+from dhisana.utils.assistant_tool_tag import assistant_tool
 GLOBAL_DATA_MODELS = []
 GLOBAL_TOOLS_FUNCTIONS = {}
+OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
 @assistant_tool
 async def get_html_content_from_url(url):
@@ -31,13 +45,13 @@ async def get_html_content_from_url(url):
         context = await browser.new_context()
         page = await context.new_page()
-        print(f"Requesting {url}")
+        logging.info(f"Requesting {url}")
         try:
             await page.goto(url, timeout=10000)
             html_content = await page.content()
             return await parse_html_content(html_content)
         except Exception as e:
-            print(f"Failed to fetch {url}: {e}")
+            logging.info(f"Failed to fetch {url}: {e}")
             return ""
         finally:
             await browser.close()
@@ -130,7 +144,7 @@ async def get_file_content_from_googledrive_by_name(file_name: str = None) -> st
     done = False
     while not done:
         status, done = downloader.next_chunk()
-        print(f"Download {int(status.progress() * 100)}%.")
+        logging.info(f"{file_name} Download {int(status.progress() * 100)}%.")
     # Close the file handle
     fh.close()
@@ -141,83 +155,86 @@ async def get_file_content_from_googledrive_by_name(file_name: str = None) -> st
 @assistant_tool
 async def write_content_to_googledrive(cloud_file_path: str, local_file_path: str) -> str:
-    """
-    Writes content from a local file to a file in Google Drive using a service account.
-    If the file does not exist in Google Drive, it creates it along with any necessary intermediate directories.
-    :param cloud_file_path: The path of the file to create or update on Google Drive.
-    :param local_file_path: The path to the local file whose content will be uploaded.
-    :return: The file ID of the uploaded or updated file.
-    """
+    try:
+        """
+        Writes content from a local file to a file in Google Drive using a service account.
+        If the file does not exist in Google Drive, it creates it along with any necessary intermediate directories.
+        :param cloud_file_path: The path of the file to create or update on Google Drive.
+        :param local_file_path: The path to the local file whose content will be uploaded.
+        :return: The file ID of the uploaded or updated file.
+        """
-    # Retrieve the service account JSON and email for automation from environment variables
-    email_for_automation = os.getenv('EMAIL_FOR_AUTOMATION')
-    service_account_base64 = os.getenv('GOOGLE_SERVICE_KEY')
-    service_account_json = convert_base_64_json(service_account_base64)
+        # Retrieve the service account JSON and email for automation from environment variables
+        email_for_automation = os.getenv('EMAIL_FOR_AUTOMATION')
+        service_account_base64 = os.getenv('GOOGLE_SERVICE_KEY')
+        service_account_json = convert_base_64_json(service_account_base64)
-    # Parse the JSON string into a dictionary
-    service_account_info = json.loads(service_account_json)
+        # Parse the JSON string into a dictionary
+        service_account_info = json.loads(service_account_json)
-    # Define the required scope for Google Drive API access
-    SCOPES = ['https://www.googleapis.com/auth/drive']
+        # Define the required scope for Google Drive API access
+        SCOPES = ['https://www.googleapis.com/auth/drive']
-    # Authenticate using the service account info and impersonate the specific email
-    credentials = service_account.Credentials.from_service_account_info(
-        service_account_info, scopes=SCOPES
-    ).with_subject(email_for_automation)
+        # Authenticate using the service account info and impersonate the specific email
+        credentials = service_account.Credentials.from_service_account_info(
+            service_account_info, scopes=SCOPES
+        ).with_subject(email_for_automation)
-    # Build the Google Drive service object
-    service = build('drive', 'v3', credentials=credentials)
+        # Build the Google Drive service object
+        service = build('drive', 'v3', credentials=credentials)
-    # Split the cloud file path into components
-    path_components = cloud_file_path.split('/')
-    parent_id = 'root'
-    # Create intermediate directories if they don't exist
-    for component in path_components[:-1]:
-        query = f"'{parent_id}' in parents and name = '{component}' and mimeType = 'application/vnd.google-apps.folder'"
+        # Split the cloud file path into components
+        path_components = cloud_file_path.split('/')
+        parent_id = 'root'
+        # Create intermediate directories if they don't exist
+        for component in path_components[:-1]:
+            query = f"'{parent_id}' in parents and name = '{component}' and mimeType = 'application/vnd.google-apps.folder'"
+            results = service.files().list(q=query, pageSize=1, fields="files(id, name)").execute()
+            items = results.get('files', [])
+            if items:
+                parent_id = items[0]['id']
+            else:
+                file_metadata = {
+                    'name': component,
+                    'mimeType': 'application/vnd.google-apps.folder',
+                    'parents': [parent_id]
+                }
+                folder = service.files().create(body=file_metadata, fields='id').execute()
+                parent_id = folder.get('id')
+        # Prepare the file for upload
+        media_body = MediaFileUpload(local_file_path, resumable=True)
+        file_name = path_components[-1]
+        # Check if the file exists in the specified directory
+        query = f"'{parent_id}' in parents and name = '{file_name}'"
         results = service.files().list(q=query, pageSize=1, fields="files(id, name)").execute()
         items = results.get('files', [])
         if items:
-            parent_id = items[0]['id']
+            # File exists, update its content
+            file_id = items[0]['id']
+            updated_file = service.files().update(
+                fileId=file_id,
+                media_body=media_body
+            ).execute()
         else:
+            # File does not exist, create a new one
             file_metadata = {
-                'name': component,
-                'mimeType': 'application/vnd.google-apps.folder',
+                'name': file_name,
                 'parents': [parent_id]
             }
-            folder = service.files().create(body=file_metadata, fields='id').execute()
-            parent_id = folder.get('id')
-    # Prepare the file for upload
-    media_body = MediaFileUpload(local_file_path, resumable=True)
-    file_name = path_components[-1]
-    # Check if the file exists in the specified directory
-    query = f"'{parent_id}' in parents and name = '{file_name}'"
-    results = service.files().list(q=query, pageSize=1, fields="files(id, name)").execute()
-    items = results.get('files', [])
-    if items:
-        # File exists, update its content
-        file_id = items[0]['id']
-        updated_file = service.files().update(
-            fileId=file_id,
-            media_body=media_body
-        ).execute()
-    else:
-        # File does not exist, create a new one
-        file_metadata = {
-            'name': file_name,
-            'parents': [parent_id]
-        }
-        created_file = service.files().create(
-            body=file_metadata,
-            media_body=media_body,
-            fields='id'
-        ).execute()
-        file_id = created_file.get('id')
+            created_file = service.files().create(
+                body=file_metadata,
+                media_body=media_body,
+                fields='id'
+            ).execute()
+            file_id = created_file.get('id')
+    except HttpError as error:
+            raise Exception(f"list_files_in_drive_folder_by_name An error occurred: {error}")
     return file_id
@@ -278,7 +295,7 @@ async def list_files_in_drive_folder_by_name(folder_path: str = None) -> List[st
                 # Update folder_id to the ID of the found folder
                 folder_id = items[0]['id']
             except HttpError as error:
-                raise Exception(f"An error occurred: {error}")
+                raise Exception(f"list_files_in_drive_folder_by_name An error occurred: {error}")
     # Now folder_id is the ID of the desired folder
     # List all files in the specified folder
@@ -294,7 +311,7 @@ async def list_files_in_drive_folder_by_name(folder_path: str = None) -> List[st
         file_names = [item['name'] for item in items]
         return file_names
     except HttpError as error:
-        raise Exception(f"An error occurred while listing files: {error}")
+        raise Exception(f"list_files_in_drive_folder_by_name An error occurred while listing files: {error}")
 @assistant_tool
@@ -442,14 +459,227 @@ async def get_calendar_events_using_service_account_async(
     events = events_result.get('items', [])
     if not events:
-        print('No upcoming events found within the specified range.')
+        logging.info('No upcoming events found within the specified range.')
     else:
-        print('Upcoming events:')
+        logging.info('Upcoming events:')
         for event in events:
             start = event['start'].get('dateTime', event['start'].get('date'))
-            print(f"{start} - {event.get('summary', 'No Title')}")
+            logging.info(f"{start} - {event.get('summary', 'No Title')}")
     return events
+class FileItem:
+    def __init__(self, file_path: str):
+        self.file_path = file_path
+class FileList:
+    def __init__(self, files: List[FileItem]):
+        self.files = files
+class PandasQuery(BaseModel):
+    pandas_query: str
+@assistant_tool
+async def query_dataframes(user_query: str, input_files: Optional[List[str]], output_file_path: Optional[str] = None) -> str:
+    """
+    Query multiple dataframes based on a user query and write the output dataframe to a specified output file path.
+    Args:
+        user_query (str): User query in natural language.
+        input_files (List[str]): List of paths to CSV files to be loaded into dataframes.
+        output_file_path (Optional[str]): Path to the output file where the resulting dataframe will be saved.
+            If not specified, a unique file path will be generated in '/tmp/run_interim_outputs/'.
+    Returns:
+        str: A JSON string representing the FileList containing the path to the output file if created, otherwise an empty list.
+    """
+    max_retries = 3
+    # Check if the list of CSV files or the user query is empty
+    if not input_files or not user_query:
+        # Return an empty FileList as JSON
+        return json.dumps({"files": []})
+    # If output_file_path is not specified, generate one
+    if not output_file_path:
+        output_folder = '/tmp/run_interim_outputs/'
+        # Ensure output_folder exists
+        os.makedirs(output_folder, exist_ok=True)
+        # Generate a unique filename
+        unique_number = int(time.time() * 1000)  # milliseconds since epoch
+        output_file_name = f'query_dataframe_{unique_number}.csv'
+        output_file_path = os.path.join(output_folder, output_file_name)
+    else:
+        # Ensure the directory exists
+        output_folder = os.path.dirname(output_file_path)
+        if output_folder:
+            os.makedirs(output_folder, exist_ok=True)
+    # Load CSV files into dataframes, skipping empty files
+    data_frames = []
+    df_names = []
+    for idx, file in enumerate(input_files):
+        # Check if the file is empty
+        if os.path.getsize(file) == 0:
+            # Skip empty files
+            continue
+        df = pd.read_csv(file)
+        data_frames.append(df)
+        df_name = f'df{idx+1}'
+        df_names.append(df_name)
+    # Check if any dataframes were loaded
+    if not data_frames:
+        # Return an empty FileList as JSON
+        return json.dumps({"files": []})
+    # Create a context with the dataframes and their schemas
+    schema_info = ""
+    for df_name, df in zip(df_names, data_frames):
+        schema_info += f"DataFrame '{df_name}' columns: {', '.join(df.columns)}\n"
+    # Initialize the error message as empty
+    error_message = ""
+    for attempt in range(max_retries):
+        # Prepare the message
+        message = f"""
+        You are an expert data analyst. Given the following DataFrames and their schemas:
+        {schema_info}
+        Write a pandas query to answer the following question:
+        \"\"\"{user_query}\"\"\"
+        Your query should use the provided DataFrames ({', '.join(df_names)}) and produce a DataFrame named 'result_df'. Do not include any imports or explanations; only provide the pandas query code that assigns the result to 'result_df'.
+        """
+        if error_message:
+            message += f"\nThe previous query returned the following error:\n{error_message}\nPlease fix the query."
+        # Get structured output
+        pandas_query_result, status = await get_structured_output(message, PandasQuery)
+        if status == 'SUCCESS' and pandas_query_result and pandas_query_result.pandas_query:
+            pandas_query = pandas_query_result.pandas_query
+            # Execute the query safely
+            local_vars = {name: df for name, df in zip(df_names, data_frames)}
+            global_vars = {}
+            try:
+                exec(pandas_query, global_vars, local_vars)
+                result_df = local_vars.get('result_df')
+                if result_df is None:
+                    raise ValueError("The query did not produce a DataFrame named 'result_df'.")
+                # If execution is successful, break out of the loop
+                break
+            except Exception as e:
+                # Capture the error message
+                error_message = str(e)
+                # If this was the last attempt, raise the error
+                if attempt == max_retries - 1:
+                    raise RuntimeError(f"Error executing generated query after {max_retries} attempts: {error_message}")
+                # Otherwise, continue to the next iteration
+                continue
+        else:
+            # If unable to get a valid response, raise an error
+            if attempt == max_retries - 1:
+                raise RuntimeError("Failed to get a valid pandas query after multiple attempts.")
+            continue
+    # Write the resulting DataFrame to the output file
+    result_df.to_csv(output_file_path, index=False)
+    # Create FileList object
+    file_list = FileList(files=[FileItem(file_path=output_file_path)])
+    # Convert FileList to JSON
+    def file_item_to_dict(file_item):
+        return {"file_path": file_item.file_path}
+    file_list_dict = {
+        "files": [file_item_to_dict(file_item) for file_item in file_list.files]
+    }
+    file_list_json = json.dumps(file_list_dict, indent=2)
+    return file_list_json
+@assistant_tool
+async def load_csv_file(input_file_path: str):
+    with open(input_file_path, newline='') as csvfile:
+        reader = csv.DictReader(csvfile)
+        return [row for row in reader]
+async def get_structured_output(message: str, response_type):
+    try:
+        client = AsyncOpenAI()
+        completion = await client.beta.chat.completions.parse(
+            model="gpt-4o-2024-08-06",
+            messages=[
+                {"role": "system", "content": "Extract structured content from input. Output is in JSON Format."},
+                {"role": "user", "content": message},
+            ],
+            response_format=response_type,
+        )
+        response = completion.choices[0].message
+        if response.parsed:
+            return response.parsed, 'SUCCESS'
+        elif response.refusal:
+            logging.warning("ERROR: Refusal response: %s", response.refusal)
+            return response.refusal, 'FAIL'
+    except LengthFinishReasonError as e:
+        logging.error(f"Too many tokens: {e}")
+        raise HTTPException(status_code=502, detail="The request exceeded the maximum token limit.")
+    except OpenAIError as e:
+        logging.error(f"OpenAI API error: {e}")
+        raise HTTPException(status_code=502, detail="Error communicating with the OpenAI API.")
+    except Exception as e:
+        logging.error(f"Unexpected error: {e}")
+        raise HTTPException(status_code=500, detail="An unexpected error occurred while processing your request.")
 GLOBAL_TOOLS_FUNCTIONS = {name: func for name, func in globals().items(
 ) if callable(func) and getattr(func, 'is_assistant_tool', False)}
+# import asyncio
+# import os
+# import pandas as pd
+# async def test_query_dataframes():
+#     # Setup: Create a temporary CSV file with sample data
+#     input_csv_path = '/tmp/leads_gtm/scored_leads_test.csv'
+#     output_csv_path = '/tmp/leads_gtm/totalled_score.csv'
+#     sample_data = {
+#         'job_title_match_score': [1, 2, 3],
+#         'skill_relevance_match_score': [1, 2, 3],
+#         'location_match_score': [1, 2, 3],
+#         'education_history_match_score': [1, 2, 3],
+#         'job_history_match_score': [1, 2, 3],
+#         'company_match_score': [1, 2, 3],
+#         'industry_match_score': [1, 2, 3],
+#         'keywords_match_score': [1, 2, 3]
+#     }
+#     df = pd.DataFrame(sample_data)
+#     df.to_csv(input_csv_path, index=False)
+#     # Define the input parameters
+#     input_csv_files = [input_csv_path]
+#     user_query = "Sum the columns 'job_title_match_score', 'skill_relevance_match_score', 'location_match_score', 'education_history_match_score', 'job_history_match_score', 'company_match_score', 'industry_match_score', 'keywords_match_score' to create a new column 'aggregate_score'. Save the output to '/tmp/totalled_score.csv'."
+#     output_file = output_csv_path
+#     # Call the function
+#     result = await query_dataframes(input_csv_files, user_query, output_file)
+#     # Verify the output
+#     assert os.path.exists(output_csv_path), "Output file was not created."
+#     result_df = pd.read_csv(output_csv_path)
+#     expected_aggregate_score = [8, 16, 24]
+#     assert 'aggregate_score' in result_df.columns, "Column 'aggregate_score' not found in the output."
+#     assert result_df['aggregate_score'].tolist() == expected_aggregate_score, "Aggregate scores do not match the expected values."
+# async def main():
+#     await test_query_dataframes()
+# if __name__ == '__main__':
+#     asyncio.run(main())

dhisana 0.0.1.dev5__tar.gz → 0.0.1.dev7__tar.gz

dhisana 0.0.1.dev5__tar.gz → 0.0.1.dev7__tar.gz