dhisana 0.0.1.dev243__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. dhisana/__init__.py +1 -0
  2. dhisana/cli/__init__.py +1 -0
  3. dhisana/cli/cli.py +20 -0
  4. dhisana/cli/datasets.py +27 -0
  5. dhisana/cli/models.py +26 -0
  6. dhisana/cli/predictions.py +20 -0
  7. dhisana/schemas/__init__.py +1 -0
  8. dhisana/schemas/common.py +399 -0
  9. dhisana/schemas/sales.py +965 -0
  10. dhisana/ui/__init__.py +1 -0
  11. dhisana/ui/components.py +472 -0
  12. dhisana/utils/__init__.py +1 -0
  13. dhisana/utils/add_mapping.py +352 -0
  14. dhisana/utils/agent_tools.py +51 -0
  15. dhisana/utils/apollo_tools.py +1597 -0
  16. dhisana/utils/assistant_tool_tag.py +4 -0
  17. dhisana/utils/built_with_api_tools.py +282 -0
  18. dhisana/utils/cache_output_tools.py +98 -0
  19. dhisana/utils/cache_output_tools_local.py +78 -0
  20. dhisana/utils/check_email_validity_tools.py +717 -0
  21. dhisana/utils/check_for_intent_signal.py +107 -0
  22. dhisana/utils/check_linkedin_url_validity.py +209 -0
  23. dhisana/utils/clay_tools.py +43 -0
  24. dhisana/utils/clean_properties.py +135 -0
  25. dhisana/utils/company_utils.py +60 -0
  26. dhisana/utils/compose_salesnav_query.py +259 -0
  27. dhisana/utils/compose_search_query.py +759 -0
  28. dhisana/utils/compose_three_step_workflow.py +234 -0
  29. dhisana/utils/composite_tools.py +137 -0
  30. dhisana/utils/dataframe_tools.py +237 -0
  31. dhisana/utils/domain_parser.py +45 -0
  32. dhisana/utils/email_body_utils.py +72 -0
  33. dhisana/utils/email_parse_helpers.py +132 -0
  34. dhisana/utils/email_provider.py +375 -0
  35. dhisana/utils/enrich_lead_information.py +933 -0
  36. dhisana/utils/extract_email_content_for_llm.py +101 -0
  37. dhisana/utils/fetch_openai_config.py +129 -0
  38. dhisana/utils/field_validators.py +426 -0
  39. dhisana/utils/g2_tools.py +104 -0
  40. dhisana/utils/generate_content.py +41 -0
  41. dhisana/utils/generate_custom_message.py +271 -0
  42. dhisana/utils/generate_email.py +278 -0
  43. dhisana/utils/generate_email_response.py +465 -0
  44. dhisana/utils/generate_flow.py +102 -0
  45. dhisana/utils/generate_leads_salesnav.py +303 -0
  46. dhisana/utils/generate_linkedin_connect_message.py +224 -0
  47. dhisana/utils/generate_linkedin_response_message.py +317 -0
  48. dhisana/utils/generate_structured_output_internal.py +462 -0
  49. dhisana/utils/google_custom_search.py +267 -0
  50. dhisana/utils/google_oauth_tools.py +727 -0
  51. dhisana/utils/google_workspace_tools.py +1294 -0
  52. dhisana/utils/hubspot_clearbit.py +96 -0
  53. dhisana/utils/hubspot_crm_tools.py +2440 -0
  54. dhisana/utils/instantly_tools.py +149 -0
  55. dhisana/utils/linkedin_crawler.py +168 -0
  56. dhisana/utils/lusha_tools.py +333 -0
  57. dhisana/utils/mailgun_tools.py +156 -0
  58. dhisana/utils/mailreach_tools.py +123 -0
  59. dhisana/utils/microsoft365_tools.py +455 -0
  60. dhisana/utils/openai_assistant_and_file_utils.py +267 -0
  61. dhisana/utils/openai_helpers.py +977 -0
  62. dhisana/utils/openapi_spec_to_tools.py +45 -0
  63. dhisana/utils/openapi_tool/__init__.py +1 -0
  64. dhisana/utils/openapi_tool/api_models.py +633 -0
  65. dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +271 -0
  66. dhisana/utils/openapi_tool/openapi_tool.py +319 -0
  67. dhisana/utils/parse_linkedin_messages_txt.py +100 -0
  68. dhisana/utils/profile.py +37 -0
  69. dhisana/utils/proxy_curl_tools.py +1226 -0
  70. dhisana/utils/proxycurl_search_leads.py +426 -0
  71. dhisana/utils/python_function_to_tools.py +83 -0
  72. dhisana/utils/research_lead.py +176 -0
  73. dhisana/utils/sales_navigator_crawler.py +1103 -0
  74. dhisana/utils/salesforce_crm_tools.py +477 -0
  75. dhisana/utils/search_router.py +131 -0
  76. dhisana/utils/search_router_jobs.py +51 -0
  77. dhisana/utils/sendgrid_tools.py +162 -0
  78. dhisana/utils/serarch_router_local_business.py +75 -0
  79. dhisana/utils/serpapi_additional_tools.py +290 -0
  80. dhisana/utils/serpapi_google_jobs.py +117 -0
  81. dhisana/utils/serpapi_google_search.py +188 -0
  82. dhisana/utils/serpapi_local_business_search.py +129 -0
  83. dhisana/utils/serpapi_search_tools.py +852 -0
  84. dhisana/utils/serperdev_google_jobs.py +125 -0
  85. dhisana/utils/serperdev_local_business.py +154 -0
  86. dhisana/utils/serperdev_search.py +233 -0
  87. dhisana/utils/smtp_email_tools.py +582 -0
  88. dhisana/utils/test_connect.py +2087 -0
  89. dhisana/utils/trasform_json.py +173 -0
  90. dhisana/utils/web_download_parse_tools.py +189 -0
  91. dhisana/utils/workflow_code_model.py +5 -0
  92. dhisana/utils/zoominfo_tools.py +357 -0
  93. dhisana/workflow/__init__.py +1 -0
  94. dhisana/workflow/agent.py +18 -0
  95. dhisana/workflow/flow.py +44 -0
  96. dhisana/workflow/task.py +43 -0
  97. dhisana/workflow/test.py +90 -0
  98. dhisana-0.0.1.dev243.dist-info/METADATA +43 -0
  99. dhisana-0.0.1.dev243.dist-info/RECORD +102 -0
  100. dhisana-0.0.1.dev243.dist-info/WHEEL +5 -0
  101. dhisana-0.0.1.dev243.dist-info/entry_points.txt +2 -0
  102. dhisana-0.0.1.dev243.dist-info/top_level.txt +1 -0
dhisana/utils/trasform_json.py
@@ -0,0 +1,173 @@
+ import json
+ import logging
+ from typing import Any, Dict, List, Optional, Type
+
+ from pydantic import BaseModel
+
+ from dhisana.utils.assistant_tool_tag import assistant_tool
+ from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
+
+ GLOBAL_GENERATED_PYTHON_CODE = {}
+ logger = logging.getLogger(__name__)
+
+
+ class GeneratedPythonCode(BaseModel):
+     python_code: str
+
+
+ @assistant_tool
+ async def transform_json_code(
+     input_json: str,
+     output_json: str,
+     function_name: str,
+     tool_config: Optional[List[Dict]] = None
+ ) -> str:
+     """
+     Use an LLM to generate Python code that transforms JSON from format X to format Y,
+     and register the generated function in a global lookup table.
+
+     Args:
+         input_json (str): Example input JSON.
+         output_json (str): Example output JSON.
+         function_name (str): The name under which the generated Python function is saved.
+         tool_config (Optional[List[Dict]]): Optional tool configuration.
+
+     Returns:
+         str: The function name that was saved to the global scope.
+     """
+     max_retries = 3
+     error_message = ""
+
+     for attempt in range(max_retries):
+         # Prepare the prompt
+         message = f"""
+         Given the following input and output JSON schemas, generate a Python function that transforms the input JSON to the output JSON.
+         Example Input JSON:
+         {input_json}
+         Example Output JSON:
+         {output_json}
+         Name the function as:
+         {function_name}
+         Check for NoneType in code before any concatenation and make sure errors do not happen like
+         "unsupported operand type(s) for +: 'NoneType' and 'str'".
+         Preserve the output type to be of the type in output JSON. Convert input field to string and
+         assign to output field if types don't match.
+         Return the function code in 'python_code'. Do not include any imports or explanations; only
+         provide the '{function_name}' code that takes 'input_json' as input and returns the transformed
+         'output_json' as output.
+         """
+         if error_message:
+             message += f"\nThe previous attempt returned the following error:\n{error_message}\nPlease fix the function."
+
+         # Ask the LLM for structured output containing the generated code
+         generated_python_code, status = await get_structured_output_internal(message, GeneratedPythonCode, tool_config=tool_config)
+         if status == 'SUCCESS' and generated_python_code and generated_python_code.python_code:
+             function_string = generated_python_code.python_code
+             # Execute the LLM-generated (untrusted) code in module globals
+             try:
+                 exec(function_string, globals())
+                 # Smoke-test the function against the example input
+                 input_data = json.loads(input_json)
+                 output_data = globals()[function_name](input_data)
+                 if output_data:
+                     # Store the function for later reuse
+                     GLOBAL_GENERATED_PYTHON_CODE[function_name] = globals()[function_name]
+                     return function_name
+                 else:
+                     error_message = "The function did not produce the expected output."
+             except Exception as e:
+                 error_message = str(e)
+         else:
+             error_message = "Failed to generate valid Python code."
+
+         if attempt == max_retries - 1:
+             raise RuntimeError(f"Error executing generated function after {max_retries} attempts: {error_message}")
+
+
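Note: a minimal usage sketch for transform_json_code (not part of the diff; the example payloads and function name are hypothetical, and the call assumes an async entry point plus a tool_config that can reach an LLM):

    import asyncio

    input_example = '{"firstName": "Ada", "lastName": "Lovelace"}'
    output_example = '{"full_name": "Ada Lovelace"}'

    # Generates, smoke-tests, and registers a function named "to_full_name";
    # raises RuntimeError after 3 failed attempts.
    name = asyncio.run(transform_json_code(input_example, output_example, "to_full_name"))
    result = GLOBAL_GENERATED_PYTHON_CODE[name]({"firstName": "Grace", "lastName": "Hopper"})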
+ @assistant_tool
+ async def transform_json_with_type(
+     input_json_str: str,
+     response_type: Type[BaseModel],
+     function_name: str,
+     tool_config: Optional[List[Dict]] = None
+ ):
+     """
+     Generates a transformation function that converts the input JSON into the format
+     specified by the given Pydantic response type.
+
+     Args:
+         input_json_str (str): The input JSON string to be transformed.
+         response_type (Type[BaseModel]): The Pydantic model defining the desired output format.
+         function_name (str): The name of the function to generate and execute.
+         tool_config (Optional[List[Dict]]): Optional tool configuration.
+
+     Returns:
+         The name of the generated function saved to the global scope (see transform_json_code).
+     """
+     # Create an empty sample instance of the Pydantic model (pydantic v1-style API)
+     sample_instance = response_type.construct()
+     # Convert the instance to JSON to serve as the example output format
+     response_type_json_str = sample_instance.json()
+     return await transform_json_code(input_json_str, response_type_json_str, function_name, tool_config=tool_config)
+
+
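Note: a sketch of transform_json_with_type with a hypothetical Pydantic model (fields need defaults so that construct() yields a complete example output; assumes an async context and a configured LLM):

    import asyncio
    from pydantic import BaseModel

    class Contact(BaseModel):
        full_name: str = ""
        email: str = ""

    raw = '{"name": "Ada Lovelace", "emailAddress": "ada@example.com"}'
    func_name = asyncio.run(transform_json_with_type(raw, Contact, "to_contact"))
    contact = GLOBAL_GENERATED_PYTHON_CODE[func_name]({"name": "Grace Hopper", "emailAddress": "grace@example.com"})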
+ # ----------------------------------------------------
+ # Property Mapping (LLM-based)
+ # ----------------------------------------------------
+ class PropertyMapping(BaseModel):
+     input_property_name: str
+     mapped_property_name: str
+
+
+ class PropertyMappingList(BaseModel):
+     properties: List[PropertyMapping]
+
+
+ async def create_property_mapping(
+     sample_input: Dict[str, Any],
+     required_fields: List[str],
+     entity_type: str,
+     tool_config: Optional[List[Dict[str, Any]]] = None
+ ) -> Dict[str, str]:
+     """
+     Generate a property mapping from the input fields to the required fields for either a
+     Lead or an Account (Company). Calls an LLM to produce a JSON dictionary of field mappings.
+
+     :param sample_input: A sample dictionary from the input data.
+     :param required_fields: A list of fields we want to map to (e.g. ["organization_name", "first_name"]).
+     :param entity_type: "lead" or "account", used in the prompt to clarify context for the LLM.
+     :param tool_config: Optional LLM config.
+
+     :return: Dict of {"existingField": "requiredFieldName", ...}
+     """
+     # Show only top-level values, truncated to 128 characters, for brevity in the prompt
+     truncated_sample = {k: str(v)[:128] for k, v in sample_input.items()}
+
+     # Prepare a textual prompt for the LLM
+     user_prompt = f"""
+     The user has data representing a {entity_type} but the fields may not match the required format.
+     Required fields are: {required_fields}.
+     A sample of the input is: {json.dumps(truncated_sample, indent=2)}
+
+     Please output a JSON mapping of input_property_name to mapped_property_name.
+     You MUST map only one input property to one output property.
+     If an input property does not match any required field, skip it; otherwise map the best match.
+     DO NOT map the same input property to multiple output properties.
+     """
+
+     logger.info(f"Asking LLM to create property mapping for entity_type='{entity_type}'...")
+
+     response, status = await get_structured_output_internal(
+         prompt=user_prompt,
+         response_format=PropertyMappingList,
+         effort="high",
+         model="gpt-5.1-chat",
+         tool_config=tool_config
+     )
+     if status == "SUCCESS" and response and response.properties:
+         mapping = {}
+         for prop in response.properties:
+             mapping[prop.input_property_name] = prop.mapped_property_name
+         return mapping
+     else:
+         logger.warning("Could not generate property mapping from LLM. Returning empty mapping.")
+         return {}
+
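Note: a sketch of create_property_mapping with hypothetical CRM-style input (assumes an async context and a configured LLM; the actual mapping depends on the model):

    import asyncio

    sample = {"Company": "Acme Corp", "First": "Ada", "Last": "Lovelace"}
    required = ["organization_name", "first_name", "last_name"]
    mapping = asyncio.run(create_property_mapping(sample, required, "lead"))
    # Expected shape: {"Company": "organization_name", "First": "first_name", "Last": "last_name"}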
dhisana/utils/web_download_parse_tools.py
@@ -0,0 +1,189 @@
+ # Tools to download and parse web content.
+ # Uses Playwright to fetch HTML content from a URL, parse HTML content as text, and extract structured data from HTML content.
+
+ import csv
+ import logging
+ import os
+ import re
+ from datetime import datetime
+ from urllib.parse import urlparse
+
+ import html2text
+ from bs4 import BeautifulSoup
+ from playwright.async_api import async_playwright
+
+ from dhisana.utils.assistant_tool_tag import assistant_tool
+ from dhisana.utils.dataframe_tools import get_structured_output
+
+
+ @assistant_tool
+ def parse_html_content_as_text(html_content):
+     h = html2text.HTML2Text()
+     h.ignore_links = False  # Keep links in the markdown output
+     h.ignore_images = True  # Drop images
+     return h.handle(html_content)
+
+ @assistant_tool
+ async def standardize_url(url):
+     parsed_url = urlparse(url)
+     if not parsed_url.scheme:
+         url = "https://" + url
+         parsed_url = urlparse(url)
+     # Prepend "www." to bare domains like "example.com"
+     if parsed_url.hostname and parsed_url.hostname.count('.') == 1:
+         url = url.replace(parsed_url.hostname, "www." + parsed_url.hostname)
+     return url
+
+ @assistant_tool
+ async def fetch_html_content(url):
+     url = await standardize_url(url)
+     async with async_playwright() as playwright:
+         browser = await playwright.chromium.launch(headless=True)
+         context = await browser.new_context()
+         page = await context.new_page()
+         logging.info(f"Requesting {url}")
+         try:
+             await page.goto(url, timeout=10000)  # 10-second navigation timeout
+             return await page.content()
+         except Exception as e:
+             logging.info(f"Failed to fetch {url}: {e}")
+             return ""
+         finally:
+             await browser.close()
+
+ @assistant_tool
+ async def get_html_content_from_url(url):
+     html_content = await fetch_html_content(url)
+     return await clean_html_content(html_content)
+
+ @assistant_tool
+ async def get_text_content_from_url(url):
+     html_content = await fetch_html_content(url)
+     return await parse_text_content(html_content)
+
+ @assistant_tool
+ async def parse_text_content(html_content):
+     if not html_content:
+         return ""
+     soup = BeautifulSoup(html_content, 'html.parser')
+     for element in soup(['script', 'style', 'meta', 'code', 'svg']):
+         element.decompose()
+     return soup.get_text(separator=' ', strip=True)
+
+ @assistant_tool
+ async def clean_html_content(html_content):
+     if not html_content:
+         return ""
+     soup = BeautifulSoup(html_content, 'html.parser')
+     for element in soup(['script', 'style', 'meta', 'code', 'svg']):
+         element.decompose()
+     return str(soup)
+
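Note: a sketch of the fetch/parse pipeline with a hypothetical URL (requires Playwright browsers to be installed, e.g. via `playwright install chromium`):

    import asyncio

    # standardize_url rewrites "example.com" to "https://www.example.com" before fetching;
    # fetch_html_content returns "" if navigation fails or times out.
    text = asyncio.run(get_text_content_from_url("example.com"))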
+ @assistant_tool
+ async def process_files_in_folder_for_leads(folder_path: str, file_extension: str, response_list_type, response_item_type, output_file: str):
+     """
+     Process files in a folder, extract structured data, and write to a CSV file using properties from response_item_type.
+
+     Parameters:
+     - folder_path (str): The path to the folder containing files.
+     - file_extension (str): The file extension to filter files (e.g., '.html').
+     - response_list_type: The type of response expected from get_structured_output.
+     - response_item_type: The Pydantic model for each item in the response.
+     - output_file (str): The path where the output CSV file will be saved.
+
+     Returns:
+     - str: The file path of the generated CSV file.
+     """
+
+     # Ensure the parent directory of output_file exists (handles bare filenames too)
+     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+     # Use the properties from response_item_type for the CSV headers
+     keys = list(response_item_type.__fields__.keys())
+
+     # Open the CSV file in write mode initially to write the header
+     with open(output_file, 'w', newline='') as csv_file:
+         dict_writer = csv.DictWriter(csv_file, fieldnames=keys)
+         dict_writer.writeheader()
+
+     # Process each file and append its rows to the CSV file
+     for file_name in os.listdir(folder_path):
+         if file_name.endswith(file_extension):
+             file_path = os.path.join(folder_path, file_name)
+             with open(file_path, 'r') as file:
+                 html_content = file.read()
+             parsed_content = parse_html_content_as_text(html_content)
+             prompt = "Extract structured content from input. Output is in JSON Format. DO NOT make up values. Use what is provided in input. \n\n Input: " + parsed_content
+             prompt = prompt[:1040000]  # Cap the prompt length at 1,040,000 characters
+             structured_data, result = await get_structured_output(prompt, response_list_type)
+             if result != 'SUCCESS':
+                 logging.warning(f"Failed to extract structured data from {file_name}: {structured_data}")
+                 continue
+             # Append the extracted items to the CSV file
+             with open(output_file, 'a', newline='') as csv_file:
+                 dict_writer = csv.DictWriter(csv_file, fieldnames=keys)
+                 for item in structured_data.data:
+                     dict_writer.writerow(item.dict())
+
+     return output_file
+
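Note: a usage sketch with hypothetical models and paths (assumes get_structured_output returns an object whose .data attribute is a list of response_item_type instances, as the loop above implies):

    import asyncio
    from typing import List
    from pydantic import BaseModel

    class Lead(BaseModel):
        first_name: str = ""
        last_name: str = ""
        organization_name: str = ""

    class LeadList(BaseModel):
        data: List[Lead]

    csv_path = asyncio.run(process_files_in_folder_for_leads(
        "./pages", ".html", LeadList, Lead, "./out/leads.csv"))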
+ @assistant_tool
+ async def process_files_in_folder_for_linkedin_urls(folder_path: str, file_extension: str):
+     """
+     Process files in a folder, extract LinkedIn profile URLs, and write them to a CSV file.
+
+     Parameters:
+     - folder_path (str): The path to the folder containing files.
+     - file_extension (str): The file extension to filter files (e.g., '.html').
+
+     Returns:
+     - str: The file path of the generated CSV file.
+     """
+     linkedin_urls = set()
+
+     for file_name in os.listdir(folder_path):
+         if file_name.endswith(file_extension):
+             file_path = os.path.join(folder_path, file_name)
+             with open(file_path, 'r') as file:
+                 html_content = file.read()
+             soup = BeautifulSoup(html_content, 'html.parser')
+             for link in soup.find_all('a', href=True):
+                 url = link['href']
+                 if re.match(r'^https://www\.linkedin\.com/in/[^?]+', url):
+                     linkedin_urls.add(url.split('?')[0])  # Strip query parameters
+
+     # Write the deduplicated LinkedIn URLs to a CSV file
+     csv_file_path = os.path.join(folder_path, 'linkedin_urls.csv')
+     with open(csv_file_path, 'w', newline='') as csv_file:
+         dict_writer = csv.DictWriter(csv_file, fieldnames=['id', 'linkedin_url'])
+         dict_writer.writeheader()
+         for url in linkedin_urls:
+             unique_id = datetime.now().strftime('%Y%m%d%H%M%S%f')  # Timestamp-based id
+             dict_writer.writerow({'id': unique_id, 'linkedin_url': url})
+
+     return csv_file_path
+
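Note: the profile-URL filter above keeps only canonical /in/ links and strips tracking parameters; for example (hypothetical URL):

    import re

    url = "https://www.linkedin.com/in/ada-lovelace?trk=people-search"
    if re.match(r'^https://www\.linkedin\.com/in/[^?]+', url):
        print(url.split('?')[0])  # https://www.linkedin.com/in/ada-lovelace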
+ @assistant_tool
+ async def get_lead_urls_from_sales_nav_search_results(folder_path: str, file_extension: str, output_file_path: str):
+     """
+     Extract lead ids from saved Sales Navigator search-result pages and write the
+     corresponding LinkedIn profile URLs to a CSV file at output_file_path.
+     """
+     linkedin_urls = set()
+
+     for file_name in os.listdir(folder_path):
+         if file_name.endswith(file_extension):
+             file_path = os.path.join(folder_path, file_name)
+             with open(file_path, 'r') as file:
+                 html_content = file.read()
+             soup = BeautifulSoup(html_content, 'html.parser')
+             for link in soup.find_all('a', href=True):
+                 url = link['href']
+                 # Sales Navigator lead links look like /sales/lead/<lead-id>,...
+                 match = re.match(r'^/sales/lead/([^,]+),', url)
+                 if match:
+                     lead_id = match.group(1)
+                     linkedin_urls.add(lead_id)
+
+     # Write the LinkedIn URLs to a CSV file at the output_file_path
+     with open(output_file_path, 'w', newline='') as csv_file:
+         dict_writer = csv.DictWriter(csv_file, fieldnames=['id', 'linkedin_url'])
+         dict_writer.writeheader()
+         for lead_id in sorted(linkedin_urls):
+             linkedin_url = f'https://www.linkedin.com/in/{lead_id}'
+             dict_writer.writerow({'id': lead_id, 'linkedin_url': linkedin_url})
+
+     return output_file_path
+
dhisana/utils/workflow_code_model.py
@@ -0,0 +1,5 @@
+ from pydantic import BaseModel
+
+
+ class WorkflowPythonCode(BaseModel):
+     workflow_python_code: str