dhisana 0.0.1.dev243__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/__init__.py +1 -0
- dhisana/cli/__init__.py +1 -0
- dhisana/cli/cli.py +20 -0
- dhisana/cli/datasets.py +27 -0
- dhisana/cli/models.py +26 -0
- dhisana/cli/predictions.py +20 -0
- dhisana/schemas/__init__.py +1 -0
- dhisana/schemas/common.py +399 -0
- dhisana/schemas/sales.py +965 -0
- dhisana/ui/__init__.py +1 -0
- dhisana/ui/components.py +472 -0
- dhisana/utils/__init__.py +1 -0
- dhisana/utils/add_mapping.py +352 -0
- dhisana/utils/agent_tools.py +51 -0
- dhisana/utils/apollo_tools.py +1597 -0
- dhisana/utils/assistant_tool_tag.py +4 -0
- dhisana/utils/built_with_api_tools.py +282 -0
- dhisana/utils/cache_output_tools.py +98 -0
- dhisana/utils/cache_output_tools_local.py +78 -0
- dhisana/utils/check_email_validity_tools.py +717 -0
- dhisana/utils/check_for_intent_signal.py +107 -0
- dhisana/utils/check_linkedin_url_validity.py +209 -0
- dhisana/utils/clay_tools.py +43 -0
- dhisana/utils/clean_properties.py +135 -0
- dhisana/utils/company_utils.py +60 -0
- dhisana/utils/compose_salesnav_query.py +259 -0
- dhisana/utils/compose_search_query.py +759 -0
- dhisana/utils/compose_three_step_workflow.py +234 -0
- dhisana/utils/composite_tools.py +137 -0
- dhisana/utils/dataframe_tools.py +237 -0
- dhisana/utils/domain_parser.py +45 -0
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_parse_helpers.py +132 -0
- dhisana/utils/email_provider.py +375 -0
- dhisana/utils/enrich_lead_information.py +933 -0
- dhisana/utils/extract_email_content_for_llm.py +101 -0
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +426 -0
- dhisana/utils/g2_tools.py +104 -0
- dhisana/utils/generate_content.py +41 -0
- dhisana/utils/generate_custom_message.py +271 -0
- dhisana/utils/generate_email.py +278 -0
- dhisana/utils/generate_email_response.py +465 -0
- dhisana/utils/generate_flow.py +102 -0
- dhisana/utils/generate_leads_salesnav.py +303 -0
- dhisana/utils/generate_linkedin_connect_message.py +224 -0
- dhisana/utils/generate_linkedin_response_message.py +317 -0
- dhisana/utils/generate_structured_output_internal.py +462 -0
- dhisana/utils/google_custom_search.py +267 -0
- dhisana/utils/google_oauth_tools.py +727 -0
- dhisana/utils/google_workspace_tools.py +1294 -0
- dhisana/utils/hubspot_clearbit.py +96 -0
- dhisana/utils/hubspot_crm_tools.py +2440 -0
- dhisana/utils/instantly_tools.py +149 -0
- dhisana/utils/linkedin_crawler.py +168 -0
- dhisana/utils/lusha_tools.py +333 -0
- dhisana/utils/mailgun_tools.py +156 -0
- dhisana/utils/mailreach_tools.py +123 -0
- dhisana/utils/microsoft365_tools.py +455 -0
- dhisana/utils/openai_assistant_and_file_utils.py +267 -0
- dhisana/utils/openai_helpers.py +977 -0
- dhisana/utils/openapi_spec_to_tools.py +45 -0
- dhisana/utils/openapi_tool/__init__.py +1 -0
- dhisana/utils/openapi_tool/api_models.py +633 -0
- dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +271 -0
- dhisana/utils/openapi_tool/openapi_tool.py +319 -0
- dhisana/utils/parse_linkedin_messages_txt.py +100 -0
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +1226 -0
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/python_function_to_tools.py +83 -0
- dhisana/utils/research_lead.py +176 -0
- dhisana/utils/sales_navigator_crawler.py +1103 -0
- dhisana/utils/salesforce_crm_tools.py +477 -0
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +162 -0
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +852 -0
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +582 -0
- dhisana/utils/test_connect.py +2087 -0
- dhisana/utils/trasform_json.py +173 -0
- dhisana/utils/web_download_parse_tools.py +189 -0
- dhisana/utils/workflow_code_model.py +5 -0
- dhisana/utils/zoominfo_tools.py +357 -0
- dhisana/workflow/__init__.py +1 -0
- dhisana/workflow/agent.py +18 -0
- dhisana/workflow/flow.py +44 -0
- dhisana/workflow/task.py +43 -0
- dhisana/workflow/test.py +90 -0
- dhisana-0.0.1.dev243.dist-info/METADATA +43 -0
- dhisana-0.0.1.dev243.dist-info/RECORD +102 -0
- dhisana-0.0.1.dev243.dist-info/WHEEL +5 -0
- dhisana-0.0.1.dev243.dist-info/entry_points.txt +2 -0
- dhisana-0.0.1.dev243.dist-info/top_level.txt +1 -0
dhisana/utils/trasform_json.py
@@ -0,0 +1,173 @@

```python
import json
import logging
from typing import Any, Dict, List, Optional, Type

from pydantic import BaseModel

from dhisana.utils.assistant_tool_tag import assistant_tool
from dhisana.utils.generate_structured_output_internal import get_structured_output_internal

GLOBAL_GENERATED_PYTHON_CODE = {}

logger = logging.getLogger(__name__)


class GeneratedPythonCode(BaseModel):
    python_code: str


@assistant_tool
async def transform_json_code(
    input_json: str,
    output_json: str,
    function_name: str,
    tool_config: Optional[List[Dict]] = None
) -> str:
    """
    Use an LLM to generate Python code that transforms JSON from format X to format Y,
    and save the generated function to a global registry.

    Args:
        input_json (str): Example input JSON.
        output_json (str): Example output JSON.
        function_name (str): The name under which the generated Python function is saved.
        tool_config (Optional[List[Dict]]): Optional tool configuration.

    Returns:
        str: The function name that was saved to the global scope.
    """
    max_retries = 3
    error_message = ""

    for attempt in range(max_retries):
        # Prepare the message
        message = f"""
        Given the following input and output JSON schemas, generate a Python function that transforms the input JSON to the output JSON.
        Example Input JSON:
        {input_json}
        Example Output JSON:
        {output_json}
        Name the function as:
        {function_name}
        Check for NoneType in code before any concatenation and make sure errors do not happen like
        "unsupported operand type(s) for +: 'NoneType' and 'str'".
        Preserve the output type to be of the type in output JSON. Convert input field to string and
        assign to output field if types don't match.
        Return the function code in 'python_code'. Do not include any imports or explanations; only
        provide the '{function_name}' code that takes 'input_json' as input and returns the transformed
        'output_json' as output.
        """
        if error_message:
            message += f"\nThe previous attempt returned the following error:\n{error_message}\nPlease fix the function."

        # Get structured output
        generated_python_code, status = await get_structured_output_internal(message, GeneratedPythonCode, tool_config=tool_config)
        if status == 'SUCCESS' and generated_python_code and generated_python_code.python_code:
            function_string = generated_python_code.python_code
            # Execute the generated function
            try:
                exec(function_string, globals())
                # Test the function
                input_data = json.loads(input_json)
                output_data = globals()[function_name](input_data)
                if output_data:
                    # Cache the vetted function object
                    GLOBAL_GENERATED_PYTHON_CODE[function_name] = globals()[function_name]
                    return function_name
                else:
                    error_message = "The function did not produce the expected output."
            except Exception as e:
                error_message = str(e)
        else:
            error_message = "Failed to generate valid Python code."

        if attempt == max_retries - 1:
            raise RuntimeError(f"Error executing generated function after {max_retries} attempts: {error_message}")
```
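A minimal sketch of driving this tool end to end (the record shapes and the `map_contact_to_lead` name are hypothetical, and an LLM backend must be configured for `get_structured_output_internal`):

```python
import asyncio
import json

# Hypothetical contact record and target lead row; field names are illustrative only.
example_input = json.dumps({"person": {"first": "Ada", "last": "Lovelace"}, "org": {"name": "Analytical Engines"}})
example_output = json.dumps({"first_name": "Ada", "last_name": "Lovelace", "organization_name": "Analytical Engines"})

async def main():
    # Generates the transform, exec()s it into globals(), smoke-tests it on the
    # example input, and retries up to 3 times on failure.
    name = await transform_json_code(example_input, example_output, "map_contact_to_lead")
    # The vetted function is cached in GLOBAL_GENERATED_PYTHON_CODE and can be
    # reapplied to further records of the same shape without another LLM call.
    transform = GLOBAL_GENERATED_PYTHON_CODE[name]
    print(transform(json.loads(example_input)))

asyncio.run(main())
```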
```python
@assistant_tool
async def transform_json_with_type(
    input_json_str: str,
    response_type: Type[BaseModel],
    function_name: str,
    tool_config: Optional[List[Dict]] = None
):
    """
    Transforms the input JSON into the format specified by the given Pydantic response type.

    Args:
        input_json_str (str): The input JSON string to be transformed.
        response_type (Type[BaseModel]): The Pydantic model defining the desired output format.
        function_name (str): The name of the function to generate and execute.
        tool_config (Optional[List[Dict]]): Optional tool configuration.

    Returns:
        The transformed JSON string matching the response_type format.
    """
    # Create a sample instance of the Pydantic model
    sample_instance = response_type.construct()
    # Convert the instance to JSON
    response_type_json_str = sample_instance.json()
    return await transform_json_code(input_json_str, response_type_json_str, function_name, tool_config=tool_config)
```
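The helper relies on the Pydantic v1-style API (`construct()` plus `.json()`, matching the calls above) to turn a model class into the example-output JSON that `transform_json_code` needs. A sketch of that step in isolation, with a hypothetical `LeadRow` model:

```python
from pydantic import BaseModel

# Hypothetical target model; any model whose fields have defaults behaves the same way.
class LeadRow(BaseModel):
    first_name: str = ""
    last_name: str = ""
    organization_name: str = ""

# construct() builds an instance without validation; .json() serializes it,
# yielding the example-output JSON used as the target shape.
template = LeadRow.construct().json()
print(template)  # {"first_name": "", "last_name": "", "organization_name": ""}
```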
```python
# ----------------------------------------------------
# Property Mapping (LLM-based)
# ----------------------------------------------------
class PropertyMapping(BaseModel):
    input_property_name: str
    mapped_property_name: str


class PropertyMappingList(BaseModel):
    properties: List[PropertyMapping]


async def create_property_mapping(
    sample_input: Dict[str, Any],
    required_fields: List[str],
    entity_type: str,
    tool_config: Optional[List[Dict[str, Any]]] = None
) -> Dict[str, str]:
    """
    Generate a property mapping from the input fields to the required fields for either a
    Lead or an Account (Company). Calls an LLM to produce a JSON dictionary of field mappings.

    :param sample_input: A sample dictionary from the input data.
    :param required_fields: A list of fields we want to map to (e.g. ["organization_name", "first_name"]).
    :param entity_type: "lead" or "account", used in the prompt to clarify context for the LLM.
    :param tool_config: Optional LLM config.

    :return: Dict of {"existingField": "requiredFieldName", ...}
    """
    # Only show the top level of sample_input in the prompt, truncated for brevity
    truncated_sample = {k: str(sample_input[k])[:128] for k in list(sample_input.keys())}

    # Prepare a textual prompt for the LLM
    user_prompt = f"""
    The user has data representing a {entity_type} but the fields may not match the required format.
    Required fields are: {required_fields}.
    A sample of the input is: {json.dumps(truncated_sample, indent=2)}

    Please output a JSON mapping of input_property_name to mapped_property_name.
    You MUST map each input property to at most one output property.
    If an input property does not match any required field, skip it. Map the best match.
    DO NOT map the same input property to multiple output properties.
    """

    logger.info(f"Asking LLM to create property mapping for entity_type='{entity_type}'...")

    response, status = await get_structured_output_internal(
        prompt=user_prompt,
        response_format=PropertyMappingList,
        effort="high",
        model="gpt-5.1-chat",
        tool_config=tool_config
    )
    if status == "SUCCESS" and response and response.properties:
        mapping = {}
        for prop in response.properties:
            mapping[prop.input_property_name] = prop.mapped_property_name
        return mapping
    else:
        logger.warning("Could not generate property mapping from LLM. Returning empty mapping.")
        return {}
```
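A sketch of using the mapping to rename fields on a raw record (the `raw_lead` shape is illustrative, and the mapping shown in the comment is only one plausible LLM output):

```python
import asyncio

# Hypothetical CRM export row; field names are illustrative.
raw_lead = {"fname": "Grace", "lname": "Hopper", "company": "US Navy", "notes": "met at conference"}

async def main():
    mapping = await create_property_mapping(
        sample_input=raw_lead,
        required_fields=["first_name", "last_name", "organization_name"],
        entity_type="lead",
    )
    # e.g. {"fname": "first_name", "lname": "last_name", "company": "organization_name"}
    normalized = {mapping[k]: v for k, v in raw_lead.items() if k in mapping}
    print(normalized)

asyncio.run(main())
```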
dhisana/utils/web_download_parse_tools.py
@@ -0,0 +1,189 @@

```python
# Tools to download and parse web content.
# Uses Playwright to fetch HTML content from a URL, parse HTML content as text, and extract structured data from HTML content.

import csv
import logging
import os
import re
from datetime import datetime
from urllib.parse import urlparse

import html2text
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright

from dhisana.utils.assistant_tool_tag import assistant_tool
from dhisana.utils.dataframe_tools import get_structured_output


@assistant_tool
def parse_html_content_as_text(html_content):
    h = html2text.HTML2Text()
    h.ignore_links = False  # Keep links in the markdown output
    h.ignore_images = True  # Drop images
    return h.handle(html_content)

@assistant_tool
async def standardize_url(url):
    parsed_url = urlparse(url)
    if not parsed_url.scheme:
        url = "https://" + url
        parsed_url = urlparse(url)
    if parsed_url.hostname and parsed_url.hostname.count('.') == 1:
        url = url.replace(parsed_url.hostname, "www." + parsed_url.hostname)
    return url

@assistant_tool
async def fetch_html_content(url):
    url = await standardize_url(url)
    async with async_playwright() as playwright:
        browser = await playwright.chromium.launch(headless=True)
        context = await browser.new_context()
        page = await context.new_page()
        logging.info(f"Requesting {url}")
        try:
            await page.goto(url, timeout=10000)
            return await page.content()
        except Exception as e:
            logging.info(f"Failed to fetch {url}: {e}")
            return ""
        finally:
            await browser.close()
```
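A minimal sketch of the fetch pipeline (assumes Playwright's Chromium is installed, e.g. via `playwright install chromium`; the URL is illustrative):

```python
import asyncio

async def main():
    # standardize_url adds a missing scheme and prefixes "www." to a bare domain:
    # "example.com/about" -> "https://www.example.com/about"
    url = await standardize_url("example.com/about")
    html = await fetch_html_content(url)  # returns "" on timeout or navigation error
    print(parse_html_content_as_text(html))  # markdown-ish text with links preserved

asyncio.run(main())
```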
```python
@assistant_tool
async def get_html_content_from_url(url):
    html_content = await fetch_html_content(url)
    return await clean_html_content(html_content)

@assistant_tool
async def get_text_content_from_url(url):
    html_content = await fetch_html_content(url)
    return await parse_text_content(html_content)

@assistant_tool
async def parse_text_content(html_content):
    if not html_content:
        return ""
    soup = BeautifulSoup(html_content, 'html.parser')
    for element in soup(['script', 'style', 'meta', 'code', 'svg']):
        element.decompose()
    return soup.get_text(separator=' ', strip=True)

@assistant_tool
async def clean_html_content(html_content):
    if not html_content:
        return ""
    soup = BeautifulSoup(html_content, 'html.parser')
    for element in soup(['script', 'style', 'meta', 'code', 'svg']):
        element.decompose()
    return str(soup)
```
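To make the two cleaners concrete: `clean_html_content` keeps the markup minus scripts, styles, and similar noise, while `parse_text_content` flattens everything to plain text. A small illustrative run:

```python
import asyncio

snippet = "<html><head><script>track()</script></head><body><h1>Acme</h1><p>We make anvils.</p></body></html>"

async def main():
    print(await clean_html_content(snippet))
    # <html><head></head><body><h1>Acme</h1><p>We make anvils.</p></body></html>
    print(await parse_text_content(snippet))
    # Acme We make anvils.

asyncio.run(main())
```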
```python
@assistant_tool
async def process_files_in_folder_for_leads(folder_path: str, file_extension: str, response_list_type, response_item_type, output_file: str):
    """
    Process files in a folder, extract structured data, and write to a CSV file using properties from response_item_type.

    Parameters:
    - folder_path (str): The path to the folder containing files.
    - file_extension (str): The file extension to filter files (e.g., '.html').
    - response_list_type: The type of response expected from get_structured_output.
    - response_item_type: The Pydantic model for each item in the response.
    - output_file (str): The path where the output CSV file will be saved.

    Returns:
    - str: The file path of the generated CSV file.
    """

    # Ensure the parent directory of output_file exists
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    # Use the properties from response_item_type for headers
    keys = list(response_item_type.__fields__.keys())

    # Open the CSV file in write mode initially to write the header
    with open(output_file, 'w', newline='') as csv_file:
        dict_writer = csv.DictWriter(csv_file, fieldnames=keys)
        dict_writer.writeheader()

    # Process each file and append to the CSV file
    for file_name in os.listdir(folder_path):
        if file_name.endswith(file_extension):
            file_path = os.path.join(folder_path, file_name)
            with open(file_path, 'r') as file:
                html_content = file.read()
                parsed_content = parse_html_content_as_text(html_content)
                prompt = "Extract structured content from input. Output is in JSON Format. DO NOT make up values. Use what is provided in input. \n\n Input: " + parsed_content
                prompt = prompt[:1040000]  # Limit prompt length to 1,040,000 characters
                # Send the instruction-bearing prompt, not the raw page text, to the extractor
                structured_data, result = await get_structured_output(prompt, response_list_type)
                if result != 'SUCCESS':
                    logging.warning(f"Failed to extract structured data from {file_name}: {structured_data}")
                    continue
                for item in structured_data.data:
                    # Append each item to the CSV file immediately
                    with open(output_file, 'a', newline='') as csv_file:
                        dict_writer = csv.DictWriter(csv_file, fieldnames=keys)
                        dict_writer.writerow(item.dict())

    return output_file
```
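A sketch of invoking the batch extractor with hypothetical response models; note the list wrapper must expose its items as `.data`, since the loop above iterates `structured_data.data`:

```python
import asyncio
from typing import List
from pydantic import BaseModel

# Hypothetical models and paths, for illustration only.
class Lead(BaseModel):
    first_name: str = ""
    last_name: str = ""
    organization_name: str = ""

class LeadList(BaseModel):
    data: List[Lead] = []

asyncio.run(process_files_in_folder_for_leads(
    folder_path="./scraped_pages",
    file_extension=".html",
    response_list_type=LeadList,
    response_item_type=Lead,
    output_file="./out/leads.csv",
))
```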
```python
@assistant_tool
async def process_files_in_folder_for_linkedin_urls(folder_path: str, file_extension: str):
    """
    Process files in a folder, extract LinkedIn profile URLs, and write them to a CSV file.

    Parameters:
    - folder_path (str): The path to the folder containing files.
    - file_extension (str): The file extension to filter files (e.g., '.html').

    Returns:
    - str: The file path of the generated CSV file.
    """
    linkedin_urls = set()

    for file_name in os.listdir(folder_path):
        if file_name.endswith(file_extension):
            file_path = os.path.join(folder_path, file_name)
            with open(file_path, 'r') as file:
                html_content = file.read()
                soup = BeautifulSoup(html_content, 'html.parser')
                for link in soup.find_all('a', href=True):
                    url = link['href']
                    if re.match(r'^https://www\.linkedin\.com/in/[^?]+', url):
                        linkedin_urls.add(url.split('?')[0])  # Remove query parameters

    # Write the LinkedIn URLs to a CSV file
    csv_file_path = os.path.join(folder_path, 'linkedin_urls.csv')
    with open(csv_file_path, 'w', newline='') as csv_file:
        dict_writer = csv.DictWriter(csv_file, fieldnames=['id', 'linkedin_url'])
        dict_writer.writeheader()
        for url in linkedin_urls:
            unique_id = datetime.now().strftime('%Y%m%d%H%M%S%f')
            dict_writer.writerow({'id': unique_id, 'linkedin_url': url})

    return csv_file_path
```
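The profile filter reduces to a prefix regex plus query-string stripping; a standalone illustration with made-up hrefs:

```python
import re

hrefs = [
    "https://www.linkedin.com/in/jane-doe?miniProfileUrn=abc",  # matches; params dropped
    "https://www.linkedin.com/company/acme",                    # skipped: not a /in/ profile
    "/in/jane-doe",                                             # skipped: relative URL
]
pattern = re.compile(r'^https://www\.linkedin\.com/in/[^?]+')
kept = {h.split('?')[0] for h in hrefs if pattern.match(h)}
print(kept)  # {'https://www.linkedin.com/in/jane-doe'}
```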
```python
@assistant_tool
async def get_lead_urls_from_sales_nav_search_results(folder_path: str, file_extension: str, output_file_path: str):
    linkedin_urls = set()

    for file_name in os.listdir(folder_path):
        if file_name.endswith(file_extension):
            file_path = os.path.join(folder_path, file_name)
            with open(file_path, 'r') as file:
                html_content = file.read()
                soup = BeautifulSoup(html_content, 'html.parser')
                for link in soup.find_all('a', href=True):
                    url = link['href']
                    match = re.match(r'^/sales/lead/([^,]+),', url)
                    if match:
                        lead_id = match.group(1)
                        linkedin_urls.add(lead_id)

    # Write the LinkedIn URLs to a CSV file at the output_file_path
    with open(output_file_path, 'w', newline='') as csv_file:
        dict_writer = csv.DictWriter(csv_file, fieldnames=['id', 'linkedin_url'])
        dict_writer.writeheader()
        for lead_id in sorted(linkedin_urls):
            linkedin_url = f'https://www.linkedin.com/in/{lead_id}'
            dict_writer.writerow({'id': lead_id, 'linkedin_url': linkedin_url})

    return output_file_path
```
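For reference, the hrefs this expects are relative Sales Navigator lead paths of the form `/sales/lead/<id>,...`; a small illustration of the ID capture and the profile URL the function rebuilds from it (the ID value is made up):

```python
import re

href = "/sales/lead/ACwAAAexample123,NAME_SEARCH,a1b2"  # hypothetical Sales Navigator href
match = re.match(r'^/sales/lead/([^,]+),', href)
if match:
    lead_id = match.group(1)
    print(f"https://www.linkedin.com/in/{lead_id}")
    # https://www.linkedin.com/in/ACwAAAexample123
```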