dhisana 0.0.1.dev243__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. dhisana/__init__.py +1 -0
  2. dhisana/cli/__init__.py +1 -0
  3. dhisana/cli/cli.py +20 -0
  4. dhisana/cli/datasets.py +27 -0
  5. dhisana/cli/models.py +26 -0
  6. dhisana/cli/predictions.py +20 -0
  7. dhisana/schemas/__init__.py +1 -0
  8. dhisana/schemas/common.py +399 -0
  9. dhisana/schemas/sales.py +965 -0
  10. dhisana/ui/__init__.py +1 -0
  11. dhisana/ui/components.py +472 -0
  12. dhisana/utils/__init__.py +1 -0
  13. dhisana/utils/add_mapping.py +352 -0
  14. dhisana/utils/agent_tools.py +51 -0
  15. dhisana/utils/apollo_tools.py +1597 -0
  16. dhisana/utils/assistant_tool_tag.py +4 -0
  17. dhisana/utils/built_with_api_tools.py +282 -0
  18. dhisana/utils/cache_output_tools.py +98 -0
  19. dhisana/utils/cache_output_tools_local.py +78 -0
  20. dhisana/utils/check_email_validity_tools.py +717 -0
  21. dhisana/utils/check_for_intent_signal.py +107 -0
  22. dhisana/utils/check_linkedin_url_validity.py +209 -0
  23. dhisana/utils/clay_tools.py +43 -0
  24. dhisana/utils/clean_properties.py +135 -0
  25. dhisana/utils/company_utils.py +60 -0
  26. dhisana/utils/compose_salesnav_query.py +259 -0
  27. dhisana/utils/compose_search_query.py +759 -0
  28. dhisana/utils/compose_three_step_workflow.py +234 -0
  29. dhisana/utils/composite_tools.py +137 -0
  30. dhisana/utils/dataframe_tools.py +237 -0
  31. dhisana/utils/domain_parser.py +45 -0
  32. dhisana/utils/email_body_utils.py +72 -0
  33. dhisana/utils/email_parse_helpers.py +132 -0
  34. dhisana/utils/email_provider.py +375 -0
  35. dhisana/utils/enrich_lead_information.py +933 -0
  36. dhisana/utils/extract_email_content_for_llm.py +101 -0
  37. dhisana/utils/fetch_openai_config.py +129 -0
  38. dhisana/utils/field_validators.py +426 -0
  39. dhisana/utils/g2_tools.py +104 -0
  40. dhisana/utils/generate_content.py +41 -0
  41. dhisana/utils/generate_custom_message.py +271 -0
  42. dhisana/utils/generate_email.py +278 -0
  43. dhisana/utils/generate_email_response.py +465 -0
  44. dhisana/utils/generate_flow.py +102 -0
  45. dhisana/utils/generate_leads_salesnav.py +303 -0
  46. dhisana/utils/generate_linkedin_connect_message.py +224 -0
  47. dhisana/utils/generate_linkedin_response_message.py +317 -0
  48. dhisana/utils/generate_structured_output_internal.py +462 -0
  49. dhisana/utils/google_custom_search.py +267 -0
  50. dhisana/utils/google_oauth_tools.py +727 -0
  51. dhisana/utils/google_workspace_tools.py +1294 -0
  52. dhisana/utils/hubspot_clearbit.py +96 -0
  53. dhisana/utils/hubspot_crm_tools.py +2440 -0
  54. dhisana/utils/instantly_tools.py +149 -0
  55. dhisana/utils/linkedin_crawler.py +168 -0
  56. dhisana/utils/lusha_tools.py +333 -0
  57. dhisana/utils/mailgun_tools.py +156 -0
  58. dhisana/utils/mailreach_tools.py +123 -0
  59. dhisana/utils/microsoft365_tools.py +455 -0
  60. dhisana/utils/openai_assistant_and_file_utils.py +267 -0
  61. dhisana/utils/openai_helpers.py +977 -0
  62. dhisana/utils/openapi_spec_to_tools.py +45 -0
  63. dhisana/utils/openapi_tool/__init__.py +1 -0
  64. dhisana/utils/openapi_tool/api_models.py +633 -0
  65. dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +271 -0
  66. dhisana/utils/openapi_tool/openapi_tool.py +319 -0
  67. dhisana/utils/parse_linkedin_messages_txt.py +100 -0
  68. dhisana/utils/profile.py +37 -0
  69. dhisana/utils/proxy_curl_tools.py +1226 -0
  70. dhisana/utils/proxycurl_search_leads.py +426 -0
  71. dhisana/utils/python_function_to_tools.py +83 -0
  72. dhisana/utils/research_lead.py +176 -0
  73. dhisana/utils/sales_navigator_crawler.py +1103 -0
  74. dhisana/utils/salesforce_crm_tools.py +477 -0
  75. dhisana/utils/search_router.py +131 -0
  76. dhisana/utils/search_router_jobs.py +51 -0
  77. dhisana/utils/sendgrid_tools.py +162 -0
  78. dhisana/utils/serarch_router_local_business.py +75 -0
  79. dhisana/utils/serpapi_additional_tools.py +290 -0
  80. dhisana/utils/serpapi_google_jobs.py +117 -0
  81. dhisana/utils/serpapi_google_search.py +188 -0
  82. dhisana/utils/serpapi_local_business_search.py +129 -0
  83. dhisana/utils/serpapi_search_tools.py +852 -0
  84. dhisana/utils/serperdev_google_jobs.py +125 -0
  85. dhisana/utils/serperdev_local_business.py +154 -0
  86. dhisana/utils/serperdev_search.py +233 -0
  87. dhisana/utils/smtp_email_tools.py +582 -0
  88. dhisana/utils/test_connect.py +2087 -0
  89. dhisana/utils/trasform_json.py +173 -0
  90. dhisana/utils/web_download_parse_tools.py +189 -0
  91. dhisana/utils/workflow_code_model.py +5 -0
  92. dhisana/utils/zoominfo_tools.py +357 -0
  93. dhisana/workflow/__init__.py +1 -0
  94. dhisana/workflow/agent.py +18 -0
  95. dhisana/workflow/flow.py +44 -0
  96. dhisana/workflow/task.py +43 -0
  97. dhisana/workflow/test.py +90 -0
  98. dhisana-0.0.1.dev243.dist-info/METADATA +43 -0
  99. dhisana-0.0.1.dev243.dist-info/RECORD +102 -0
  100. dhisana-0.0.1.dev243.dist-info/WHEEL +5 -0
  101. dhisana-0.0.1.dev243.dist-info/entry_points.txt +2 -0
  102. dhisana-0.0.1.dev243.dist-info/top_level.txt +1 -0
dhisana/utils/sales_navigator_crawler.py
@@ -0,0 +1,1103 @@
+ # Implements the LinkedIn Sales Navigator client agent.
+ # Polls the service for tasks such as navigating to a URL, sending connection
+ # requests, and sending messages; executes the tasks and sends the results
+ # back to the service.
+
+ import asyncio
+ import json
+ import logging
+ import os
+ import re
+ from datetime import datetime
+ from typing import Any, Dict, List
+
+ import html2text
+ import pyperclip
+ import requests  # or aiohttp if you prefer async calls
+ from bs4 import BeautifulSoup
+ from playwright.async_api import Page, async_playwright
+ from playwright.async_api import TimeoutError as PlaywrightTimeoutError
+
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+ # Global references
+ playwright_client = None
+ browser = None
+ context = None
+ page = None
+
+ SERVICE_URL = os.environ.get("AGENT_SERVICE_URL", "")
+ AGENT_ID = os.environ.get("AGENT_ID", "")
+ AGENT_API_KEY = os.environ.get("API_KEY", "")
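+
+ # Illustrative shell setup for the three settings above. The variable names
+ # come from this module; the values are placeholders, not real endpoints:
+ #
+ #   export AGENT_SERVICE_URL="https://service.example.com/api"
+ #   export AGENT_ID="agent-001"
+ #   export API_KEY="your-api-key"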
+
+ # -------------------------------------------------------
+ # Commands to execute on the page
+ # -------------------------------------------------------
+ async def navigate_to_url(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     """Navigate to a given URL."""
+     url = command_args.get('url', '')
+     if not url:
+         return {"status": "ERROR", "message": "No URL provided"}
+     await page.goto(url)
+     await page.wait_for_timeout(2000)
+     return {"status": "SUCCESS", "message": f"Navigated to {url}"}
+
+ async def click_lead_connect_menu(page: Page) -> Dict[str, Any]:
+     """Open the actions overflow menu on a lead page."""
+     overflow_button = await page.query_selector('button[aria-label="Open actions overflow menu"]')
+     if not overflow_button:
+         return {"status": "FAILURE", "message": "Actions overflow menu button not found"}
+     await overflow_button.click()
+     await page.wait_for_timeout(2000)
+     return {"status": "SUCCESS", "message": "click_actions_overflow_menu executed"}
+
+
+ async def goto_url(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     """
+     Navigates directly to a Sales Navigator lead or list URL, or performs a
+     Sales Navigator search and opens the first matching lead.
+
+     Args:
+         page: Playwright Page object.
+         command_args: Dictionary containing 'user_linkedin_salesnav_url',
+             'salesnav_url_list_leads', or 'user_linkedin_salesnav_url_by_name'.
+
+     Returns:
+         Dictionary with navigation status and message.
+     """
+     user_linkedin_salesnav_url_by_name = command_args.get("user_linkedin_salesnav_url_by_name", "").strip()
+     user_linkedin_salesnav_url = command_args.get("user_linkedin_salesnav_url", "").strip()
+     salesnav_url_list_leads = command_args.get("salesnav_url_list_leads", "").strip()
+
+     try:
+         if user_linkedin_salesnav_url:
+             # Navigate directly to the provided Sales Navigator lead URL
+             await page.goto(user_linkedin_salesnav_url)
+             await page.wait_for_selector("body", timeout=5000)
+             await page.wait_for_timeout(2000)
+             return {"status": "SUCCESS", "message": "Navigated to Sales Navigator URL"}
+         elif salesnav_url_list_leads:
+             # Navigate directly to the provided Sales Navigator list URL
+             await page.goto(salesnav_url_list_leads)
+             await page.wait_for_selector("body", timeout=5000)
+             await page.wait_for_timeout(2000)
+             return {"status": "SUCCESS", "message": "Navigated to Sales Navigator URL"}
+         elif user_linkedin_salesnav_url_by_name:
+             # Navigate to the Sales Navigator search page
+             await page.goto(user_linkedin_salesnav_url_by_name)
+             await page.wait_for_selector("body", timeout=5000)
+             # Collect anchors on the page whose href starts with /sales/lead/
+             anchors = await page.query_selector_all('a[href^="/sales/lead/"]')
+             unique_links = set()
+             for anchor in anchors:
+                 link = await anchor.get_attribute("href")
+                 if link and link.startswith("/sales/lead/"):
+                     unique_links.add("https://www.linkedin.com" + link)
+
+             if not unique_links:
+                 return {"status": "ERROR", "message": "No lead links found in search results"}
+
+             # Go to the first link and return the status.
+             # TODO: add additional logic to select the correct link.
+             first_link = list(unique_links)[0]
+             await page.goto(first_link)
+             await page.wait_for_selector("body", timeout=5000)
+             await page.wait_for_timeout(2000)
+             return {"status": "SUCCESS", "message": "Navigated to first search result"}
+         else:
+             return {"status": "ERROR", "message": "No URL provided"}
+
+     except Exception as e:
+         return {"status": "ERROR", "message": f"Navigation failed: {str(e)}"}
+
+ async def send_connection_request(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     await goto_url(page, command_args)
+     # Not a sales lead page check
+     if "/sales/lead/" not in page.url:
+         return {"status": "ERROR", "message": "Not a sales lead page"}
+
+     response = await click_lead_connect_menu(page)
+     if response["status"] == "FAILURE":
+         return response
+
+     # Check if there is a Pending button
+     pending_button = await page.query_selector('button:has-text("Pending"), a:has-text("Pending")')
+     if pending_button:
+         return {"status": "SUCCESS", "message": "Connection request is already pending"}
+
+     # Check if there is a Connect button
+     connect_button = await page.query_selector('button:has-text("Connect"), a:has-text("Connect")')
+     if not connect_button:
+         first_degree = await page.query_selector('span:has-text("1st")')
+         if first_degree:
+             return {"status": "SUCCESS", "message": "User already connected"}
+         else:
+             return {"status": "FAILURE", "message": "Connect button not found"}
+
+     await connect_button.click()
+     await page.wait_for_timeout(2000)
+
+     # Click the Send Invitation button if present
+     send_invite_button = await page.query_selector('button:has-text("Send Invitation"), a:has-text("Send Invitation")')
+     if not send_invite_button:
+         return {"status": "FAILURE", "message": "Send Invitation button not found"}
+     await send_invite_button.click()
+
+     return {"status": "SUCCESS", "message": "Connection requested successfully"}
+
+ async def view_linkedin_profile(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     await goto_url(page, command_args)
+     if "/sales/lead/" not in page.url:
+         return {"status": "ERROR", "message": "Not a sales lead page"}
+     response = await click_lead_connect_menu(page)
+     if response["status"] == "FAILURE":
+         return response
+     view_linkedin_button = await page.query_selector('button:has-text("View LinkedIn profile"), a:has-text("View LinkedIn profile")')
+     if not view_linkedin_button:
+         return {"status": "FAILURE", "message": "View LinkedIn profile button not found"}
+     await view_linkedin_button.click()
+     return {"status": "SUCCESS", "message": "LinkedIn profile opened successfully"}
+
+ async def find_button_by_name(page: Page, button_name: str) -> Any:
+     """Return the first button whose visible text contains button_name."""
+     buttons = await page.query_selector_all("button")
+     for b in buttons:
+         text_content = await b.inner_text()
+         if button_name in text_content:
+             return b
+     return None
+
+ async def send_linkedin_message(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     await goto_url(page, command_args)
+
+     if "/sales/lead/" not in page.url:
+         return {"status": "ERROR", "message": "Not a sales lead page"}
+
+     message_button = await find_button_by_name(page, "Message")
+     if not message_button:
+         return {"status": "FAILURE", "message": "Message button not found"}
+     await message_button.click()
+     await page.wait_for_timeout(2000)
+
+     textarea_message = await page.query_selector('textarea[name="message"]')
+     if not textarea_message:
+         return {"status": "FAILURE", "message": "Message text area not found"}
+
+     message = command_args.get("message", "")
+     if not message:
+         return {"status": "FAILURE", "message": "No message provided"}
+     await textarea_message.fill(message)
+     await page.wait_for_timeout(2000)
+     send_button = await find_button_by_name(page, "Send")
+     if not send_button:
+         return {"status": "FAILURE", "message": "Message Send button not found"}
+     await send_button.click()
+
+     return {"status": "SUCCESS", "message": "Message sent successfully"}
+
+ def html_to_text(html_content: str) -> str:
+     """Converts HTML content to text using html2text."""
+     h = html2text.HTML2Text()
+     h.ignore_links = False
+     h.ignore_images = True
+     return h.handle(html_content)
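+
+ # Illustrative usage only (input and output are made-up examples):
+ #   html_to_text("<p>Hi <a href='https://example.com'>there</a></p>")
+ # returns roughly "Hi [there](https://example.com)\n\n"; links are kept
+ # inline because ignore_links=False above.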
+
+ async def find_messages_container(page: Page) -> Any:
+     """Find the <ul> element that holds the message thread in the overlay."""
+     section_messages = await page.query_selector('section[data-message-overlay-container]')
+     if not section_messages:
+         return None
+     ul_elements = await section_messages.query_selector_all('ul')
+     for ul in ul_elements:
+         inner_html = await ul.inner_html()
+         if "This is the" in inner_html:
+             return ul
+     return None
+
+ async def get_current_messages(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     await goto_url(page, command_args)
+     if "/sales/lead/" not in page.url:
+         return {"status": "ERROR", "message": "Not a sales lead page"}
+     message_button = await find_button_by_name(page, "Message")
+     if not message_button:
+         return {"status": "FAILURE", "message": "Message button not found"}
+     await message_button.click()
+     await page.wait_for_timeout(2000)
+     section_messages = await find_messages_container(page)
+     if not section_messages:
+         return {"status": "FAILURE", "message": "Message container not found"}
+     raw_html = await section_messages.inner_html()
+     text_messages = html_to_text(raw_html)
+     return {
+         "status": "SUCCESS",
+         "message": "Messages retrieved successfully",
+         "data": text_messages
+     }
+
+ # -----------------------------
+ # Extraction Helpers
+ # -----------------------------
+
+ async def scroll_to_bottom(page: Page):
+     """
+     Scrolls to the bottom of the page, mirroring the JS implementation:
+     1. If '#search-results-container' is present, scroll it in steps of 500px
+        until the bottom is reached.
+     2. Otherwise, move the mouse to the page's center-right, wheel-scroll by
+        1/4 of the total page height, wait, and then return.
+     """
+     try:
+         # Move the mouse roughly to the right-center of the page
+         viewport = page.viewport_size
+         if viewport is None:
+             logger.error("Could not retrieve viewport size, cannot move the mouse.")
+             return
+         width, height = viewport["width"], viewport["height"]
+         x = (width * 3) / 4
+         y = height / 2
+         await page.mouse.move(x, y)
+
+         container = await page.query_selector("#search-results-container")
+         if container:
+             logger.info("Found '#search-results-container'. Scrolling within the container.")
+             max_scroll_attempts = 10
+             scroll_count = 0
+             last_scroll_top = -1
+             while scroll_count < max_scroll_attempts:
+                 await container.evaluate("el => el.scrollBy(0, 500)")
+                 await page.wait_for_timeout(3000)
+
+                 scroll_top = await container.evaluate("el => el.scrollTop")
+                 if scroll_top == last_scroll_top:
+                     logger.info("Reached the bottom of '#search-results-container'.")
+                     break
+                 last_scroll_top = scroll_top
+                 scroll_count += 1
+         else:
+             doc_height = await page.evaluate("() => document.body.scrollHeight")
+             scroll_amount = doc_height // 4
+             logger.info("'#search-results-container' not found. Scrolling the page directly by 1/4.")
+             await page.mouse.wheel(0, scroll_amount)
+             await page.wait_for_timeout(2000)
+
+         logger.info("Completed scrolling.")
+     except Exception as e:
+         logger.error(f"Failed to execute scroll_to_bottom: {e}")
+
+ ## Parsing HTML and sending relevant content back to the service
+ def cleanup_html(html_content: str) -> str:
+     """
+     Cleans up the HTML content by removing <script>, <style>, <svg>, <meta>,
+     <code> tags and inline styles/classes. This mimics the JS approach of
+     removing unneeded items.
+     """
+     soup = BeautifulSoup(html_content, "html.parser")
+
+     # Remove script, style, svg, meta, code elements
+     for tag in soup(["script", "style", "svg", "meta", "code"]):
+         tag.decompose()
+
+     # Remove inline style and class attributes
+     for attr_tag in soup.select("[style]"):
+         del attr_tag["style"]
+     for attr_tag in soup.select("[class]"):
+         del attr_tag["class"]
+
+     # Convert back to string
+     cleaned_html = str(soup)
+     return cleaned_html
+
+
+ async def extract_leads_from_current_page(page: Page):
+     """
+     Extract leads from the current search or list page.
+     Mirrors the logic from JS: looks for 'div[data-x-search-result="LEAD"]'
+     or 'tr[data-row-id]' in /sales/lists/people pages. Extracts data-anonymize
+     attributes, job title / tenure, about / experience, and cleans up the
+     Sales Navigator links to match the JS approach (removing everything after
+     the first comma, removing query params).
+     """
+     # Grab the HTML
+     html_content = await page.content()
+     cleaned_html = cleanup_html(html_content)
+
+     soup = BeautifulSoup(cleaned_html, "html.parser")
+     url = page.url
+
+     # Select the correct DOM elements based on URL
+     if "/sales/search/people" in url:
+         lead_divs = soup.select('div[data-x-search-result="LEAD"]')
+     elif "/sales/lists/people" in url:
+         lead_divs = soup.select('tr[data-row-id]')
+     else:
+         lead_divs = soup.select('div[data-x-search-result="LEAD"]')
+
+     leads = []
+     if not lead_divs:
+         logger.info("No lead divs found on page.")
+         return [], "FAIL"
+
+     for div in lead_divs:
+         lead = {}
+
+         # data-anonymize elements
+         person_name_el = div.select_one('[data-anonymize="person-name"]')
+         title_el = div.select_one('[data-anonymize="title"]')
+         location_el = div.select_one('[data-anonymize="location"]')
+         job_title_el = div.select_one('[data-anonymize="job-title"]')
+         company_name_el = div.select_one('[data-anonymize="company-name"]')
+
+         lead["full_name"] = person_name_el.get_text(strip=True) if person_name_el else ""
+         lead["organization_name"] = company_name_el.get_text(strip=True) if company_name_el else ""
+         lead["lead_location"] = location_el.get_text(strip=True) if location_el else ""
+
+         # Match the JS logic for job_title vs job_tenure
+         if "/sales/search/people" in url:
+             lead["job_title"] = title_el.get_text(strip=True) if title_el else ""
+             lead["job_tenure"] = job_title_el.get_text(strip=True) if job_title_el else ""
+         elif "/sales/lists/people" in url:
+             lead["job_title"] = job_title_el.get_text(strip=True) if job_title_el else ""
+             lead["job_tenure"] = ""
+
+         # Initialize these if you want placeholders (as in JS)
+         lead["user_linkedin_url"] = ""
+         lead["organization_linkedin_url"] = ""
+
+         # About / Experience
+         dt_elements = div.find_all("dt")
+         for dt in dt_elements:
+             label = dt.get_text(strip=True).lower()
+             dd = dt.find_next_sibling("dd")
+             if label == "about:":
+                 lead["about"] = dd.get_text(strip=True) if dd else ""
+             elif label == "experience:":
+                 if dd:
+                     # Join text from child spans
+                     spans = dd.find_all("span")
+                     lead["experience"] = " ".join(
+                         s.get_text(" ", strip=True) for s in spans
+                     )
+                 else:
+                     lead["experience"] = ""
+
+         # -- Sales Navigator link extraction (matching the JS approach) --
+         lead_link_el = div.select_one('a[href^="/sales/lead/"]')
+         if lead_link_el:
+             href = lead_link_el.get("href") or ""
+             # Example JS logic: match = href.match(/^\/sales\/lead\/([^?]+)/)
+             match = re.match(r"^/sales/lead/([^?]+)", href)
+             if match:
+                 linkedin_id = match.group(1)
+                 # Remove everything after the first comma, if any
+                 comma_index = linkedin_id.find(",")
+                 if comma_index != -1:
+                     linkedin_id = linkedin_id[:comma_index]
+                 lead["user_linkedin_salesnav_url"] = f"https://www.linkedin.com/sales/lead/{linkedin_id}"
+             else:
+                 # Fallback if we didn't match or want to keep the full URL
+                 lead["user_linkedin_salesnav_url"] = f"https://www.linkedin.com{href}"
+
+         company_link_el = div.select_one('a[href^="/sales/company/"]')
+         if company_link_el:
+             href = company_link_el.get("href") or ""
+             # Similar approach for company
+             match = re.match(r"^/sales/company/([^?]+)", href)
+             if match:
+                 linkedin_id = match.group(1)
+                 # Remove everything after the first comma
+                 comma_index = linkedin_id.find(",")
+                 if comma_index != -1:
+                     linkedin_id = linkedin_id[:comma_index]
+                 lead["organization_linkedin_salesnav_url"] = f"https://www.linkedin.com/sales/company/{linkedin_id}"
+             else:
+                 lead["organization_linkedin_salesnav_url"] = f"https://www.linkedin.com{href}"
+
+         leads.append(lead)
+
+     if not leads:
+         return [], "FAIL"
+
+     return leads, "SUCCESS"
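+
+ # Illustrative shape of one extracted lead (all values are made-up examples,
+ # not real data); keys mirror the assignments above:
+ #
+ # {
+ #     "full_name": "Jane Doe",
+ #     "organization_name": "Acme Corp",
+ #     "lead_location": "San Francisco Bay Area",
+ #     "job_title": "VP of Sales",
+ #     "job_tenure": "3 years in role",
+ #     "user_linkedin_url": "",
+ #     "organization_linkedin_url": "",
+ #     "user_linkedin_salesnav_url": "https://www.linkedin.com/sales/lead/ACoAAA...",
+ #     "organization_linkedin_salesnav_url": "https://www.linkedin.com/sales/company/12345",
+ # }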
+
+
+ async def extract_lead_from_current_page(page: Page):
+     """
+     Extract a single lead from a lead's detail page, mirroring the JS code
+     logic from 'encrichLeadWithProfileAndCompanyInfo'. This includes:
+       - Clicking the overflow menu button before extraction
+       - Parsing the user LinkedIn URL via regex
+       - Extracting phone, email, experience, education
+       - Gathering all data-anonymize elements
+     Returns ([lead_dict], "SUCCESS") or ([], "FAIL").
+     """
+
+     # 1) Attempt to click the overflow menu button (if it exists).
+     button_selector = 'button[data-x--lead-actions-bar-overflow-menu]'
+     button = await page.query_selector(button_selector)
+     if button:
+         await button.click()
+         # Wait briefly for the menu's DOM updates to appear
+         await page.wait_for_timeout(2000)
+     else:
+         # Not necessarily a failure; the page might still contain the
+         # required data, but a warning could be logged here if desired.
+         pass
+
+     # 2) Extract the page's content
+     html_content = await page.content()
+     cleaned_html = cleanup_html(html_content)
+     soup = BeautifulSoup(cleaned_html, "html.parser")
+
+     # 3) Regex to match e.g. https://www.linkedin.com/in/johndoe
+     profile_regex = r"https:\/\/www\.linkedin\.com\/in\/[a-zA-Z0-9-]+"
+     profile_match = re.search(profile_regex, cleaned_html)
+     user_linkedin_url = profile_match.group(0) if profile_match else ""
+
+     # 4) Extract key elements
+     person_name_el = soup.select_one('[data-anonymize="person-name"]')
+     company_name_el = soup.select_one('[data-anonymize="company-name"]')
+     phone_el = soup.select_one('[data-anonymize="phone"]')
+     email_el = soup.select_one('[data-anonymize="email"]')
+     experience_el = soup.select_one('[data-x--lead--experience-section]')
+     education_el = soup.select_one('[data-sn-view-name="feature-lead-education"]')
+
+     # 5) Collect any other [data-anonymize] elements
+     anonymize_elements = soup.select('[data-anonymize]')
+     lead_information = [el.get_text(strip=True) for el in anonymize_elements]
+
+     # 6) Build the lead dictionary
+     lead_data = {
+         "full_name": person_name_el.get_text(strip=True) if person_name_el else "",
+         "organization_name": company_name_el.get_text(strip=True) if company_name_el else "",
+         "user_linkedin_url": user_linkedin_url,
+         "phone": phone_el.get_text(strip=True) if phone_el else "",
+         "email": email_el.get_text(strip=True) if email_el else "",
+         "experience": experience_el.get_text(strip=True) if experience_el else "",
+         "education": education_el.get_text(strip=True) if education_el else "",
+         # or json.dumps(lead_information) if a string is preferred
+         "lead_information": lead_information,
+     }
+
+     # 7) Return the single-lead list
+     return [lead_data], "SUCCESS"
+
+
+ async def extract_accounts_from_current_page(page: Page):
+     """
+     Extract multiple companies (accounts) from the current page.
+     Similar approach to extract_leads_from_current_page,
+     but adapted for the company DOM structure if needed.
+     """
+     html_content = await page.content()
+     cleaned_html = cleanup_html(html_content)
+     soup = BeautifulSoup(cleaned_html, "html.parser")
+
+     # Example placeholders:
+     accounts = []
+     # If LinkedIn's accounts page uses data-x-search-result="COMPANY", then something like:
+     company_divs = soup.select('div[data-x-search-result="COMPANY"]')
+     for div in company_divs:
+         company = {}
+         # Parse out company name, location, etc. For example:
+         name_el = div.select_one('[data-anonymize="company-name"]')
+         company["company_name"] = name_el.get_text(strip=True) if name_el else ""
+         # Add more fields here ...
+         accounts.append(company)
+
+     if not accounts:
+         return [], "FAIL"
+     return accounts, "SUCCESS"
+
+
+ async def extract_account_from_current_page(page: Page):
+     """
+     Extract a single company (account) from an account detail page, mirroring
+     the JS code logic. Includes:
+       - Locating the 'Visit website' link
+       - Collecting data-anonymize properties
+     Returns ([company_dict], "SUCCESS") or ([], "FAIL").
+     """
+
+     # 1) Attempt to click the overflow menu button (if it exists).
+     button_selector = 'button[data-x--account-actions--overflow-menu]'
+     button = await page.query_selector(button_selector)
+     if button:
+         await button.click()
+         # Wait briefly for the menu's DOM updates to appear
+         await page.wait_for_timeout(2000)
+
+     # 2) Check for a button with the text "Copy LinkedIn.com URL".
+     #    If found, click it, then read from the clipboard.
+     copied_text = ""
+     copy_linkedin_button = await page.query_selector('button:has-text("Copy LinkedIn.com URL")')
+     if copy_linkedin_button:
+         await copy_linkedin_button.click()
+         # Wait briefly for the clipboard to be updated
+         await page.wait_for_timeout(1000)
+         copied_text = pyperclip.paste().strip()
+
+     # 3) Get the HTML content and parse it
+     html_content = await page.content()
+     cleaned_html = cleanup_html(html_content)
+     soup = BeautifulSoup(cleaned_html, "html.parser")
+
+     # 4) Check for a required element to validate it's truly a company page
+     name_el = soup.select_one('[data-anonymize="company-name"]')
+     if not name_el:
+         return [], "FAIL"
+
+     # 5) Locate the "Visit website" link (if present). We look for an <a>
+     #    whose text includes "Visit website" (case-insensitive).
+     visit_website_el = soup.find("a", string=lambda text: text and "visit website" in text.lower())
+     company_website_url = visit_website_el.get("href", "") if visit_website_el else ""
+
+     # 6) Gather any data-anonymize properties on the page
+     anonymize_elements = soup.select('[data-anonymize]')
+     company_info = [el.get_text(strip=True) for el in anonymize_elements]
+
+     # 7) Build the company dictionary
+     company = {
+         "company_name": name_el.get_text(strip=True),
+         "company_website_in_linkedin_page": company_website_url,
+         "company_info": company_info
+     }
+
+     # If we successfully copied the LinkedIn.com URL, place it into the dictionary
+     if copied_text and "linkedin.com/company/" in copied_text:
+         company["organization_linkedin_url"] = copied_text
+
+     return [company], "SUCCESS"
+
+
+ async def extract_from_page_with_pagination(page: Page, command_args: Dict[str, Any]):
+     """
+     Extracts leads or accounts data from Sales Navigator with pagination.
+     Mirrors the JS approach:
+       1) scroll/parse,
+       2) if the Next button is found and not disabled, click it,
+       3) repeat up to max_pages or until no more data.
+     """
+     leads_data = []
+     current_page = 1
+     url = page.url
+     max_pages = command_args.get("max_pages", 2)
+
+     while current_page <= max_pages:
+         await asyncio.sleep(3)
+         logger.info(f"Processing page {current_page}")
+         await scroll_to_bottom(page)
+         await asyncio.sleep(2)
+
+         if "/sales/search/people" in url or "/sales/lists/people" in url:
+             page_items, status = await extract_leads_from_current_page(page)
+         elif "/sales/search/company" in url:
+             page_items, status = await extract_accounts_from_current_page(page)
+         elif "/sales/lead/" in url:
+             page_items, status = await extract_lead_from_current_page(page)
+         elif "/sales/company/" in url:
+             page_items, status = await extract_account_from_current_page(page)
+         else:
+             logger.warning("URL does not match known patterns. Exiting pagination.")
+             break
+
+         if status == "FAIL" or not page_items:
+             logger.info("Extraction failed or no items found. Ending pagination.")
+             break
+
+         leads_data.extend(page_items)
+
+         # Attempt to click the "Next" button, as in the JS code that checks 'button[aria-label="Next"]'
+         try:
+             next_buttons = page.locator("button[aria-label='Next']")
+             button_count = await next_buttons.count()
+
+             if button_count == 0:
+                 logger.info("Next button not found. End of pagination.")
+                 break
+
+             next_button = next_buttons.first
+             if await next_button.is_disabled():
+                 logger.info("Next button is disabled. End of pagination.")
+                 break
+
+             await next_button.wait_for(state="visible", timeout=2000)
+             await next_button.click()
+             await page.wait_for_load_state('load')
+             current_page += 1
+
+         except PlaywrightTimeoutError:
+             logger.error("Timeout while waiting for the Next button. Ending pagination.")
+             break
+         except Exception as e:
+             logger.error(f"Failed to navigate to next page: {e}")
+             break
+
+     return leads_data
+
+ async def extract_leads(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     await goto_url(page, command_args)
+     data = await extract_from_page_with_pagination(page, command_args)
+     return {
+         "status": "SUCCESS",
+         "message": "Lead data retrieved",
+         "data": data,
+     }
+
+ async def extract_companies(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     await goto_url(page, command_args)
+     data = await extract_from_page_with_pagination(page, command_args)
+     return {
+         "status": "SUCCESS",
+         "message": "Company data retrieved",
+         "data": data,
+     }
+
+ async def extract_lead(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     await goto_url(page, command_args)
+     if "/sales/lead/" not in page.url:
+         return {"status": "ERROR", "message": "Not a sales lead page"}
+     data = await extract_from_page_with_pagination(page, command_args)
+     return {
+         "status": "SUCCESS",
+         "message": "Lead data retrieved",
+         "data": data,
+     }
+
+ async def extract_company(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     await goto_url(page, command_args)
+     if "/sales/company/" not in page.url:
+         return {"status": "ERROR", "message": "Not a sales company page"}
+     data = await extract_from_page_with_pagination(page, command_args)
+     return {
+         "status": "SUCCESS",
+         "message": "Company data retrieved",
+         "data": data,
+     }
+
+ async def get_connection_status(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     await goto_url(page, command_args)
+     if "/sales/lead/" not in page.url:
+         return {"status": "ERROR", "message": "Not a sales lead page"}
+     first_degree = await page.query_selector('span:has-text("1st")')
+     second_degree = await page.query_selector('span:has-text("2nd")')
+     if first_degree:
+         connection_status = "1st degree connection"
+     elif second_degree:
+         connection_status = "2nd degree connection"
+     else:
+         connection_status = "Not connected"
+     return {
+         "status": "SUCCESS",
+         "message": "Connection status retrieved",
+         "data": connection_status,
+     }
+
+ async def get_activities_of_user(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     # TODO: not yet implemented; currently a no-op placeholder.
+     return {"status": "SUCCESS", "message": "get_activities_of_user executed"}
+
+ async def like_post_on_page(page: Page, command_args: Dict[str, Any]) -> Dict[str, str]:
+     try:
+         # Wait for the iframe to load
+         iframe = await page.wait_for_selector('iframe', timeout=4000)
+         if not iframe:
+             return {"status": "ERROR", "message": "Iframe not found"}
+
+         # Get the iframe content
+         frame = await iframe.content_frame()
+         if not frame:
+             return {"status": "ERROR", "message": "Iframe content not found"}
+
+         activity_section = await frame.query_selector('.fie-impression-container')
+         if not activity_section:
+             return {"status": "ERROR", "message": "Activity section not found"}
+
+         facepile_section = await activity_section.query_selector("#reactors-facepile-id")
+         if not facepile_section:
+             return {"status": "ERROR", "message": "Facepile section not found"}
+
+         facepile_text = await facepile_section.inner_html()
+         reaction_count = facepile_text.count("linkedin.com/in/")
+         min_reaction_count = command_args.get("min_reaction_count", 5)
+
+         # Check all spans to ensure none have "mo ago" in their inner text
+         spans = await frame.query_selector_all('span')
+         for span in spans:
+             span_text = await span.inner_text()
+             if "mo ago" in span_text:
+                 return {"status": "ERROR", "message": "Post is too old"}
+
+         if reaction_count > min_reaction_count:
+             like_button = await frame.query_selector('button[aria-label="React Like"][aria-pressed]')
+             if like_button:
+                 aria_pressed = await like_button.get_attribute('aria-pressed')
+                 if not aria_pressed or aria_pressed == "false":
+                     await like_button.click()
+
+         return {"status": "SUCCESS", "message": "like_post executed"}
+     except Exception as e:
+         logger.error(f"Exception occurred: {e}")
+         return {"status": "ERROR", "message": f"Exception occurred: {e}"}
+
+
+ async def like_recent_post(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     await goto_url(page, command_args)
+     if "sales/lead/" not in page.url:
+         return {"status": "ERROR", "message": "Not a sales lead page"}
+
+     try:
+         activity_button = await find_button_by_name(page, "See all activities")
+         if activity_button:
+             await activity_button.click()
+             await page.wait_for_selector('[aria-labelledby="recent-activity-panel-header"]', timeout=5000)
+             await page.wait_for_timeout(4000)
+         else:
+             return {"status": "FAILURE", "message": "No Activities Button"}
+
+         activity_section = await page.query_selector('[aria-labelledby="recent-activity-panel-header"]')
+         if not activity_section:
+             return {"status": "ERROR", "message": "Recent activity panel not found"}
+
+         item_button = await activity_section.query_selector('button[data-x--recent-activity-side-panel--item-button]')
+         if not item_button:
+             return {"status": "ERROR", "message": "Activity item button not found"}
+         await item_button.click()
+         await page.wait_for_timeout(3000)
+
+         react_result = await like_post_on_page(page, command_args)
+
+         return react_result
+     except Exception as e:
+         return {"status": "ERROR", "message": str(e)}
+
+ # Service health check.
+ async def health_check(page: Page, command_args: Dict[str, Any]) -> Dict[str, Any]:
+     # Accepts `page` so it can be dispatched like every other command below.
+     return {"status": "SUCCESS", "message": "Health check successful"}
+
+ command_to_function_mapping = {
+     "navigate_to_url": navigate_to_url,
+     "view_linkedin_profile": view_linkedin_profile,
+     "send_connection_request": send_connection_request,
+     "send_linkedin_message": send_linkedin_message,
+     "get_current_messages": get_current_messages,
+     "extract_leads_information": extract_leads,
+     "extract_lead_information": extract_lead,
+     "extract_company_information": extract_company,
+     "extract_companies_information": extract_companies,
+     "get_activities_of_user": get_activities_of_user,
+     "like_recent_post": like_recent_post,
+     "get_lead_connection_status": get_connection_status,
+     "health_check": health_check,
+ }
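+
+ # Illustrative dispatch (values are made up): a task whose command_name is
+ # "navigate_to_url" resolves to the coroutine above and is awaited as:
+ #
+ #   func = command_to_function_mapping["navigate_to_url"]
+ #   result = await func(page, {"url": "https://www.linkedin.com/sales/home"})
+ #   # -> {"status": "SUCCESS", "message": "Navigated to ..."}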
+
+ # -----------------------------
+ # Login Helpers
+ # -----------------------------
+ async def login_to_linkedin(page: Page, email: str, password: str, headless: bool):
+     """
+     Logs into LinkedIn using the provided email and password.
+     If credentials are not provided, waits for the user to log in manually.
+     """
+     await page.goto("https://www.linkedin.com/uas/login?session_redirect=%2Fsales")
+     await page.wait_for_load_state('load')
+
+     if email and password:
+         await page.get_by_label("Email or Phone").click()
+         await page.get_by_label("Email or Phone").fill(email)
+         await page.get_by_label("Password").click()
+         await page.get_by_label("Password").fill(password)
+         await page.locator("#organic-div form").get_by_role("button", name="Sign in", exact=True).click()
+         await page.wait_for_load_state('load')
+     else:
+         logger.info("Waiting for user to log in manually...")
+         try:
+             await page.wait_for_url(lambda url: url == "https://www.linkedin.com/sales/home", timeout=0)
+             logger.info("User logged in successfully.")
+         except Exception:
+             logger.error("Timeout waiting for user to log in.")
+             return 'FAIL'
+
+     if "checkpoint/challenge" in page.url:
+         if not headless:
+             logger.warning("Captcha page encountered! Human intervention is needed.")
+             # Keep polling until the challenge is cleared by the user.
+             while "checkpoint/challenge" in page.url:
+                 await asyncio.sleep(5)
+                 await page.wait_for_load_state('load')
+                 if "checkpoint/challenge" not in page.url:
+                     logger.info("Captcha solved. Continuing with the process.")
+                     break
+                 await asyncio.sleep(3)
+         else:
+             logger.error("Captcha page encountered! Aborting due to headless mode.")
+             return 'FAIL'
+
+     return 'SUCCESS'
+
+
+ # ------------------------------------------------
+ # Service Polling & Command Execution
+ # ------------------------------------------------
+ async def poll_service() -> List[Dict[str, Any]]:
+     """
+     Fetch command(s) from the service for a specific agent_id.
+     """
+     try:
+         # Make sure we pass the agent_id as a query param
+         url = f"{SERVICE_URL}/get_agent_tasks?agent_id={AGENT_ID}&api_key={AGENT_API_KEY}"
+         response = requests.get(url)
+         if response.status_code == 200:
+             tasks = response.json()  # expecting a list of tasks
+             logger.info(f"Polled {len(tasks)} task(s) from service.")
+             return tasks
+         else:
+             logger.error(f"Failed to poll tasks. Status code: {response.status_code}")
+     except Exception as e:
+         logger.error(f"Error polling tasks: {e}")
+     return []
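+
+ # Illustrative response body from /get_agent_tasks (shape inferred from
+ # execute_command below; the values are placeholders):
+ #
+ # [
+ #     {
+ #         "command_request_id": "123",
+ #         "command_name": "navigate_to_url",
+ #         "command_args": {"url": "https://www.linkedin.com"}
+ #     }
+ # ]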
+
+ async def update_agent_health_status(page) -> Dict[str, Any]:
+     """
+     Update the health status of the agent.
+     """
+     try:
+         log_status = await ensure_user_is_logged_in(page)
+         if log_status:
+             status = "LIVE_LOGGED_IN"
+         else:
+             status = "LIVE_NOT_LOGGED_IN"
+         update_url = f"{SERVICE_URL}/update_agent_health?api_key={AGENT_API_KEY}"
+         update_response = requests.post(update_url, params={"agent_id": AGENT_ID, "status": status})
+
+         if update_response.status_code == 200:
+             logger.info(f"Updated agent health status to {status}.")
+             return {
+                 "status": "OK",
+                 "message": f"Agent health status updated to {status}."
+             }
+         else:
+             logger.error(f"Failed to update agent health status. Status code: {update_response.status_code}")
+     except Exception as e:
+         logger.error(f"Error updating agent health status: {e}")
+     return {
+         "status": "ERROR",
+         "message": "Failed to update agent health status."
+     }
+
+ async def send_command_result_to_service(command_request_id: str, result: Dict[str, Any]) -> None:
+     """
+     Send the result of a command execution back to the service.
+     """
+     try:
+         # The service endpoint expects a query param for agent_id and a JSON body
+         url = f"{SERVICE_URL}/agent_task_result?agent_id={AGENT_ID}&api_key={AGENT_API_KEY}"
+         payload = {
+             "command_request_id": command_request_id,
+             "result": result
+         }
+         response = requests.post(url, json=payload)
+         if response.status_code not in (200, 201):
+             logger.error(f"Failed to send result. Status code: {response.status_code}")
+         else:
+             logger.info(f"Result for command_request_id={command_request_id} sent successfully.")
+     except Exception as e:
+         logger.error(f"Error sending command result: {e}")
+
+
+ async def ensure_user_is_logged_in(page: Page) -> bool:
+     """
+     Checks if the user is logged in. If on a captcha or login page,
+     waits for user intervention. Returns True if logged in, False otherwise.
+     """
+     if "checkpoint/challenge" in page.url or "login" in page.url:
+         logger.warning("User is not fully logged in or is on a captcha page. Waiting for user intervention...")
+         try:
+             await page.wait_for_url("https://www.linkedin.com/sales/home", timeout=600_000)
+             logger.info("User completed captcha/login process.")
+             return True
+         except Exception:
+             logger.error("User did not finish captcha/login in time.")
+             return False
+     return True
+
+
+ async def execute_command(page: Page, command_data: Dict[str, Any]) -> Dict[str, Any]:
+     """
+     Executes a single command. Expects the structure:
+     {
+         "command_request_id": "123",
+         "command_name": "navigate_to_url",
+         "command_args": {"url": "https://www.linkedin.com"},
+     }
+     """
+     command_request_id = command_data.get("command_request_id")
+     command_name = command_data.get("command_name")
+     command_args = command_data.get("command_args", {})
+
+     if not command_name or command_name not in command_to_function_mapping:
+         return {"status": "FAIL", "message": "Invalid command"}
+
+     is_logged_in = await ensure_user_is_logged_in(page)
+     if not is_logged_in:
+         return {"status": "FAIL", "message": "Not logged in or captcha not solved"}
+
+     command_func = command_to_function_mapping[command_name]
+     try:
+         logger.info(f"Executing command: {command_name}")
+         result = await command_func(page, command_args)
+         return result
+     except Exception as e:
+         logger.error(f"Error executing command {command_name}: {e}")
+         return {"status": "FAIL", "message": str(e), "command_request_id": command_request_id}
+
+ def log_to_journal(task, message):
+     os.makedirs("/tmp/dhisana_ai", exist_ok=True)
+     log_path = "/tmp/dhisana_ai/sales_nav_logs.log"
+     with open(log_path, "a") as lf:
+         lf.write(f"{datetime.now().isoformat()} | agent_id: {AGENT_ID} | cmd_id: {task.get('command_request_id', '')} | task: {json.dumps(task)} | message: {message}\n")
+
+ ## Check throttling limits for LinkedIn
+ def check_and_update_throttling_limits(task: Dict[str, Any]) -> Dict[str, Any]:
+     # Make sure the directory exists
+     os.makedirs("/tmp/dhisana_ai", exist_ok=True)
+
+     # Logs
+     log_to_journal(task, "task request")
+
+     # File and lookup keys
+     filepath = "/tmp/dhisana_ai/salesnav_throttlinglimits.json"
+     agent_id = AGENT_ID
+     cmd_name = task.get("command_name", "")
+     today = datetime.now().strftime("%Y-%m-%d")
+
+     # Defined daily limits per command
+     daily_limits = {
+         "send_connection_request": 10,
+         "view_linkedin_profile": 500,
+         "navigate_to_url": 500,
+         "like_recent_post": 10,
+         "send_linkedin_message": 10
+     }
+     limit = daily_limits.get(cmd_name, 25)
+
+     # Load or initialize usage data
+     if os.path.exists(filepath):
+         with open(filepath, "r") as f:
+             try:
+                 data = json.load(f)
+             except Exception:
+                 data = {}
+     else:
+         data = {}
+
+     if agent_id not in data:
+         data[agent_id] = {}
+     if today not in data[agent_id]:
+         data[agent_id][today] = {}
+
+     usage = data[agent_id][today].get(cmd_name, 0)
+     if usage >= limit:
+         return {"status": "ERROR", "message": "Throttling limit reached"}
+
+     # Increment counter
+     data[agent_id][today][cmd_name] = usage + 1
+
+     # Keep only the last 7 days
+     days = sorted(data[agent_id].keys(), reverse=True)
+     if len(days) > 7:
+         for old_day in days[7:]:
+             del data[agent_id][old_day]
+
+     # Save
+     with open(filepath, "w") as f:
+         json.dump(data, f)
+
+     return {"status": "SUCCESS", "message": "Throttling limits checked"}
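+
+ # Illustrative contents of salesnav_throttlinglimits.json after a few runs
+ # (the agent id and counts are made up); the layout follows the
+ # data[agent_id][date][command] assignments above:
+ #
+ # {
+ #     "agent-001": {
+ #         "2024-01-15": {"send_connection_request": 3, "view_linkedin_profile": 12}
+ #     }
+ # }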
+
+
+ async def get_tasks_and_execute(page: Page):
+     """
+     Continuously polls the service for tasks, executes them,
+     and sends back the results.
+       - Wait 30 seconds between polls
+       - Wait 10 seconds between each command execution
+     """
+     while True:
+         await update_agent_health_status(page)
+         is_user_loggedin = await ensure_user_is_logged_in(page)
+         if is_user_loggedin:
+             tasks = await poll_service()
+             if not tasks:
+                 logger.info("No tasks found. Will check again in 30 seconds.")
+             else:
+                 logger.info(f"Received {len(tasks)} task(s). Processing...")
+
+             for task_data in tasks:
+                 # Example task_data structure sent from the service:
+                 # {
+                 #     "command_request_id": "123",
+                 #     "command_name": "navigate_to_url",
+                 #     "command_args": {"url": "https://www.linkedin.com"},
+                 # }
+                 result_throttling = check_and_update_throttling_limits(task_data)
+                 if result_throttling.get("status") == "SUCCESS":
+                     result = await execute_command(page, task_data)
+                 else:
+                     result = result_throttling
+
+                 cmd_id = task_data.get("command_request_id", "")
+                 log_to_journal(task_data, f"task response -- {json.dumps(result)}")
+                 await send_command_result_to_service(cmd_id, result)
+
+                 if result_throttling.get("status") == "SUCCESS":
+                     await asyncio.sleep(10)
+                 else:
+                     # Stop processing further tasks once a throttle limit is hit.
+                     break
+
+         await asyncio.sleep(30)
+
+
+ # -----------------------------
+ # Main Entry Point
+ # -----------------------------
+ async def initialize_agent():
+     """
+     Initializes the Playwright browser and logs in to LinkedIn,
+     then starts the loop that fetches tasks and executes them.
+     """
+     global browser, context, page
+
+     email = os.environ.get("LINKEDIN_EMAIL", "")
+     password = os.environ.get("LINKEDIN_PASSWORD", "")
+
+     async with async_playwright() as p:
+         # Launch the browser
+         browser = await p.chromium.launch(headless=False)
+         context = await browser.new_context()
+         page = await context.new_page()
+
+         # Log in to LinkedIn
+         login_status = await login_to_linkedin(page, email, password, headless=False)
+         if login_status == 'FAIL':
+             logger.error("Login failed due to captcha or incorrect credentials. Exiting.")
+             return
+
+         # After a successful login, start the poll-execute loop
+         await get_tasks_and_execute(page)
+
+         await browser.close()
+
+ if __name__ == "__main__":
+     asyncio.run(initialize_agent())