dhisana 0.0.1.dev212__py3-none-any.whl → 0.0.1.dev214__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -294,7 +294,11 @@ async def list_emails_async(
294
294
 
295
295
  # --- future providers go here ---
296
296
 
297
- raise RuntimeError("No suitable inbox provider configured for this sender.")
297
+ logging.warning(
298
+ "No suitable inbox provider configured for sender %s; returning empty list.",
299
+ query_email_context.sender_email,
300
+ )
301
+ return []
298
302
 
299
303
 
300
304
  # ─────────────────────────────────────────────────────────────────────────────
@@ -34,6 +34,7 @@ from dhisana.utils.proxy_curl_tools import (
34
34
  from dhisana.utils.research_lead import research_company_with_full_info_ai, research_lead_with_full_info_ai
35
35
  from dhisana.utils.serpapi_search_tools import (
36
36
  find_organization_linkedin_url_with_google_search,
37
+ find_user_linkedin_url_by_email_google,
37
38
  find_user_linkedin_url_google,
38
39
  find_user_linkedin_url_with_serper,
39
40
  get_company_website_from_linkedin_url,
@@ -550,6 +551,7 @@ async def enrich_user_info(
550
551
  location = input_properties.get("lead_location", "") or ""
551
552
  org_name = (input_properties.get("organization_name", "") or "").strip()
552
553
  org_domain = (input_properties.get("primary_domain_of_organization", "") or "").strip()
554
+ email = (input_properties.get("email") or "").strip()
553
555
 
554
556
  if full_name and (org_name or org_domain or title):
555
557
  # This function does a google-based search for the user's LinkedIn
@@ -565,6 +567,29 @@ async def enrich_user_info(
565
567
  if found_linkedin_url:
566
568
  user_linkedin_url = found_linkedin_url
567
569
  input_properties["user_linkedin_url"] = user_linkedin_url
570
+ if not user_linkedin_url and email:
571
+ # If we have an email but no name, try searching by email
572
+ email_lookup_result = await find_user_linkedin_url_by_email_google(
573
+ email=email,
574
+ user_name=full_name,
575
+ user_title=title,
576
+ user_location=location,
577
+ user_company=org_name,
578
+ tool_config=tool_config,
579
+ )
580
+ if email_lookup_result and email_lookup_result.get("linkedin_url"):
581
+ user_linkedin_url = email_lookup_result["linkedin_url"]
582
+ input_properties["user_linkedin_url"] = user_linkedin_url
583
+ confidence = email_lookup_result.get("confidence", 0.0)
584
+ reasoning = email_lookup_result.get("reasoning", "")
585
+ input_properties["user_linkedin_url_confidence"] = confidence
586
+ input_properties["user_linkedin_url_reasoning"] = reasoning
587
+
588
+ additional_properties = input_properties.get("additional_properties") or {}
589
+ additional_properties["user_linkedin_url_confidence"] = confidence
590
+ if reasoning:
591
+ additional_properties["user_linkedin_url_reasoning"] = reasoning
592
+ input_properties["additional_properties"] = additional_properties
568
593
 
569
594
  # 3) Final fallback: if STILL no user_linkedin_url,
570
595
  # but user_linkedin_salesnav_url is present, use proxy
@@ -361,9 +361,20 @@ async def enrich_organization_info_from_proxycurl(
361
361
  logger.warning(msg)
362
362
  await asyncio.sleep(30)
363
363
  return {"error": msg}
364
+ elif response.status == 404:
365
+ error_text = await response.text()
366
+ logger.warning(
367
+ f"Proxycurl organization profile not found for LinkedIn URL {standardized_url}: {error_text}"
368
+ )
369
+ cache_output(
370
+ "enrich_organization_info_from_proxycurl", standardized_url, {}
371
+ )
372
+ return {}
364
373
  else:
365
374
  error_text = await response.text()
366
- logger.error(f"Error from Proxycurl organization info fetch by URL: {error_text}")
375
+ logger.error(
376
+ f"Error from Proxycurl organization info fetch by URL: {error_text}"
377
+ )
367
378
  return {}
368
379
  except Exception as e:
369
380
  logger.exception("Exception occurred while fetching organization info from Proxycurl by LinkedIn URL.")
@@ -1,6 +1,6 @@
1
1
  import json
2
2
  import re
3
- from typing import Dict, List, Optional
3
+ from typing import Any, Dict, List, Optional, Set
4
4
  from urllib.parse import urlparse
5
5
  import urllib.parse
6
6
  import aiohttp
@@ -35,6 +35,12 @@ class LeadSearchResult(BaseModel):
35
35
  user_linkedin_url: str = ""
36
36
 
37
37
 
38
class LinkedinCandidateChoice(BaseModel):
    # Structured verdict the LLM is asked to return when choosing between
    # candidate LinkedIn profile links.
    # NOTE: documented with comments rather than a class docstring so the
    # schema generated from this model is left exactly as it was.

    # Profile link the LLM selected; the prompt asks for an empty string when
    # no candidate seems appropriate.
    chosen_link: str = ""
    # Self-reported confidence; the prompt requests a float in the 0-1 range,
    # but nothing here enforces that range.
    confidence: float = 0.0
    # Short free-text justification for the choice.
    reasoning: str = ""
42
+
43
+
38
44
  async def get_structured_output(text: str, tool_config: Optional[List[Dict]] = None) -> LeadSearchResult:
39
45
  """Parse text snippet into ``LeadSearchResult`` using OpenAI."""
40
46
 
@@ -82,6 +88,65 @@ async def find_user_linkedin_url_with_serper(
82
88
  return None
83
89
 
84
90
 
91
async def pick_best_linkedin_candidate_with_llm(
    email: str,
    user_name: str,
    user_title: str,
    user_location: str,
    user_company: str,
    candidates: List[Dict],
    tool_config: Optional[List[Dict]] = None,
) -> Optional[LinkedinCandidateChoice]:
    """Have the LLM judge recent search hits and choose the likeliest profile.

    Only the last three entries of ``candidates`` are shown to the model.
    Each entry is read through ``.get`` for the keys ``link``, ``title``,
    ``snippet``, ``subtitle`` and ``query``; missing keys render as empty
    strings.

    Args:
        email: Lead email address (rendered as ``unknown`` when falsy).
        user_name: Lead name hint (``unknown`` when falsy).
        user_title: Lead job-title hint (``unknown`` when falsy).
        user_location: Lead location hint (``unknown`` when falsy).
        user_company: Lead company hint (``unknown`` when falsy).
        candidates: Search-result records to choose from.
        tool_config: Passed through unchanged to the structured-output helper.

    Returns:
        The parsed ``LinkedinCandidateChoice``, or ``None`` when there are no
        candidates, the helper does not report ``"SUCCESS"``, or it yields no
        result.
    """
    if not candidates:
        return None

    # Tail slice: the most recently appended records, in their given order.
    shortlist = candidates[-3:]

    def render(position: int, record: Dict) -> str:
        # One multi-line text block per candidate, numbered from 1.
        return (
            f"Candidate {position}:\n"
            f" Link: {record.get('link', '')}\n"
            f" Title: {record.get('title', '')}\n"
            f" Snippet: {record.get('snippet', '')}\n"
            f" Subtitle: {record.get('subtitle', '')}\n"
            f" Query: {record.get('query', '')}"
        )

    candidate_block = "\n".join(
        render(position, record)
        for position, record in enumerate(shortlist, start=1)
    )

    # Empty strings below yield the blank separator lines of the prompt.
    prompt_lines = [
        "You are validating LinkedIn profile matches for a lead enrichment workflow.",
        "Given the lead context and candidate search results, pick the most likely LinkedIn profile.",
        "If no candidate seems appropriate, return an empty link and confidence 0.",
        "Consider whether the email, name, company, title, or location aligns with the candidate.",
        "Lead context:",
        f"- Email: {email or 'unknown'}",
        f"- Name: {user_name or 'unknown'}",
        f"- Title: {user_title or 'unknown'}",
        f"- Company: {user_company or 'unknown'}",
        f"- Location: {user_location or 'unknown'}",
        "",
        "Candidates:",
        candidate_block,
        "",
        "Return JSON with fields: chosen_link (string), confidence (0-1 float), reasoning (short string).",
    ]
    prompt = "\n".join(prompt_lines)

    choice, outcome = await get_structured_output_internal(
        prompt,
        LinkedinCandidateChoice,
        model="gpt-4.1-mini",
        tool_config=tool_config,
    )

    if outcome == "SUCCESS" and choice is not None:
        return choice
    return None
148
+
149
+
85
150
  @assistant_tool
86
151
  async def get_company_domain_from_google_search(
87
152
  company_name: str,
@@ -304,6 +369,221 @@ async def find_user_linkedin_url_google(
304
369
  return ""
305
370
 
306
371
 
372
@assistant_tool
async def find_user_linkedin_url_by_email_google(
    email: str,
    user_name: str = "",
    user_title: str = "",
    user_location: str = "",
    user_company: str = "",
    tool_config: Optional[List[Dict]] = None,
) -> Optional[Dict[str, Any]]:
    """
    Find the LinkedIn URL for a user based primarily on their email address.

    Additional profile hints (name, title, location, company) improve query precision
    when supplied. Returns a dict with the best LinkedIn URL, LLM confidence score,
    and short reasoning when a match clears the confidence threshold; otherwise ``None``.

    Args:
        email: Email address to search for. Empty or whitespace-only values
            short-circuit to ``None`` without issuing any search.
        user_name: Optional name hint, quoted into the search queries.
        user_title: Optional job-title hint, quoted into the search queries.
        user_location: Optional location hint, quoted into the search queries.
        user_company: Optional company hint, quoted into the search queries.
        tool_config: Passed through to the search and LLM helpers.

    Returns:
        ``{"linkedin_url", "confidence", "reasoning"}`` as soon as an LLM
        verdict reaches the high-confidence threshold (0.8), or for the best
        verdict seen overall if it reaches the minimum threshold (0.75);
        ``None`` otherwise.
    """
    logger.info("Entering find_user_linkedin_url_by_email_google")

    # Normalise before the emptiness check so that whitespace-only input is
    # rejected instead of producing email-less queries such as "linkedin.com/in".
    normalized_email = (email or "").strip().lower()
    if not normalized_email:
        logger.warning("No email provided.")
        return None

    email_local_part = normalized_email.split("@")[0] if "@" in normalized_email else normalized_email
    # "jane.doe-smith" -> "jane doe smith": a guess at the person's name.
    email_local_humanized = re.sub(r"[._-]+", " ", email_local_part).strip()

    queries: List[str] = []

    def add_query(query: str) -> None:
        # Queries are stored stripped, non-empty and de-duplicated, in
        # insertion (priority) order.
        query = query.strip()
        if query and query not in queries:
            queries.append(query)

    def add_query_parts(*parts: str) -> None:
        tokens = [part.strip() for part in parts if part and part.strip()]
        if not tokens:
            return
        add_query(" ".join(tokens))

    enriched_terms = []
    if user_name:
        enriched_terms.append(f'"{user_name}"')
    if user_company:
        enriched_terms.append(f'"{user_company}"')
    if user_title:
        enriched_terms.append(f'"{user_title}"')
    if user_location:
        enriched_terms.append(f'"{user_location}"')
    base_hint = " ".join(enriched_terms)

    # Prioritise the direct email search variants before broader fallbacks.
    add_query_parts(normalized_email, "linkedin.com/in", base_hint)
    add_query_parts(normalized_email, "linkedin.com", base_hint)
    add_query_parts(normalized_email, "linkedin", base_hint)
    add_query_parts(normalized_email, base_hint)
    add_query(f'"{normalized_email}" "linkedin.com/in" {base_hint}')
    add_query(f'"{normalized_email}" "linkedin.com" {base_hint}')
    add_query(f'"{normalized_email}" linkedin {base_hint}')

    if email_local_part and email_local_part != normalized_email:
        add_query_parts(email_local_part, "linkedin.com/in", base_hint)
        add_query_parts(email_local_part, "linkedin.com", base_hint)
        add_query_parts(email_local_part, "linkedin", base_hint)
        add_query(f'"{email_local_part}" "linkedin.com/in" {base_hint}')
        add_query(f'"{email_local_part}" "linkedin.com" {base_hint}')

    if email_local_humanized and email_local_humanized not in {email_local_part, normalized_email}:
        add_query_parts(email_local_humanized, "linkedin", base_hint)
        add_query(f'"{email_local_humanized}" linkedin {base_hint}')

    # normalized_email is guaranteed non-empty by the guard above.
    add_query(f'site:linkedin.com/in "{normalized_email}" {base_hint}')

    if email_local_part:
        add_query(f'site:linkedin.com/in "{email_local_part}" {base_hint}')

    if email_local_humanized and email_local_humanized != email_local_part:
        add_query(f'site:linkedin.com/in "{email_local_humanized}" {base_hint}')

    if base_hint:
        lookup_hint = user_name or email_local_humanized or email_local_part or normalized_email
        add_query(
            f'site:linkedin.com/in "{normalized_email}" {base_hint} '
            f'intitle:"{lookup_hint}" -intitle:"profiles"'
        )
        if email_local_humanized:
            add_query(
                f'site:linkedin.com/in "{email_local_humanized}" {base_hint} '
                f'intitle:"{lookup_hint}" -intitle:"profiles"'
            )

    candidate_records: List[Dict[str, str]] = []
    seen_links: Set[str] = set()
    best_llm_choice: Optional[LinkedinCandidateChoice] = None
    best_llm_link: str = ""
    HIGH_CONFIDENCE_THRESHOLD = 0.8
    MIN_CONFIDENCE_THRESHOLD = 0.75

    async def evaluate_with_llm() -> Optional[LinkedinCandidateChoice]:
        # Scores the current candidate window. Returns the verdict only when
        # it is confident enough to stop searching; otherwise remembers the
        # best verdict so far and returns None.
        nonlocal best_llm_choice, best_llm_link

        llm_choice = await pick_best_linkedin_candidate_with_llm(
            email=email,
            user_name=user_name,
            user_title=user_title,
            user_location=user_location,
            user_company=user_company,
            candidates=candidate_records,
            tool_config=tool_config,
        )

        if not llm_choice or not llm_choice.chosen_link:
            return None

        chosen_link = extract_user_linkedin_page(llm_choice.chosen_link)
        if not chosen_link:
            return None

        llm_choice.chosen_link = chosen_link

        if best_llm_choice is None or llm_choice.confidence > best_llm_choice.confidence:
            best_llm_choice = llm_choice
            best_llm_link = chosen_link
            logger.debug(
                "LLM updated best candidate: %s (confidence %.2f) reason: %s",
                chosen_link,
                llm_choice.confidence,
                llm_choice.reasoning,
            )

        if llm_choice.confidence >= HIGH_CONFIDENCE_THRESHOLD:
            logger.info(
                "Returning LinkedIn user page by email via LLM scoring: %s (confidence %.2f)",
                chosen_link,
                llm_choice.confidence,
            )
            return llm_choice

        return None

    # No HTTP session is opened here: the search helper is called without
    # one, so the previously created aiohttp session was never used.
    for query in queries:
        logger.debug("Searching with query: %s", query)

        try:
            results = await search_google_with_tools(query, 5, tool_config=tool_config)
        except Exception:
            # Best effort: one failed search must not abort the remaining queries.
            logger.exception("Error searching for LinkedIn user URL by email.")
            continue

        if not isinstance(results, list) or not results:
            logger.debug("No results for this query, moving to next.")
            continue

        for result_item in results:
            try:
                result_json = json.loads(result_item)
            except (json.JSONDecodeError, TypeError):
                # TypeError covers result items that are not str/bytes.
                logger.debug("Failed to parse JSON from the search result.")
                continue

            if not isinstance(result_json, dict):
                continue

            link = result_json.get('link', '')
            if not link:
                continue

            parsed_url = urlparse(link)
            if 'linkedin.com/in' not in (parsed_url.netloc + parsed_url.path):
                continue

            link = extract_user_linkedin_page(link)
            if not link or link in seen_links:
                continue

            candidate_records.append(
                {
                    "link": link,
                    "title": result_json.get('title', ''),
                    "snippet": result_json.get('snippet', ''),
                    "subtitle": result_json.get('subtitle', ''),
                    "query": query,
                }
            )
            # Keep a sliding window of the six most recent candidates;
            # seen_links still remembers every link ever considered.
            if len(candidate_records) > 6:
                candidate_records.pop(0)
            seen_links.add(link)

            high_conf_choice = await evaluate_with_llm()
            if high_conf_choice:
                return {
                    "linkedin_url": high_conf_choice.chosen_link,
                    "confidence": high_conf_choice.confidence,
                    "reasoning": high_conf_choice.reasoning,
                }

    if best_llm_choice and best_llm_link and best_llm_choice.confidence >= MIN_CONFIDENCE_THRESHOLD:
        logger.info(
            "Returning LinkedIn user page by email via LLM scoring (best overall): %s (confidence %.2f)",
            best_llm_link,
            best_llm_choice.confidence,
        )
        return {
            "linkedin_url": best_llm_link,
            "confidence": best_llm_choice.confidence,
            "reasoning": best_llm_choice.reasoning,
        }

    logger.info("No matching LinkedIn user page found using email queries.")
    return None
585
+
586
+
307
587
  @assistant_tool
308
588
  async def find_user_linkedin_url_by_job_title_google(
309
589
  user_title: str,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dhisana
3
- Version: 0.0.1.dev212
3
+ Version: 0.0.1.dev214
4
4
  Summary: A Python SDK for Dhisana AI Platform
5
5
  Home-page: https://github.com/dhisana-ai/dhisana-python-sdk
6
6
  Author: Admin
@@ -30,8 +30,8 @@ dhisana/utils/composite_tools.py,sha256=ZlwHCp7PXjYFUWUEeR_fTF0Z4Wg-4F6eBi1reE3F
30
30
  dhisana/utils/dataframe_tools.py,sha256=jxyvyXAMKxccST_W6o6FnBqAsvp7mGNOD6HV5V6xgeA,9242
31
31
  dhisana/utils/domain_parser.py,sha256=Kw5MPP06wK2azWQzuSiOE-DffOezLqDyF-L9JEBsMSU,1206
32
32
  dhisana/utils/email_parse_helpers.py,sha256=LIdm1B1IyGSW50y8EkxOk6YRjvxO2SJTgTKPLxYls_o,4613
33
- dhisana/utils/email_provider.py,sha256=oYjk-9yIgRzcPoPwnYjPvChPn0J78T5p6CoqXFPI-zk,14264
34
- dhisana/utils/enrich_lead_information.py,sha256=hZxSstIErqxXG40j5YyzTYTCSsTEf8YxRF25DU5-s2k,37302
33
+ dhisana/utils/email_provider.py,sha256=spjbNdnaVfCZEUw62EEHKijuXjI7vTVNqsftxJ15Erw,14352
34
+ dhisana/utils/enrich_lead_information.py,sha256=DzhaAO5scOcQ95oSgIyxkYRbz96gbXoASjsgrAalOo8,38730
35
35
  dhisana/utils/extract_email_content_for_llm.py,sha256=SQmMZ3YJtm3ZI44XiWEVAItcAwrsSSy1QzDne7LTu_Q,3713
36
36
  dhisana/utils/fetch_openai_config.py,sha256=LjWdFuUeTNeAW106pb7DLXZNElos2PlmXRe6bHZJ2hw,5159
37
37
  dhisana/utils/field_validators.py,sha256=BZgNCpBG264aRqNUu_J67c6zfr15zlAaIw2XRy8J7DY,11809
@@ -59,7 +59,7 @@ dhisana/utils/openai_helpers.py,sha256=NkTqbdql31GmwTcRd90KsSHpEoAFGpiimchjVTG5R
59
59
  dhisana/utils/openapi_spec_to_tools.py,sha256=oBLVq3WeDWvW9O02NCvY8bxQURQdKwHJHGcX8bC_b2I,1926
60
60
  dhisana/utils/parse_linkedin_messages_txt.py,sha256=g3N_ac70mAEuDDQ7Ott6mkOaBwI3ZvcsJD3R9RlYwPQ,3320
61
61
  dhisana/utils/profile.py,sha256=12IhefaLp3j74zzBzVRe50_KWqtWZ_cdzUKlYNy9T2Y,1192
62
- dhisana/utils/proxy_curl_tools.py,sha256=7xXRj-7ATJhGIYHMzyHSBLJDXzvMLBuqdvPCC4CKENo,49535
62
+ dhisana/utils/proxy_curl_tools.py,sha256=twsNziN3b2GfnXpNM-97iA2b9FtT84P340RhzlIeTkM,50075
63
63
  dhisana/utils/proxycurl_search_leads.py,sha256=6PlraPNYQ4fIDzTYnY-T2g_ip5fPkqHigbGoPD8ZosQ,16131
64
64
  dhisana/utils/python_function_to_tools.py,sha256=jypddM6WTlIQmRWnqAugYJXvaPYaXaMgWAZRYeeGlj4,2682
65
65
  dhisana/utils/research_lead.py,sha256=i7xk3edNzYKeJ_-JzKXwGL-NeeApZuWpx4vd4Uvguw4,7009
@@ -73,7 +73,7 @@ dhisana/utils/serpapi_additional_tools.py,sha256=Xb1tc_oK-IjI9ZrEruYhFg8UJMLHQDa
73
73
  dhisana/utils/serpapi_google_jobs.py,sha256=HUJFZEW8UvYqsW0sWlEDXgI_IUomh5fTkzRJzEgsDGc,4509
74
74
  dhisana/utils/serpapi_google_search.py,sha256=B3sVq2OXdrYmPbH7cjQN4RFoek96qgKzXIayKXn0HLU,7318
75
75
  dhisana/utils/serpapi_local_business_search.py,sha256=vinmuXLaQ_0BpEdwnONZ2vLTq5xnRh6ICmPbnpckSN4,5775
76
- dhisana/utils/serpapi_search_tools.py,sha256=vqUSJugmzQ8NFAWHPnr2skgbf_OTfcRNl6QI2CNojV0,21146
76
+ dhisana/utils/serpapi_search_tools.py,sha256=MglMPN9wHkGAHb7YAQXvEDsWK3RGS-zu3wUIvZAYxfo,31738
77
77
  dhisana/utils/serperdev_google_jobs.py,sha256=m5_2f_5y79FOFZz1A_go6m0hIUfbbAoZ0YTjUMO2BSI,4508
78
78
  dhisana/utils/serperdev_local_business.py,sha256=JoZfTg58Hojv61cyuwA2lcnPdLT1lawnWaBNrUYWnuQ,6447
79
79
  dhisana/utils/serperdev_search.py,sha256=Uv38XN5CKPeD8gipRKjz4PGwB7189MDeRvzKSxgp2nU,7687
@@ -92,8 +92,8 @@ dhisana/workflow/agent.py,sha256=esv7_i_XuMkV2j1nz_UlsHov_m6X5WZZiZm_tG4OBHU,565
92
92
  dhisana/workflow/flow.py,sha256=xWE3qQbM7j2B3FH8XnY3zOL_QXX4LbTW4ArndnEYJE0,1638
93
93
  dhisana/workflow/task.py,sha256=HlWz9mtrwLYByoSnePOemBUBrMEcj7KbgNjEE1oF5wo,1830
94
94
  dhisana/workflow/test.py,sha256=kwW8jWqSBNcRmoyaxlTuZCMOpGJpTbJQgHI7gSjwdzM,3399
95
- dhisana-0.0.1.dev212.dist-info/METADATA,sha256=yH0honmI__h7xgbmEvlByX6-EzyVFbqpgF1KT_SHs6c,1190
96
- dhisana-0.0.1.dev212.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
97
- dhisana-0.0.1.dev212.dist-info/entry_points.txt,sha256=jujxteZmNI9EkEaK-pOCoWuBujU8TCevdkfl9ZcKHek,49
98
- dhisana-0.0.1.dev212.dist-info/top_level.txt,sha256=NETTHt6YifG_P7XtRHbQiXZlgSFk9Qh9aR-ng1XTf4s,8
99
- dhisana-0.0.1.dev212.dist-info/RECORD,,
95
+ dhisana-0.0.1.dev214.dist-info/METADATA,sha256=qfSUHgTeVbhjitTsj_yrgiqhlFxrVLr45hEYd_S-o2w,1190
96
+ dhisana-0.0.1.dev214.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
97
+ dhisana-0.0.1.dev214.dist-info/entry_points.txt,sha256=jujxteZmNI9EkEaK-pOCoWuBujU8TCevdkfl9ZcKHek,49
98
+ dhisana-0.0.1.dev214.dist-info/top_level.txt,sha256=NETTHt6YifG_P7XtRHbQiXZlgSFk9Qh9aR-ng1XTf4s,8
99
+ dhisana-0.0.1.dev214.dist-info/RECORD,,