dhisana 0.0.1.dev116__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/schemas/common.py +10 -1
- dhisana/schemas/sales.py +203 -22
- dhisana/utils/add_mapping.py +0 -2
- dhisana/utils/apollo_tools.py +739 -119
- dhisana/utils/built_with_api_tools.py +4 -2
- dhisana/utils/check_email_validity_tools.py +35 -18
- dhisana/utils/check_for_intent_signal.py +1 -2
- dhisana/utils/check_linkedin_url_validity.py +34 -8
- dhisana/utils/clay_tools.py +3 -2
- dhisana/utils/clean_properties.py +1 -4
- dhisana/utils/compose_salesnav_query.py +0 -1
- dhisana/utils/compose_search_query.py +7 -3
- dhisana/utils/composite_tools.py +0 -1
- dhisana/utils/dataframe_tools.py +2 -2
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_provider.py +174 -35
- dhisana/utils/enrich_lead_information.py +183 -53
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +1 -1
- dhisana/utils/g2_tools.py +0 -1
- dhisana/utils/generate_content.py +0 -1
- dhisana/utils/generate_email.py +68 -23
- dhisana/utils/generate_email_response.py +294 -46
- dhisana/utils/generate_flow.py +0 -1
- dhisana/utils/generate_linkedin_connect_message.py +9 -2
- dhisana/utils/generate_linkedin_response_message.py +137 -66
- dhisana/utils/generate_structured_output_internal.py +317 -164
- dhisana/utils/google_custom_search.py +150 -44
- dhisana/utils/google_oauth_tools.py +721 -0
- dhisana/utils/google_workspace_tools.py +278 -54
- dhisana/utils/hubspot_clearbit.py +3 -1
- dhisana/utils/hubspot_crm_tools.py +718 -272
- dhisana/utils/instantly_tools.py +3 -1
- dhisana/utils/lusha_tools.py +10 -7
- dhisana/utils/mailgun_tools.py +150 -0
- dhisana/utils/microsoft365_tools.py +447 -0
- dhisana/utils/openai_assistant_and_file_utils.py +121 -177
- dhisana/utils/openai_helpers.py +8 -6
- dhisana/utils/parse_linkedin_messages_txt.py +1 -3
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +377 -76
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/research_lead.py +3 -3
- dhisana/utils/sales_navigator_crawler.py +1 -6
- dhisana/utils/salesforce_crm_tools.py +323 -50
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +126 -91
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +360 -432
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +178 -18
- dhisana/utils/test_connect.py +1603 -130
- dhisana/utils/trasform_json.py +3 -3
- dhisana/utils/web_download_parse_tools.py +0 -1
- dhisana/utils/zoominfo_tools.py +2 -3
- dhisana/workflow/test.py +1 -1
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +1 -1
- dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
- dhisana-0.0.1.dev116.dist-info/RECORD +0 -83
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/enrich_lead_information.py CHANGED

```diff
@@ -9,7 +9,6 @@ from typing import Any, Dict, List, Optional
 from urllib.parse import urlparse
 
 from pydantic import BaseModel, Field
-import tldextract
 import mdformat
 
 from dhisana.utils.check_email_validity_tools import process_email_properties
```
```diff
@@ -35,8 +34,9 @@ from dhisana.utils.proxy_curl_tools import (
 from dhisana.utils.research_lead import research_company_with_full_info_ai, research_lead_with_full_info_ai
 from dhisana.utils.serpapi_search_tools import (
     find_organization_linkedin_url_with_google_search,
+    find_user_linkedin_url_by_email_google,
     find_user_linkedin_url_google,
-
+    find_user_linkedin_url_with_serper,
     get_company_website_from_linkedin_url,
 )
 
```
```diff
@@ -162,7 +162,7 @@ async def get_clean_lead_info_with_llm(lead_info_str: str, tool_config: Optional
     lead_info, status = await get_structured_output_internal(
         prompt,
         BasicLeadInformation,
-        model="gpt-
+        model="gpt-5.1-chat",
         tool_config=tool_config
     )
     if status == "ERROR":
```
```diff
@@ -261,16 +261,21 @@ async def validate_and_cleanup(
         or is_invalid_name(first_name)
         or is_invalid_name(last_name)
     ):
-
-
-
-
-
-
-
-
-
-
+        # Check if we have a valid LinkedIn URL - if so, skip LLM as ProxyCurl will fill the data
+        user_linkedin_url = cloned_properties.get("user_linkedin_url", "").strip()
+        if not user_linkedin_url:
+            lead_info_str = str(cloned_properties)
+            logger.info(
+                "Detected invalid name fields. Using LLM to infer/correct name fields."
+            )
+            # Attempt LLM-based cleanup
+            new_lead_info = await get_clean_lead_info_with_llm(lead_info_str, tool_config=tool_config)
+            if new_lead_info:
+                cloned_properties["full_name"] = new_lead_info.get("full_name", "")
+                cloned_properties["first_name"] = new_lead_info.get("first_name", "")
+                cloned_properties["last_name"] = new_lead_info.get("last_name", "")
+        else:
+            logger.info("Valid LinkedIn URL found. Skipping LLM cleanup as ProxyCurl will enrich the data.")
     else:
         # Use the cheaper logic
         cloned_properties = cleanup_user_name(cloned_properties)
```
```diff
@@ -308,12 +313,16 @@ async def enrich_lead_information(
     user_properties: Dict[str, Any],
     use_strict_check: bool = True,
     get_valid_email: bool = True,
+    company_research_instructions: str = "",
+    lead_research_instructions: str = "",
+    enrich_company_information: bool = True,
+    enrich_lead_information: bool = True,
     tool_config: Optional[List[Dict[str, Any]]] = None,
 ) -> Dict[str, Any]:
     logger.debug("Starting enrich_lead_information with user_properties: %s", user_properties)
     cloned_properties = dict(user_properties)
 
-    cloned_properties = await validate_and_cleanup(cloned_properties)
+    cloned_properties = await validate_and_cleanup(cloned_properties, tool_config=tool_config, use_strict_check=use_strict_check)
 
     cloned_properties = await enrich_user_info(
         input_properties=cloned_properties,
```
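The four new keyword arguments make lead and company research optional and let callers pass custom instructions. A minimal sketch of a call using the extended signature (the property values and `tool_config` are placeholders, not values from this package):

```python
# Hypothetical call, inside an async context; the keys mirror those the module reads.
enriched = await enrich_lead_information(
    user_properties={
        "full_name": "Jane Doe",
        "organization_name": "Acme Corp",
    },
    lead_research_instructions="Highlight recent role changes.",
    company_research_instructions="Summarize funding history.",
    enrich_company_information=True,
    enrich_lead_information=True,
    tool_config=tool_config,  # placeholder: caller-supplied integration config
)
```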
```diff
@@ -336,29 +345,68 @@ async def enrich_lead_information(
         use_strict_check=use_strict_check,
         tool_config=tool_config,
     )
-
+
     if get_valid_email:
         await process_email_properties(cloned_properties, tool_config)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    # ------------------------------------------------------------------
+    # Supplement missing follower count or name information using Serper
+    # ------------------------------------------------------------------
+    linkedin_url = cloned_properties.get("user_linkedin_url", "").strip()
+    follower_count = cloned_properties.get("linkedin_follower_count")
+    first_name = cloned_properties.get("first_name")
+    if (
+        linkedin_url
+        and (follower_count is None or (isinstance(follower_count, str) and not follower_count.strip()) or not first_name)
+    ):
+        serper_result = await find_user_linkedin_url_with_serper(
+            linkedin_url, tool_config=tool_config
+        )
+        if serper_result:
+            if follower_count is None or (
+                isinstance(follower_count, str) and not follower_count.strip()
+            ):
+                cloned_properties["linkedin_follower_count"] = serper_result.get(
+                    "linkedin_follower_count", 0
+                )
+            if not first_name:
+                cloned_properties["first_name"] = serper_result.get("first_name", "")
+                cloned_properties["last_name"] = serper_result.get("last_name", "")
+
+    cloned_properties = await validate_and_cleanup(
+        cloned_properties, tool_config=tool_config, use_strict_check=use_strict_check
+    )
+
+    research_summary = cloned_properties.get("research_summary", "")
+
+    if enrich_lead_information:
+        summary = await research_lead_with_full_info_ai(
+            cloned_properties, lead_research_instructions, tool_config=tool_config
+        )
+        if summary:
+            research_summary = summary.get("research_summary", "")
+
+    if enrich_company_information:
+        company_company_properties = {
+            "organization_name": cloned_properties.get("organization_name", ""),
+            "primary_domain_of_organization": cloned_properties.get("primary_domain_of_organization", ""),
+            "organization_website": cloned_properties.get("organization_website", ""),
+        }
+        company_summary = await research_company_with_full_info_ai(
+            company_company_properties,
+            company_research_instructions,
+            tool_config=tool_config,
+        )
+        if company_summary:
+            markdown_text = research_summary + "\n\n#### " + company_summary.get(
+                "research_summary", ""
+            )
+            formatted_markdown = mdformat.text(markdown_text)
+            research_summary = re.sub(
+                r'^(#{1,6})\s+', '##### ', formatted_markdown, flags=re.MULTILINE
+            )
+
+    cloned_properties["research_summary"] = research_summary
     return cloned_properties
 
 
```
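The summary assembly above appends the company summary under a `####` heading, normalizes the combined markdown with mdformat, and then demotes every heading to H5. A standalone illustration of that normalization (the sample markdown is invented):

```python
import re

import mdformat

# Invented sample; mirrors the transformation applied to research_summary.
markdown_text = "# Lead summary\n\nDetails here.\n\n#### Company summary\n\nMore details."
formatted_markdown = mdformat.text(markdown_text)
# The substitution drops the original '#' run entirely, so every heading
# (H1 through H6) comes out as an H5 line.
print(re.sub(r'^(#{1,6})\s+', '##### ', formatted_markdown, flags=re.MULTILINE))
```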
```diff
@@ -374,6 +422,34 @@ class UserInfoFromGithubProfileId(BaseModel):
     location: str
 
 
+def extract_id_from_salesnav_url(url_key: str) -> str:
+    """
+    Extract the Sales Navigator lead ID from a URL like
+    'https://www.linkedin.com/sales/lead/<ID>?...'
+    """
+    if not url_key:
+        return ""
+    match = re.search(r"linkedin\.com/sales/lead/([^/?#,]+)", url_key, re.IGNORECASE)
+    if not match:
+        return ""
+    # strip out any non-word or hyphen chars
+    return re.sub(r"[^\w-]", "", match.group(1))
+
+def proxy_linkedin_url(user_linkedin_salesnav_url: str) -> str:
+    """
+    Given a Sales Navigator URL, return the corresponding public LinkedIn URL.
+    Raises ValueError if the ID cannot be extracted.
+    """
+    salesnav_id = extract_id_from_salesnav_url(user_linkedin_salesnav_url)
+    if not salesnav_id:
+        raise ValueError("Could not extract ID from Sales Nav URL.")
+    return f"https://www.linkedin.com/in/{salesnav_id}"
+
+# -------------------------------------------------------------------
+# (Pseudo) get_structured_output_internal, find_user_linkedin_url_google
+# and other references assumed to exist in your environment.
+# -------------------------------------------------------------------
+
 async def get_user_linkedin_url_from_github_profile(
     github_profile_id: str,
     lead_properties: dict,
```
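A quick illustration of the two new helpers (the lead ID below is invented):

```python
# Invented Sales Navigator URL. The regex captures up to the first
# '/', '?', '#', or ',', then strips anything outside [\w-].
url = "https://www.linkedin.com/sales/lead/ACwAAB12cDEF,NAME_SEARCH,xYz9?_ntb=abc"

extract_id_from_salesnav_url(url)  # -> "ACwAAB12cDEF"
proxy_linkedin_url(url)            # -> "https://www.linkedin.com/in/ACwAAB12cDEF"
```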
```diff
@@ -384,7 +460,6 @@ async def get_user_linkedin_url_from_github_profile(
     Attempt to locate a user's LinkedIn profile URL from their GitHub profile ID via web search.
     Also gather basic user info (first/last name) if possible.
     """
-
     instructions = f"""
     Give user information from user GitHub handle; try to locate the LinkedIn profile URL
     for the user using web search.
```
```diff
@@ -414,10 +489,11 @@
     }}
     """
 
+    # Example call to structured output function
     response, status = await get_structured_output_internal(
         instructions,
         UserInfoFromGithubProfileId,
-        model="gpt-
+        model="gpt-5.1-chat",
         use_web_search=True,
         tool_config=tool_config
     )
```
```diff
@@ -426,26 +502,23 @@
     else:
         return {}
 
-
 async def enrich_user_info(
     input_properties: Dict[str, Any],
     use_strict_check: bool,
     tool_config: Optional[List[Dict[str, Any]]] = None,
 ) -> Dict[str, Any]:
     """
-    Attempt to find or fix a user's LinkedIn URL using name, title, location,
-    or GitHub profile handle if present.
-
-
-    :param use_strict_check: Whether to use strict matching during searches.
-    :param tool_config: Optional list of tool configurations dicts.
-    :return: Updated dictionary with user LinkedIn URL if found.
+    Attempt to find or fix a user's LinkedIn URL using name, title, location,
+    company info or GitHub profile handle if present. If still not found,
+    but user_linkedin_salesnav_url exists, we fall back to creating a
+    proxy URL from the Sales Navigator link.
     """
     logger.debug("Starting enrich_user_info for: %s", input_properties.get("full_name"))
     user_linkedin_url = (input_properties.get("user_linkedin_url") or "").strip()
     input_properties["linkedin_url_match"] = False
     github_profile_id = (input_properties.get("github_profile_id") or "").strip()
 
+    # 1) If we do not have a user_linkedin_url, try getting it from GitHub
     if not user_linkedin_url:
         if github_profile_id:
             response = await get_user_linkedin_url_from_github_profile(
```
```diff
@@ -467,7 +540,7 @@
             input_properties["lead_location"] = response.get("location", "")
             return input_properties
 
-    # If still no LinkedIn URL, try
+    # 2) If still no LinkedIn URL, try name/title/org searching
     full_name = (input_properties.get("full_name") or "").strip()
     if not full_name:
         first_name = (input_properties.get("first_name", "") or "").strip()
```
```diff
@@ -478,9 +551,11 @@
     location = input_properties.get("lead_location", "") or ""
     org_name = (input_properties.get("organization_name", "") or "").strip()
     org_domain = (input_properties.get("primary_domain_of_organization", "") or "").strip()
+    email = (input_properties.get("email") or "").strip()
 
     if full_name and (org_name or org_domain or title):
-
+        # This function does a google-based search for the user's LinkedIn
+        found_linkedin_url = await find_user_linkedin_url_google(
             user_name=full_name,
             user_title=title,
             user_location=location,
```
```diff
@@ -489,11 +564,50 @@
             use_strict_check=use_strict_check,
             tool_config=tool_config,
         )
-
+        if found_linkedin_url:
+            user_linkedin_url = found_linkedin_url
+            input_properties["user_linkedin_url"] = user_linkedin_url
+    if not user_linkedin_url and email:
+        # If we have an email but no name, try searching by email
+        email_lookup_result = await find_user_linkedin_url_by_email_google(
+            email=email,
+            user_name=full_name,
+            user_title=title,
+            user_location=location,
+            user_company=org_name,
+            tool_config=tool_config,
+        )
+        if email_lookup_result and email_lookup_result.get("linkedin_url"):
+            user_linkedin_url = email_lookup_result["linkedin_url"]
+            input_properties["user_linkedin_url"] = user_linkedin_url
+            confidence = email_lookup_result.get("confidence", 0.0)
+            reasoning = email_lookup_result.get("reasoning", "")
+            input_properties["user_linkedin_url_confidence"] = confidence
+            input_properties["user_linkedin_url_reasoning"] = reasoning
+
+            additional_properties = input_properties.get("additional_properties") or {}
+            additional_properties["user_linkedin_url_confidence"] = confidence
+            if reasoning:
+                additional_properties["user_linkedin_url_reasoning"] = reasoning
+            input_properties["additional_properties"] = additional_properties
+
+    # 3) Final fallback: if STILL no user_linkedin_url,
+    #    but user_linkedin_salesnav_url is present, use proxy
+    if not input_properties.get("user_linkedin_url"):
+        salesnav_url = input_properties.get("user_linkedin_salesnav_url", "")
+        if salesnav_url:
+            try:
+                proxy_url = proxy_linkedin_url(salesnav_url)
+                input_properties["user_linkedin_url"] = proxy_url
+                logger.debug("Falling back to proxy LinkedIn URL from SalesNav: %s", proxy_url)
+            except ValueError:
+                # If we can't parse an ID from the sales nav URL, skip
+                logger.warning("Could not parse ID from user_linkedin_salesnav_url: %s", salesnav_url)
 
     return input_properties
 
 
+
 async def enrich_with_provider(
     cloned_properties: Dict[str, Any],
     tool_config: Optional[List[Dict[str, Any]]],
```
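`find_user_linkedin_url_by_email_google` is consumed above only through `.get("linkedin_url")`, `.get("confidence", 0.0)`, and `.get("reasoning", "")`. Inferring from that usage alone, its result is shaped roughly like this hypothetical value (not a documented contract):

```python
# Hypothetical result, inferred from how enrich_user_info reads the dict.
email_lookup_result = {
    "linkedin_url": "https://www.linkedin.com/in/jane-doe",
    "confidence": 0.82,
    "reasoning": "Profile name and company match the email domain.",
}
```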
```diff
@@ -651,16 +765,32 @@ async def enrich_organization_info_from_company_url(
     organization_linkedin_url: str,
     use_strict_check: bool = True,
     tool_config: Optional[List[Dict[str, Any]]] = None,
+    categories: Optional[bool] = None,
+    funding_data: Optional[bool] = None,
+    exit_data: Optional[bool] = None,
+    acquisitions: Optional[bool] = None,
+    extra: Optional[bool] = None,
+    use_cache: Optional[str] = "if-present",
+    fallback_to_cache: Optional[str] = "on-error",
 ) -> Dict[str, Any]:
     """
     Given an organization LinkedIn URL, attempt to enrich its data (e.g. name, website)
-    via ProxyCurl.
+    via ProxyCurl. Additional Proxycurl Company API boolean flags (categories, funding_data, etc.)
+    can be supplied to control the returned payload (True -> "include"). If data is found,
+    set domain, then return the dict. Otherwise, return {}.
     """
 
     # Call ProxyCurl to enrich
     company_data = await enrich_organization_info_from_proxycurl(
         organization_linkedin_url=organization_linkedin_url,
-        tool_config=tool_config
+        tool_config=tool_config,
+        categories=categories,
+        funding_data=funding_data,
+        exit_data=exit_data,
+        acquisitions=acquisitions,
+        extra=extra,
+        use_cache=use_cache,
+        fallback_to_cache=fallback_to_cache,
     )
 
     # If ProxyCurl returned any data, set domain, then return
```
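A sketch of a call opting into the additional Proxycurl payload sections; per the new docstring, True is translated to the API's "include" value (the URL and `tool_config` are placeholders):

```python
# Hypothetical call, inside an async context.
org_info = await enrich_organization_info_from_company_url(
    organization_linkedin_url="https://www.linkedin.com/company/example-co",
    funding_data=True,             # include funding rounds
    acquisitions=True,             # include acquisition history
    use_cache="if-present",        # serve cached data when available
    fallback_to_cache="on-error",  # fall back to cache if the live call fails
    tool_config=tool_config,       # placeholder: caller-supplied integration config
)
```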
```diff
@@ -698,7 +828,7 @@ async def enrich_organization_info_from_job_url(
         job_info = await enrich_job_info_from_proxycurl(
             normalized_job_url, tool_config=tool_config
         )
-    except Exception
+    except Exception:
         logger.exception("Exception occurred while fetching job info from Proxycurl.")
         return {}
 
```
```diff
@@ -789,7 +919,7 @@ async def get_company_domain_from_llm_web_search(
     response, status = await get_structured_output_internal(
         instructions,
         CompanyInfoFromName,
-        model="gpt-
+        model="gpt-5.1-chat",
         use_web_search=True,
         tool_config=tool_config
     )
```
```diff
@@ -798,6 +928,6 @@
         return response.model_dump()
     else:
         return {}
-    except Exception
+    except Exception:
         logger.exception("Exception during get_company_domain_from_llm_web_search.")
         return {}
```
dhisana/utils/fetch_openai_config.py ADDED

```diff
@@ -0,0 +1,129 @@
+"""
+Unified OpenAI / Azure OpenAI helper (no env-fallback for secrets)
+=================================================================
+
+Resolution order
+----------------
+1. If `tool_config` has a **"openai"** block → public OpenAI
+2. Else if it has an **"azure_openai"** block → Azure OpenAI
+3. Otherwise → raise ValueError
+
+`api_key` **and** `endpoint` (for Azure) must therefore be supplied in
+`tool_config`. They will never be read from the host environment.
+
+Optional:
+• `AZURE_OPENAI_API_VERSION` – defaults to 2025-03-01-preview
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Dict, List, Optional, Tuple, Union
+
+from openai import AsyncOpenAI, OpenAI, AzureOpenAI, AsyncAzureOpenAI
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 1. Helpers: config parsing
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _extract_config(
+    tool_config: Optional[List[Dict]], provider_name: str
+) -> Dict[str, str]:
+    """Return the config map for the requested provider name, else {}."""
+    if not tool_config:
+        return {}
+    block = next((b for b in tool_config if b.get("name") == provider_name), {})
+    return {entry["name"]: entry["value"] for entry in block.get("configuration", []) if entry}
+
+
+def _discover_credentials(
+    tool_config: Optional[List[Dict]] = None,
+) -> Tuple[str, str, Optional[str]]:
+    """
+    Return (provider, api_key, endpoint_or_None).
+
+    provider ∈ {"public", "azure"}
+    """
+    # 1️⃣ Public OpenAI
+    openai_cfg = _extract_config(tool_config, "openai")
+    if openai_cfg:
+        key = openai_cfg.get("apiKey")
+        if not key:
+            raise ValueError(
+                "OpenAI integration is not configured. Please configure the connection to OpenAI in Integrations."
+            )
+        return "public", key, None
+
+    # 2️⃣ Azure OpenAI
+    azure_cfg = _extract_config(tool_config, "azure_openai")
+    if azure_cfg:
+        key = azure_cfg.get("apiKey")
+        endpoint = azure_cfg.get("endpoint")
+        if not key or not endpoint:
+            raise ValueError(
+                "Azure OpenAI integration is not configured. Please configure the connection to Azure OpenAI in Integrations."
+            )
+        return "azure", key, endpoint
+
+    # 3️⃣ Neither block present → error
+    raise ValueError(
+        "OpenAI integration is not configured. Please configure the connection to OpenAI in Integrations."
+    )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 2. Client factories
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _api_version() -> str:
+    """Return the Azure API version (env-controlled, no secret)."""
+    return os.getenv("AZURE_OPENAI_API_VERSION", "2025-03-01-preview")
+
+
+def create_openai_client(
+    tool_config: Optional[List[Dict]] = None,
+) -> Union[OpenAI, AzureOpenAI]:
+    """
+    Return a *synchronous* client:
+      • openai.OpenAI      – public service
+      • openai.AzureOpenAI – Azure
+    """
+    provider, key, endpoint = _discover_credentials(tool_config)
+
+    if provider == "public":
+        return OpenAI(api_key=key)
+
+    # Azure
+    return AzureOpenAI(api_key=key, azure_endpoint=endpoint, api_version=_api_version())
+
+
+def create_async_openai_client(
+    tool_config: Optional[List[Dict]] = None,
+) -> AsyncOpenAI:
+    """
+    Return an *async* client (AsyncOpenAI).
+
+    For Azure we pass both `azure_endpoint` and `api_version`.
+    """
+    provider, key, endpoint = _discover_credentials(tool_config)
+
+    if provider == "public":
+        return AsyncOpenAI(api_key=key)
+
+    return AsyncAzureOpenAI(
+        api_key=key,
+        azure_endpoint=endpoint,
+        api_version=_api_version(),
+    )
+
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 3. Convenience helper (legacy)
+# ─────────────────────────────────────────────────────────────────────────────
+
+def get_openai_access_token(tool_config: Optional[List[Dict]] = None) -> str:
+    """Return just the API key (legacy helper)."""
+    _, key, _ = _discover_credentials(tool_config)
+    return key
```
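For reference, `_extract_config` expects `tool_config` to be a list of provider blocks, each carrying a `configuration` list of name/value entries. A minimal sketch with placeholder credentials, derived from the parsing code above:

```python
# Placeholder credentials; the shape follows _extract_config's parsing.
tool_config = [
    {
        "name": "azure_openai",
        "configuration": [
            {"name": "apiKey", "value": "<azure-openai-api-key>"},
            {"name": "endpoint", "value": "https://<resource>.openai.azure.com"},
        ],
    }
]

client = create_async_openai_client(tool_config)  # -> AsyncAzureOpenAI
token = get_openai_access_token(tool_config)      # -> "<azure-openai-api-key>"
```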
dhisana/utils/g2_tools.py CHANGED