dhisana 0.0.1.dev277__tar.gz → 0.0.1.dev279__tar.gz

This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (121)
  1. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/PKG-INFO +1 -1
  2. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/setup.py +1 -1
  3. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/apollo_tools.py +405 -9
  4. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/enrich_lead_information.py +82 -16
  5. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/test_connect.py +197 -0
  6. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana.egg-info/PKG-INFO +1 -1
  7. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/README.md +0 -0
  8. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/pyproject.toml +0 -0
  9. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/setup.cfg +0 -0
  10. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/__init__.py +0 -0
  11. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/cli/__init__.py +0 -0
  12. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/cli/cli.py +0 -0
  13. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/cli/datasets.py +0 -0
  14. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/cli/models.py +0 -0
  15. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/cli/predictions.py +0 -0
  16. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/schemas/__init__.py +0 -0
  17. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/schemas/common.py +0 -0
  18. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/schemas/sales.py +0 -0
  19. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/ui/__init__.py +0 -0
  20. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/ui/components.py +0 -0
  21. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/__init__.py +0 -0
  22. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/add_mapping.py +0 -0
  23. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/agent_tools.py +0 -0
  24. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/assistant_tool_tag.py +0 -0
  25. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/built_with_api_tools.py +0 -0
  26. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/cache_output_tools.py +0 -0
  27. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/cache_output_tools_local.py +0 -0
  28. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/check_email_validity_tools.py +0 -0
  29. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/check_for_intent_signal.py +0 -0
  30. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/check_linkedin_url_validity.py +0 -0
  31. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/clay_tools.py +0 -0
  32. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/clean_properties.py +0 -0
  33. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/company_utils.py +0 -0
  34. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/compose_salesnav_query.py +0 -0
  35. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/compose_search_query.py +0 -0
  36. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/compose_three_step_workflow.py +0 -0
  37. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/composite_tools.py +0 -0
  38. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/dataframe_tools.py +0 -0
  39. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/domain_parser.py +0 -0
  40. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/email_body_utils.py +0 -0
  41. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/email_parse_helpers.py +0 -0
  42. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/email_provider.py +0 -0
  43. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/extract_email_content_for_llm.py +0 -0
  44. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/fetch_openai_config.py +0 -0
  45. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/field_validators.py +0 -0
  46. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/g2_tools.py +0 -0
  47. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/generate_content.py +0 -0
  48. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/generate_custom_message.py +0 -0
  49. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/generate_email.py +0 -0
  50. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/generate_email_response.py +0 -0
  51. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/generate_flow.py +0 -0
  52. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/generate_leads_salesnav.py +0 -0
  53. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/generate_linkedin_connect_message.py +0 -0
  54. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/generate_linkedin_response_message.py +0 -0
  55. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/generate_structured_output_internal.py +0 -0
  56. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/google_custom_search.py +0 -0
  57. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/google_oauth_tools.py +0 -0
  58. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/google_workspace_tools.py +0 -0
  59. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/hubspot_clearbit.py +0 -0
  60. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/hubspot_crm_tools.py +0 -0
  61. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/instantly_tools.py +0 -0
  62. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/linkedin_crawler.py +0 -0
  63. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/lusha_tools.py +0 -0
  64. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/mailgun_tools.py +0 -0
  65. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/mailreach_tools.py +0 -0
  66. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/microsoft365_tools.py +0 -0
  67. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/openai_assistant_and_file_utils.py +0 -0
  68. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/openai_helpers.py +0 -0
  69. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/openapi_spec_to_tools.py +0 -0
  70. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/openapi_tool/__init__.py +0 -0
  71. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/openapi_tool/api_models.py +0 -0
  72. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +0 -0
  73. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/openapi_tool/openapi_tool.py +0 -0
  74. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/parse_linkedin_messages_txt.py +0 -0
  75. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/profile.py +0 -0
  76. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/proxy_curl_tools.py +0 -0
  77. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/proxycurl_search_leads.py +0 -0
  78. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/python_function_to_tools.py +0 -0
  79. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/research_lead.py +0 -0
  80. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/sales_navigator_crawler.py +0 -0
  81. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/salesforce_crm_tools.py +0 -0
  82. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/search_router.py +0 -0
  83. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/search_router_jobs.py +0 -0
  84. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/sendgrid_tools.py +0 -0
  85. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/serarch_router_local_business.py +0 -0
  86. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/serpapi_additional_tools.py +0 -0
  87. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/serpapi_google_jobs.py +0 -0
  88. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/serpapi_google_search.py +0 -0
  89. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/serpapi_local_business_search.py +0 -0
  90. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/serpapi_search_tools.py +0 -0
  91. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/serperdev_google_jobs.py +0 -0
  92. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/serperdev_local_business.py +0 -0
  93. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/serperdev_search.py +0 -0
  94. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/smtp_email_tools.py +0 -0
  95. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/trasform_json.py +0 -0
  96. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/web_download_parse_tools.py +0 -0
  97. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/workflow_code_model.py +0 -0
  98. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/utils/zoominfo_tools.py +0 -0
  99. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/workflow/__init__.py +0 -0
  100. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/workflow/agent.py +0 -0
  101. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/workflow/flow.py +0 -0
  102. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/workflow/task.py +0 -0
  103. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana/workflow/test.py +0 -0
  104. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana.egg-info/SOURCES.txt +0 -0
  105. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana.egg-info/dependency_links.txt +0 -0
  106. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana.egg-info/entry_points.txt +0 -0
  107. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana.egg-info/requires.txt +0 -0
  108. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/src/dhisana.egg-info/top_level.txt +0 -0
  109. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_agent_tools.py +0 -0
  110. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_apollo_company_search.py +0 -0
  111. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_apollo_lead_search.py +0 -0
  112. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_connectivity.py +0 -0
  113. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_email_body_utils.py +0 -0
  114. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_google_document.py +0 -0
  115. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_hubspot_call_logs.py +0 -0
  116. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_linkedin_serper.py +0 -0
  117. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_mailreach.py +0 -0
  118. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_mcp_connectivity.py +0 -0
  119. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_proxycurl_get_company_search_id.py +0 -0
  120. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_proxycurl_job_count.py +0 -0
  121. {dhisana-0.0.1.dev277 → dhisana-0.0.1.dev279}/tests/test_structured_output_with_mcp.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dhisana
-Version: 0.0.1.dev277
+Version: 0.0.1.dev279
 Summary: A Python SDK for Dhisana AI Platform
 Home-page: https://github.com/dhisana-ai/dhisana-python-sdk
 Author: Admin
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='dhisana',
-    version='0.0.1-dev277',
+    version='0.0.1-dev279',
     description='A Python SDK for Dhisana AI Platform',
     author='Admin',
     author_email='contact@dhisana.ai',
@@ -1345,7 +1345,14 @@ def fill_in_company_properties(company_data: dict) -> dict:
     if annual_revenue is None:
         annual_revenue = _parse_compact_number(company_data.get("organization_revenue_printed"))
 
-    company_size = company_data.get("estimated_num_employees")
+    # Try multiple fields for company size/employee count
+    company_size = (
+        company_data.get("estimated_num_employees")
+        or company_data.get("num_employees")
+        or company_data.get("employee_count")
+        or company_data.get("employees_count")
+        or company_data.get("headcount")
+    )
     if company_size is not None:
         try:
             company_size = int(company_size)
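For reference, a minimal sketch of how the widened employee-count lookup resolves at runtime. The sample payload below is purely illustrative and assumes an Apollo-style response that lacks estimated_num_employees:

# Illustrative only: an Apollo-style payload without estimated_num_employees.
company_data = {"num_employees": "5200", "name": "Example Corp"}

company_size = (
    company_data.get("estimated_num_employees")
    or company_data.get("num_employees")
    or company_data.get("employee_count")
    or company_data.get("employees_count")
    or company_data.get("headcount")
)
print(company_size)       # "5200" -- the first truthy field wins
print(int(company_size))  # 5200, matching the int() coercion that follows in the function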
@@ -1373,10 +1380,14 @@ def fill_in_company_properties(company_data: dict) -> dict:
         or company_data.get("sanitized_phone")
     )
 
+    # Try multiple fields for industry
     industry = company_data.get("industry")
     if not industry and isinstance(company_data.get("industries"), list):
         industries = [str(x).strip() for x in company_data["industries"] if str(x).strip()]
         industry = industries[0] if industries else None
+    # Some Apollo responses have industry_tag_id but not industry name
+    if not industry and company_data.get("industry_tag_id"):
+        industry = company_data.get("industry_tag_id")
 
     billing_street = (
         company_data.get("street_address")
@@ -1385,19 +1396,42 @@ def fill_in_company_properties(company_data: dict) -> dict:
         or company_data.get("raw_address")
     )
 
+    # Determine ownership from publicly traded info
+    ownership = company_data.get("ownership")
+    if not ownership:
+        if company_data.get("publicly_traded_symbol") or company_data.get("publicly_traded_exchange"):
+            ownership = "public"
+
+    # Parse market cap
+    market_cap = _parse_compact_number(company_data.get("market_cap"))
+
+    # Build account dictionary with ProxyCurl-compatible field names
     account: Dict[str, Any] = {
-        "name": company_data.get("name"),
-        "domain": company_data.get("primary_domain"),
-        "website": company_data.get("website_url"),
+        # Primary identifiers - use ProxyCurl-compatible names
+        "name": company_data.get("name"),  # Keep for backward compatibility
+        "organization_name": company_data.get("name"),  # ProxyCurl-compatible
+        "domain": company_data.get("primary_domain"),  # Keep for backward compatibility
+        "primary_domain_of_organization": company_data.get("primary_domain"),  # ProxyCurl-compatible
+        "website": company_data.get("website_url"),  # Keep for backward compatibility
+        "organization_website": company_data.get("website_url"),  # ProxyCurl-compatible
+        "organization_linkedin_url": company_data.get("linkedin_url"),
+
+        # Contact info
         "phone": phone,
         "fax": company_data.get("fax") or company_data.get("fax_number"),
-        "industry": industry,
-        "company_size": company_size,
+
+        # Business details - use ProxyCurl-compatible names
+        "industry": industry,  # Keep for backward compatibility
+        "organization_industry": industry,  # ProxyCurl-compatible
+        "company_size": company_size,  # Keep for backward compatibility
+        "organization_size": company_size,  # ProxyCurl-compatible
         "founded_year": founded_year,
         "annual_revenue": annual_revenue,
         "type": company_data.get("type") or company_data.get("organization_type"),
-        "ownership": company_data.get("ownership"),
-        "organization_linkedin_url": company_data.get("linkedin_url"),
+        "ownership": ownership,
+        "description": company_data.get("description") or company_data.get("short_description"),
+
+        # Address info
         "billing_street": billing_street,
         "billing_city": company_data.get("city"),
         "billing_state": company_data.get("state"),
@@ -1405,20 +1439,44 @@ def fill_in_company_properties(company_data: dict) -> dict:
             or company_data.get("zip")
             or company_data.get("zipcode"),
         "billing_country": company_data.get("country"),
-        "description": company_data.get("description"),
+
+        # Build organization_hq_location like ProxyCurl does
+        "organization_hq_location": ", ".join(filter(None, [
+            company_data.get("city"),
+            company_data.get("state"),
+            company_data.get("country")
+        ])) or None,
+
+        # Other fields
         "keywords": _parse_keywords(company_data.get("keywords")),
         "tags": [],
         "notes": [],
         "additional_properties": {
             "apollo_organization_id": company_data.get("id"),
+            "logo_url": company_data.get("logo_url"),
             "facebook_url": company_data.get("facebook_url"),
             "twitter_url": company_data.get("twitter_url"),
+            "angellist_url": company_data.get("angellist_url"),
+            "crunchbase_url": company_data.get("crunchbase_url"),
+            "blog_url": company_data.get("blog_url"),
             "funding_stage": company_data.get("latest_funding_stage"),
             "total_funding": company_data.get("total_funding"),
             "technology_names": company_data.get("technology_names"),
             "primary_phone": primary_phone if isinstance(primary_phone, dict) else None,
             "raw_address": company_data.get("raw_address"),
             "organization_revenue_printed": company_data.get("organization_revenue_printed"),
+            "publicly_traded_symbol": company_data.get("publicly_traded_symbol"),
+            "publicly_traded_exchange": company_data.get("publicly_traded_exchange"),
+            "market_cap": market_cap,
+            "market_cap_printed": company_data.get("market_cap"),
+            "sic_codes": company_data.get("sic_codes"),
+            "naics_codes": company_data.get("naics_codes"),
+            "languages": company_data.get("languages"),
+            "alexa_ranking": company_data.get("alexa_ranking"),
+            "linkedin_uid": company_data.get("linkedin_uid"),
+            "headcount_6_month_growth": company_data.get("organization_headcount_six_month_growth"),
+            "headcount_12_month_growth": company_data.get("organization_headcount_twelve_month_growth"),
+            "headcount_24_month_growth": company_data.get("organization_headcount_twenty_four_month_growth"),
             "apollo_organization_data": json.dumps(cleanup_properties(company_data)),
         },
         "research_summary": None,
@@ -1726,3 +1784,341 @@ async def search_companies_with_apollo_page(
         "next_page": current_page + 1 if has_next_page else None,
         "results": companies
     }
+
+
+def _extract_domain_from_url(url: str) -> Optional[str]:
+    """
+    Extract domain from a URL.
+
+    Args:
+        url: The URL to extract domain from
+
+    Returns:
+        The extracted domain or None if extraction fails
+    """
+    if not url:
+        return None
+
+    try:
+        # Handle URLs without scheme
+        if not url.startswith(('http://', 'https://')):
+            url = 'https://' + url
+
+        parsed = urlparse(url)
+        domain = parsed.netloc or parsed.path.split('/')[0]
+
+        # Remove www. prefix if present
+        if domain.startswith('www.'):
+            domain = domain[4:]
+
+        return domain if domain else None
+    except Exception:
+        return None
+
+
+def _extract_linkedin_company_identifier(linkedin_url: str) -> Optional[str]:
+    """
+    Extract the company identifier from a LinkedIn company URL.
+
+    Args:
+        linkedin_url: LinkedIn company URL (e.g., https://www.linkedin.com/company/microsoft)
+
+    Returns:
+        The company identifier (e.g., 'microsoft') or None if extraction fails
+    """
+    if not linkedin_url:
+        return None
+
+    try:
+        # Normalize the URL
+        url = linkedin_url.strip().rstrip('/')
+
+        # Handle various LinkedIn URL formats
+        # https://www.linkedin.com/company/microsoft
+        # https://linkedin.com/company/microsoft/
+        # linkedin.com/company/microsoft
+
+        if not url.startswith(('http://', 'https://')):
+            url = 'https://' + url
+
+        parsed = urlparse(url)
+        path_parts = [p for p in parsed.path.split('/') if p]
+
+        # Look for 'company' in path and get the next segment
+        if 'company' in path_parts:
+            company_idx = path_parts.index('company')
+            if company_idx + 1 < len(path_parts):
+                return path_parts[company_idx + 1]
+
+        return None
+    except Exception:
+        return None
+
+
+@assistant_tool
+@backoff.on_exception(
+    backoff.expo,
+    aiohttp.ClientResponseError,
+    max_tries=2,
+    giveup=lambda e: e.status != 429,
+    factor=10,
+)
+async def search_organization_by_linkedin_or_domain(
+    linkedin_url: Optional[str] = None,
+    domain: Optional[str] = None,
+    tool_config: Optional[List[Dict]] = None,
+) -> Dict[str, Any]:
+    """
+    Search for an organization in Apollo using LinkedIn URL or domain and return
+    standardized organization information.
+
+    This function uses Apollo's mixed_companies/search endpoint to find companies
+    by their LinkedIn URL or domain, then transforms the result into a standardized
+    organization information format.
+
+    Parameters:
+    - **linkedin_url** (*str*, optional): LinkedIn company URL
+      (e.g., https://www.linkedin.com/company/microsoft)
+    - **domain** (*str*, optional): Company domain (e.g., microsoft.com)
+
+    At least one of linkedin_url or domain must be provided.
+
+    Returns:
+    - **dict**: Standardized organization information containing:
+        - name: Company name
+        - domain: Primary domain
+        - website: Company website URL
+        - phone: Primary phone number
+        - industry: Primary industry
+        - company_size: Number of employees
+        - founded_year: Year company was founded
+        - annual_revenue: Annual revenue
+        - organization_linkedin_url: LinkedIn company URL
+        - billing_street, billing_city, billing_state, billing_zip, billing_country: Address info
+        - description: Company description
+        - keywords: List of keywords/tags
+        - additional_properties: Additional Apollo-specific data
+        - error: Error message if search fails
+    """
+    logger.info("Entering search_organization_by_linkedin_or_domain")
+
+    if not linkedin_url and not domain:
+        logger.warning("No linkedin_url or domain provided. At least one is required.")
+        return {'error': "At least one of linkedin_url or domain must be provided"}
+
+    token, is_oauth = get_apollo_access_token(tool_config)
+
+    headers = {
+        "Content-Type": "application/json",
+        "Cache-Control": "no-cache",
+    }
+    if is_oauth:
+        headers["Authorization"] = f"Bearer {token}"
+    else:
+        headers["X-Api-Key"] = token
+
+    # Build the search payload
+    payload: Dict[str, Any] = {
+        "page": 1,
+        "per_page": 25,  # Get more results to improve matching
+    }
+
+    # Add LinkedIn URL filter if provided
+    if linkedin_url:
+        # Extract the company identifier for keyword search
+        company_identifier = _extract_linkedin_company_identifier(linkedin_url)
+
+        # Normalize the LinkedIn URL for matching
+        normalized_linkedin = linkedin_url.strip().rstrip('/')
+        if not normalized_linkedin.startswith(('http://', 'https://')):
+            normalized_linkedin = 'https://' + normalized_linkedin
+
+        # Use q_organization_name for better search results
+        # The company identifier from LinkedIn URL is usually the company name
+        if company_identifier:
+            payload["q_organization_name"] = company_identifier
+
+    # Add domain filter if provided
+    if domain:
+        # Clean the domain (remove http://, https://, www., etc.)
+        clean_domain = _extract_domain_from_url(domain) or domain
+        payload["q_organization_domains_list"] = [clean_domain]
+
+    url = "https://api.apollo.io/api/v1/mixed_companies/search"
+    logger.debug(f"Making POST request to Apollo organization search with payload: {json.dumps(payload, indent=2)}")
+
+    async with aiohttp.ClientSession() as session:
+        try:
+            async with session.post(url, headers=headers, json=payload) as response:
+                logger.debug(f"Received response status: {response.status}")
+
+                if response.status == 200:
+                    result = await response.json()
+
+                    # Extract organizations from response
+                    organizations = result.get("organizations", [])
+                    accounts = result.get("accounts", [])
+                    all_results = organizations + accounts
+
+                    if not all_results:
+                        logger.info("No organizations found matching the criteria.")
+                        return {
+                            'error': 'No organizations found matching the provided criteria',
+                            'search_criteria': {
+                                'linkedin_url': linkedin_url,
+                                'domain': domain
+                            }
+                        }
+
+                    # Get the best matching organization with confidence tracking
+                    best_match = None
+                    match_confidence = None
+                    match_reason = None
+
+                    # If we have a domain, try to find exact match first (highest confidence)
+                    if domain:
+                        clean_domain = _extract_domain_from_url(domain) or domain
+                        for org in all_results:
+                            org_domain = org.get("primary_domain", "")
+                            if org_domain and org_domain.lower() == clean_domain.lower():
+                                best_match = org
+                                match_confidence = "high"
+                                match_reason = f"exact_domain_match: {org_domain}"
+                                logger.info(f"Found exact domain match: {org.get('name')} with domain {org_domain}")
+                                break
+
+                    # If we have LinkedIn URL, try to find exact match
+                    if not best_match and linkedin_url:
+                        # Extract company identifier from the input URL
+                        input_company_id = _extract_linkedin_company_identifier(linkedin_url)
+
+                        for org in all_results:
+                            org_linkedin = org.get("linkedin_url", "")
+                            org_name = org.get("name", "").lower()
+                            org_domain = org.get("primary_domain", "")
+
+                            if org_linkedin:
+                                # Extract company identifier from org's LinkedIn URL
+                                org_company_id = _extract_linkedin_company_identifier(org_linkedin)
+
+                                # Match by company identifier (e.g., 'walmart' matches 'walmart')
+                                if input_company_id and org_company_id:
+                                    if input_company_id.lower() == org_company_id.lower():
+                                        best_match = org
+                                        match_confidence = "high"
+                                        match_reason = f"linkedin_identifier_match: {org_company_id}"
+                                        logger.info(f"Found LinkedIn identifier match: {org.get('name')} with identifier {org_company_id}")
+                                        break
+
+                                # Also try direct URL comparison
+                                normalized_input = linkedin_url.lower().rstrip('/').replace('www.', '')
+                                normalized_org = org_linkedin.lower().rstrip('/').replace('www.', '')
+                                if normalized_input in normalized_org or normalized_org in normalized_input:
+                                    best_match = org
+                                    match_confidence = "high"
+                                    match_reason = f"linkedin_url_match: {org_linkedin}"
+                                    logger.info(f"Found LinkedIn URL match: {org.get('name')}")
+                                    break
+
+                            # Secondary match: company name contains the identifier
+                            if not best_match and input_company_id:
+                                # Check if the org name contains the identifier or vice versa
+                                input_id_lower = input_company_id.lower().replace('-', ' ').replace('_', ' ')
+                                org_name_normalized = org_name.replace('-', ' ').replace('_', ' ')
+
+                                if input_id_lower == org_name_normalized or input_id_lower in org_name_normalized:
+                                    best_match = org
+                                    match_confidence = "medium"
+                                    match_reason = f"name_contains_identifier: {org_name}"
+                                    logger.info(f"Found name match: {org.get('name')} matches identifier {input_company_id}")
+                                    break
+
+                    # If still no match and we searched by LinkedIn, return error if no exact match found
+                    if not best_match and linkedin_url and not domain:
+                        input_company_id = _extract_linkedin_company_identifier(linkedin_url)
+                        logger.warning(f"No organization found matching LinkedIn URL: {linkedin_url}")
+                        # Log what we did find for debugging
+                        found_orgs = [{"name": org.get("name"), "linkedin": org.get("linkedin_url"), "domain": org.get("primary_domain")} for org in all_results[:5]]
+                        logger.debug(f"Found organizations (first 5): {found_orgs}")
+                        return {
+                            'error': f'No organization found matching LinkedIn company: {input_company_id or linkedin_url}',
+                            'search_criteria': {
+                                'linkedin_url': linkedin_url,
+                                'domain': domain
+                            },
+                            'total_results_returned': len(all_results)
+                        }
+
+                    # Fall back to first result only if we have other criteria (domain was provided)
+                    if not best_match:
+                        best_match = all_results[0]
+                        match_confidence = "low"
+                        match_reason = "fallback_to_first_result"
+                        logger.warning(f"Using fallback match (first result): {best_match.get('name')}")
+
+                    # Get the organization ID to fetch full details
+                    organization_id = best_match.get("id")
+                    full_org_details = best_match  # Default to search result
+
+                    # Fetch full organization details using the organization ID
+                    if organization_id:
+                        logger.info(f"Fetching full organization details for ID: {organization_id}")
+                        try:
+                            full_details = await get_organization_details_from_apollo(
+                                organization_id=organization_id,
+                                tool_config=tool_config,
+                            )
+                            if full_details and not full_details.get("error"):
+                                # Merge the full details with the search result
+                                # Full details from organization endpoint has more data
+                                full_org_details = full_details
+                                logger.info(f"Successfully fetched full organization details for {full_org_details.get('name')}")
+                            else:
+                                logger.warning(f"Could not fetch full organization details: {full_details.get('error', 'Unknown error')}")
+                        except Exception as e:
+                            logger.warning(f"Error fetching full organization details: {e}")
+
+                    # Transform to standardized format using the full details
+                    standardized_org = fill_in_company_properties(full_org_details)
+
+                    # Add logo_url to additional_properties if available
+                    if full_org_details.get("logo_url"):
+                        standardized_org["additional_properties"]["logo_url"] = full_org_details.get("logo_url")
+
+                    # Add search metadata
+                    standardized_org['search_criteria'] = {
+                        'linkedin_url': linkedin_url,
+                        'domain': domain
+                    }
+                    standardized_org['total_matches_found'] = len(all_results)
+                    standardized_org['match_confidence'] = match_confidence
+                    standardized_org['match_reason'] = match_reason
+
+                    # Log the matched organization details for verification
+                    logger.info(f"Successfully found organization: {standardized_org.get('name')} "
+                                f"(domain: {standardized_org.get('domain')}, "
+                                f"linkedin: {standardized_org.get('organization_linkedin_url')}, "
+                                f"confidence: {match_confidence})")
+                    return standardized_org
+
+                elif response.status == 429:
+                    msg = "Rate limit exceeded"
+                    logger.warning(msg)
+                    await asyncio.sleep(30)
+                    raise aiohttp.ClientResponseError(
+                        request_info=response.request_info,
+                        history=response.history,
+                        status=response.status,
+                        message=msg,
+                        headers=response.headers
+                    )
+                else:
+                    result = await response.json()
+                    logger.warning(f"search_organization_by_linkedin_or_domain error: {result}")
+                    return {'error': result}
+
+        except aiohttp.ClientResponseError:
+            raise
+        except Exception as e:
+            logger.exception("Exception occurred while searching for organization in Apollo.")
+            return {'error': str(e)}
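A hedged usage sketch for the new search_organization_by_linkedin_or_domain tool. The tool_config value is a placeholder for whatever credential structure get_apollo_access_token expects in your deployment; everything else follows the signature and return keys shown in the hunk above:

import asyncio

from dhisana.utils.apollo_tools import search_organization_by_linkedin_or_domain

async def main():
    tool_config = None  # placeholder; supply the Apollo credentials your get_apollo_access_token setup expects

    # Look up by LinkedIn company URL; a domain lookup works the same way
    # (at least one of linkedin_url or domain must be provided).
    org = await search_organization_by_linkedin_or_domain(
        linkedin_url="https://www.linkedin.com/company/microsoft",
        tool_config=tool_config,
    )

    if org.get("error"):
        print("Lookup failed:", org["error"])
    else:
        print(org.get("organization_name"),
              org.get("primary_domain_of_organization"),
              org.get("match_confidence"))

asyncio.run(main())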
@@ -22,7 +22,7 @@ from dhisana.utils.field_validators import (
     validation_organization_domain,
     validate_website_url
 )
-from dhisana.utils.apollo_tools import enrich_user_info_with_apollo, enrich_person_info_from_apollo
+from dhisana.utils.apollo_tools import enrich_user_info_with_apollo, enrich_person_info_from_apollo, search_organization_by_linkedin_or_domain
 from dhisana.utils.assistant_tool_tag import assistant_tool
 from dhisana.utils.domain_parser import get_domain_from_website, is_excluded_domain
 from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
@@ -804,25 +804,91 @@ async def enrich_organization_info_from_company_url(
 ) -> Dict[str, Any]:
     """
     Given an organization LinkedIn URL, attempt to enrich its data (e.g. name, website)
-    via ProxyCurl. Additional Proxycurl Company API boolean flags (categories, funding_data, etc.)
+    first via Apollo API, then fallback to ProxyCurl if Apollo doesn't return results.
+    Additional Proxycurl Company API boolean flags (categories, funding_data, etc.)
     can be supplied to control the returned payload (True -> "include"). If data is found,
     set domain, then return the dict. Otherwise, return {}.
     """
+    company_data = None
+    apollo_website = None
+    apollo_domain = None
 
-    # Call ProxyCurl to enrich
-    company_data = await enrich_organization_info_from_proxycurl(
-        organization_linkedin_url=organization_linkedin_url,
-        tool_config=tool_config,
-        categories=categories,
-        funding_data=funding_data,
-        exit_data=exit_data,
-        acquisitions=acquisitions,
-        extra=extra,
-        use_cache=use_cache,
-        fallback_to_cache=fallback_to_cache,
-    )
-
-    # If ProxyCurl returned any data, set domain, then return
+    # First, try Apollo API to get company information
+    try:
+        logger.debug(f"Attempting Apollo lookup for organization LinkedIn URL: {organization_linkedin_url}")
+        apollo_result = await search_organization_by_linkedin_or_domain(
+            linkedin_url=organization_linkedin_url,
+            tool_config=tool_config,
+        )
+        if apollo_result and not apollo_result.get("error"):
+            logger.debug(f"Apollo returned company data: {apollo_result.get('organization_name')}")
+            # Store Apollo's website and domain for later use
+            apollo_website = apollo_result.get("organization_website")
+            apollo_domain = apollo_result.get("primary_domain_of_organization")
+
+            # If Apollo returned valid data, use it directly
+            # Apollo now returns ProxyCurl-compatible field names
+            if apollo_result.get("organization_name"):
+                company_data = {
+                    # Primary identifiers
+                    "organization_name": apollo_result.get("organization_name", ""),
+                    "organization_linkedin_url": apollo_result.get("organization_linkedin_url", organization_linkedin_url),
+                    "organization_website": apollo_result.get("organization_website", ""),
+                    "primary_domain_of_organization": apollo_result.get("primary_domain_of_organization", ""),
+
+                    # Contact info
+                    "phone": apollo_result.get("phone", ""),
+                    "fax": apollo_result.get("fax", ""),
+
+                    # Business details - use ProxyCurl-compatible names
+                    "organization_industry": apollo_result.get("organization_industry", ""),
+                    "industry": apollo_result.get("industry", ""),  # Keep for backward compatibility
+                    "organization_size": apollo_result.get("organization_size"),
+                    "company_size": apollo_result.get("company_size"),  # Keep for backward compatibility
+                    "founded_year": apollo_result.get("founded_year"),
+                    "annual_revenue": apollo_result.get("annual_revenue"),
+                    "type": apollo_result.get("type", ""),
+                    "ownership": apollo_result.get("ownership", ""),
+                    "description": apollo_result.get("description", ""),
+
+                    # Location info
+                    "organization_hq_location": apollo_result.get("organization_hq_location", ""),
+                    "billing_street": apollo_result.get("billing_street", ""),
+                    "billing_city": apollo_result.get("billing_city", ""),
+                    "billing_state": apollo_result.get("billing_state", ""),
+                    "billing_zip": apollo_result.get("billing_zip", ""),
+                    "billing_country": apollo_result.get("billing_country", ""),
+
+                    # Other fields
+                    "keywords": apollo_result.get("keywords", []),
+                    "additional_properties": apollo_result.get("additional_properties", {}),
+                }
+    except Exception as e:
+        logger.warning(f"Apollo lookup failed for {organization_linkedin_url}: {e}")
+
+    # If Apollo didn't return data, fallback to ProxyCurl
+    if not company_data:
+        logger.debug(f"Falling back to ProxyCurl for organization LinkedIn URL: {organization_linkedin_url}")
+        company_data = await enrich_organization_info_from_proxycurl(
+            organization_linkedin_url=organization_linkedin_url,
+            tool_config=tool_config,
+            categories=categories,
+            funding_data=funding_data,
+            exit_data=exit_data,
+            acquisitions=acquisitions,
+            extra=extra,
+            use_cache=use_cache,
+            fallback_to_cache=fallback_to_cache,
+        )
+
+    # If ProxyCurl returned data but Apollo had better website/domain info, use Apollo's
+    if company_data and isinstance(company_data, dict):
+        if apollo_website and not company_data.get("organization_website"):
+            company_data["organization_website"] = apollo_website
+        if apollo_domain and not company_data.get("primary_domain_of_organization"):
+            company_data["primary_domain_of_organization"] = apollo_domain
+
+    # If we have company data, set domain and get research summary
     if company_data and isinstance(company_data, dict):
         await set_organization_domain(company_data, use_strict_check, tool_config)
         summary = await research_company_with_full_info_ai(company_data, "", tool_config=tool_config)
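The net effect on enrich_organization_info_from_company_url is an Apollo-first lookup with a ProxyCurl fallback, plus Apollo's website and domain backfilled onto ProxyCurl results. A hedged calling sketch; it assumes the remaining keyword arguments (categories, funding_data, use_cache, and so on) keep their defaults:

from dhisana.utils.enrich_lead_information import enrich_organization_info_from_company_url

async def enrich_example(tool_config=None):
    # Apollo is tried first; ProxyCurl is only consulted when Apollo returns no usable data.
    company_data = await enrich_organization_info_from_company_url(
        organization_linkedin_url="https://www.linkedin.com/company/microsoft",
        tool_config=tool_config,
    )
    if company_data:
        print(company_data.get("organization_name"),
              company_data.get("primary_domain_of_organization"))
    else:
        print("No enrichment data found")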