dhisana 0.0.1.dev276__tar.gz → 0.0.1.dev278__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/PKG-INFO +1 -1
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/setup.py +1 -1
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/apollo_tools.py +405 -9
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/enrich_lead_information.py +113 -18
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana.egg-info/PKG-INFO +1 -1
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/README.md +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/pyproject.toml +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/setup.cfg +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/__init__.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/cli/__init__.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/cli/cli.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/cli/datasets.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/cli/models.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/cli/predictions.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/schemas/__init__.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/schemas/common.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/schemas/sales.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/ui/__init__.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/ui/components.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/__init__.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/add_mapping.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/agent_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/assistant_tool_tag.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/built_with_api_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/cache_output_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/cache_output_tools_local.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/check_email_validity_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/check_for_intent_signal.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/check_linkedin_url_validity.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/clay_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/clean_properties.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/company_utils.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/compose_salesnav_query.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/compose_search_query.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/compose_three_step_workflow.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/composite_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/dataframe_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/domain_parser.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/email_body_utils.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/email_parse_helpers.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/email_provider.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/extract_email_content_for_llm.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/fetch_openai_config.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/field_validators.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/g2_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/generate_content.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/generate_custom_message.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/generate_email.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/generate_email_response.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/generate_flow.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/generate_leads_salesnav.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/generate_linkedin_connect_message.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/generate_linkedin_response_message.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/generate_structured_output_internal.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/google_custom_search.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/google_oauth_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/google_workspace_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/hubspot_clearbit.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/hubspot_crm_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/instantly_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/linkedin_crawler.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/lusha_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/mailgun_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/mailreach_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/microsoft365_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/openai_assistant_and_file_utils.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/openai_helpers.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/openapi_spec_to_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/openapi_tool/__init__.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/openapi_tool/api_models.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/openapi_tool/openapi_tool.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/parse_linkedin_messages_txt.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/profile.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/proxy_curl_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/proxycurl_search_leads.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/python_function_to_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/research_lead.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/sales_navigator_crawler.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/salesforce_crm_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/search_router.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/search_router_jobs.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/sendgrid_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/serarch_router_local_business.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/serpapi_additional_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/serpapi_google_jobs.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/serpapi_google_search.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/serpapi_local_business_search.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/serpapi_search_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/serperdev_google_jobs.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/serperdev_local_business.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/serperdev_search.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/smtp_email_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/test_connect.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/trasform_json.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/web_download_parse_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/workflow_code_model.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/zoominfo_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/workflow/__init__.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/workflow/agent.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/workflow/flow.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/workflow/task.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/workflow/test.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana.egg-info/SOURCES.txt +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana.egg-info/dependency_links.txt +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana.egg-info/entry_points.txt +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana.egg-info/requires.txt +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana.egg-info/top_level.txt +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_agent_tools.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_apollo_company_search.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_apollo_lead_search.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_connectivity.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_email_body_utils.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_google_document.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_hubspot_call_logs.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_linkedin_serper.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_mailreach.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_mcp_connectivity.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_proxycurl_get_company_search_id.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_proxycurl_job_count.py +0 -0
- {dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/tests/test_structured_output_with_mcp.py +0 -0
|
@@ -1345,7 +1345,14 @@ def fill_in_company_properties(company_data: dict) -> dict:
|
|
|
1345
1345
|
if annual_revenue is None:
|
|
1346
1346
|
annual_revenue = _parse_compact_number(company_data.get("organization_revenue_printed"))
|
|
1347
1347
|
|
|
1348
|
-
|
|
1348
|
+
# Try multiple fields for company size/employee count
|
|
1349
|
+
company_size = (
|
|
1350
|
+
company_data.get("estimated_num_employees")
|
|
1351
|
+
or company_data.get("num_employees")
|
|
1352
|
+
or company_data.get("employee_count")
|
|
1353
|
+
or company_data.get("employees_count")
|
|
1354
|
+
or company_data.get("headcount")
|
|
1355
|
+
)
|
|
1349
1356
|
if company_size is not None:
|
|
1350
1357
|
try:
|
|
1351
1358
|
company_size = int(company_size)
|
|
@@ -1373,10 +1380,14 @@ def fill_in_company_properties(company_data: dict) -> dict:
|
|
|
1373
1380
|
or company_data.get("sanitized_phone")
|
|
1374
1381
|
)
|
|
1375
1382
|
|
|
1383
|
+
# Try multiple fields for industry
|
|
1376
1384
|
industry = company_data.get("industry")
|
|
1377
1385
|
if not industry and isinstance(company_data.get("industries"), list):
|
|
1378
1386
|
industries = [str(x).strip() for x in company_data["industries"] if str(x).strip()]
|
|
1379
1387
|
industry = industries[0] if industries else None
|
|
1388
|
+
# Some Apollo responses have industry_tag_id but not industry name
|
|
1389
|
+
if not industry and company_data.get("industry_tag_id"):
|
|
1390
|
+
industry = company_data.get("industry_tag_id")
|
|
1380
1391
|
|
|
1381
1392
|
billing_street = (
|
|
1382
1393
|
company_data.get("street_address")
|
|
@@ -1385,19 +1396,42 @@ def fill_in_company_properties(company_data: dict) -> dict:
|
|
|
1385
1396
|
or company_data.get("raw_address")
|
|
1386
1397
|
)
|
|
1387
1398
|
|
|
1399
|
+
# Determine ownership from publicly traded info
|
|
1400
|
+
ownership = company_data.get("ownership")
|
|
1401
|
+
if not ownership:
|
|
1402
|
+
if company_data.get("publicly_traded_symbol") or company_data.get("publicly_traded_exchange"):
|
|
1403
|
+
ownership = "public"
|
|
1404
|
+
|
|
1405
|
+
# Parse market cap
|
|
1406
|
+
market_cap = _parse_compact_number(company_data.get("market_cap"))
|
|
1407
|
+
|
|
1408
|
+
# Build account dictionary with ProxyCurl-compatible field names
|
|
1388
1409
|
account: Dict[str, Any] = {
|
|
1389
|
-
|
|
1390
|
-
"
|
|
1391
|
-
"
|
|
1410
|
+
# Primary identifiers - use ProxyCurl-compatible names
|
|
1411
|
+
"name": company_data.get("name"), # Keep for backward compatibility
|
|
1412
|
+
"organization_name": company_data.get("name"), # ProxyCurl-compatible
|
|
1413
|
+
"domain": company_data.get("primary_domain"), # Keep for backward compatibility
|
|
1414
|
+
"primary_domain_of_organization": company_data.get("primary_domain"), # ProxyCurl-compatible
|
|
1415
|
+
"website": company_data.get("website_url"), # Keep for backward compatibility
|
|
1416
|
+
"organization_website": company_data.get("website_url"), # ProxyCurl-compatible
|
|
1417
|
+
"organization_linkedin_url": company_data.get("linkedin_url"),
|
|
1418
|
+
|
|
1419
|
+
# Contact info
|
|
1392
1420
|
"phone": phone,
|
|
1393
1421
|
"fax": company_data.get("fax") or company_data.get("fax_number"),
|
|
1394
|
-
|
|
1395
|
-
|
|
1422
|
+
|
|
1423
|
+
# Business details - use ProxyCurl-compatible names
|
|
1424
|
+
"industry": industry, # Keep for backward compatibility
|
|
1425
|
+
"organization_industry": industry, # ProxyCurl-compatible
|
|
1426
|
+
"company_size": company_size, # Keep for backward compatibility
|
|
1427
|
+
"organization_size": company_size, # ProxyCurl-compatible
|
|
1396
1428
|
"founded_year": founded_year,
|
|
1397
1429
|
"annual_revenue": annual_revenue,
|
|
1398
1430
|
"type": company_data.get("type") or company_data.get("organization_type"),
|
|
1399
|
-
"ownership":
|
|
1400
|
-
"
|
|
1431
|
+
"ownership": ownership,
|
|
1432
|
+
"description": company_data.get("description") or company_data.get("short_description"),
|
|
1433
|
+
|
|
1434
|
+
# Address info
|
|
1401
1435
|
"billing_street": billing_street,
|
|
1402
1436
|
"billing_city": company_data.get("city"),
|
|
1403
1437
|
"billing_state": company_data.get("state"),
|
|
@@ -1405,20 +1439,44 @@ def fill_in_company_properties(company_data: dict) -> dict:
|
|
|
1405
1439
|
or company_data.get("zip")
|
|
1406
1440
|
or company_data.get("zipcode"),
|
|
1407
1441
|
"billing_country": company_data.get("country"),
|
|
1408
|
-
|
|
1442
|
+
|
|
1443
|
+
# Build organization_hq_location like ProxyCurl does
|
|
1444
|
+
"organization_hq_location": ", ".join(filter(None, [
|
|
1445
|
+
company_data.get("city"),
|
|
1446
|
+
company_data.get("state"),
|
|
1447
|
+
company_data.get("country")
|
|
1448
|
+
])) or None,
|
|
1449
|
+
|
|
1450
|
+
# Other fields
|
|
1409
1451
|
"keywords": _parse_keywords(company_data.get("keywords")),
|
|
1410
1452
|
"tags": [],
|
|
1411
1453
|
"notes": [],
|
|
1412
1454
|
"additional_properties": {
|
|
1413
1455
|
"apollo_organization_id": company_data.get("id"),
|
|
1456
|
+
"logo_url": company_data.get("logo_url"),
|
|
1414
1457
|
"facebook_url": company_data.get("facebook_url"),
|
|
1415
1458
|
"twitter_url": company_data.get("twitter_url"),
|
|
1459
|
+
"angellist_url": company_data.get("angellist_url"),
|
|
1460
|
+
"crunchbase_url": company_data.get("crunchbase_url"),
|
|
1461
|
+
"blog_url": company_data.get("blog_url"),
|
|
1416
1462
|
"funding_stage": company_data.get("latest_funding_stage"),
|
|
1417
1463
|
"total_funding": company_data.get("total_funding"),
|
|
1418
1464
|
"technology_names": company_data.get("technology_names"),
|
|
1419
1465
|
"primary_phone": primary_phone if isinstance(primary_phone, dict) else None,
|
|
1420
1466
|
"raw_address": company_data.get("raw_address"),
|
|
1421
1467
|
"organization_revenue_printed": company_data.get("organization_revenue_printed"),
|
|
1468
|
+
"publicly_traded_symbol": company_data.get("publicly_traded_symbol"),
|
|
1469
|
+
"publicly_traded_exchange": company_data.get("publicly_traded_exchange"),
|
|
1470
|
+
"market_cap": market_cap,
|
|
1471
|
+
"market_cap_printed": company_data.get("market_cap"),
|
|
1472
|
+
"sic_codes": company_data.get("sic_codes"),
|
|
1473
|
+
"naics_codes": company_data.get("naics_codes"),
|
|
1474
|
+
"languages": company_data.get("languages"),
|
|
1475
|
+
"alexa_ranking": company_data.get("alexa_ranking"),
|
|
1476
|
+
"linkedin_uid": company_data.get("linkedin_uid"),
|
|
1477
|
+
"headcount_6_month_growth": company_data.get("organization_headcount_six_month_growth"),
|
|
1478
|
+
"headcount_12_month_growth": company_data.get("organization_headcount_twelve_month_growth"),
|
|
1479
|
+
"headcount_24_month_growth": company_data.get("organization_headcount_twenty_four_month_growth"),
|
|
1422
1480
|
"apollo_organization_data": json.dumps(cleanup_properties(company_data)),
|
|
1423
1481
|
},
|
|
1424
1482
|
"research_summary": None,
|
|
@@ -1726,3 +1784,341 @@ async def search_companies_with_apollo_page(
|
|
|
1726
1784
|
"next_page": current_page + 1 if has_next_page else None,
|
|
1727
1785
|
"results": companies
|
|
1728
1786
|
}
|
|
1787
|
+
|
|
1788
|
+
|
|
1789
|
+
def _extract_domain_from_url(url: str) -> Optional[str]:
|
|
1790
|
+
"""
|
|
1791
|
+
Extract domain from a URL.
|
|
1792
|
+
|
|
1793
|
+
Args:
|
|
1794
|
+
url: The URL to extract domain from
|
|
1795
|
+
|
|
1796
|
+
Returns:
|
|
1797
|
+
The extracted domain or None if extraction fails
|
|
1798
|
+
"""
|
|
1799
|
+
if not url:
|
|
1800
|
+
return None
|
|
1801
|
+
|
|
1802
|
+
try:
|
|
1803
|
+
# Handle URLs without scheme
|
|
1804
|
+
if not url.startswith(('http://', 'https://')):
|
|
1805
|
+
url = 'https://' + url
|
|
1806
|
+
|
|
1807
|
+
parsed = urlparse(url)
|
|
1808
|
+
domain = parsed.netloc or parsed.path.split('/')[0]
|
|
1809
|
+
|
|
1810
|
+
# Remove www. prefix if present
|
|
1811
|
+
if domain.startswith('www.'):
|
|
1812
|
+
domain = domain[4:]
|
|
1813
|
+
|
|
1814
|
+
return domain if domain else None
|
|
1815
|
+
except Exception:
|
|
1816
|
+
return None
|
|
1817
|
+
|
|
1818
|
+
|
|
1819
|
+
def _extract_linkedin_company_identifier(linkedin_url: str) -> Optional[str]:
|
|
1820
|
+
"""
|
|
1821
|
+
Extract the company identifier from a LinkedIn company URL.
|
|
1822
|
+
|
|
1823
|
+
Args:
|
|
1824
|
+
linkedin_url: LinkedIn company URL (e.g., https://www.linkedin.com/company/microsoft)
|
|
1825
|
+
|
|
1826
|
+
Returns:
|
|
1827
|
+
The company identifier (e.g., 'microsoft') or None if extraction fails
|
|
1828
|
+
"""
|
|
1829
|
+
if not linkedin_url:
|
|
1830
|
+
return None
|
|
1831
|
+
|
|
1832
|
+
try:
|
|
1833
|
+
# Normalize the URL
|
|
1834
|
+
url = linkedin_url.strip().rstrip('/')
|
|
1835
|
+
|
|
1836
|
+
# Handle various LinkedIn URL formats
|
|
1837
|
+
# https://www.linkedin.com/company/microsoft
|
|
1838
|
+
# https://linkedin.com/company/microsoft/
|
|
1839
|
+
# linkedin.com/company/microsoft
|
|
1840
|
+
|
|
1841
|
+
if not url.startswith(('http://', 'https://')):
|
|
1842
|
+
url = 'https://' + url
|
|
1843
|
+
|
|
1844
|
+
parsed = urlparse(url)
|
|
1845
|
+
path_parts = [p for p in parsed.path.split('/') if p]
|
|
1846
|
+
|
|
1847
|
+
# Look for 'company' in path and get the next segment
|
|
1848
|
+
if 'company' in path_parts:
|
|
1849
|
+
company_idx = path_parts.index('company')
|
|
1850
|
+
if company_idx + 1 < len(path_parts):
|
|
1851
|
+
return path_parts[company_idx + 1]
|
|
1852
|
+
|
|
1853
|
+
return None
|
|
1854
|
+
except Exception:
|
|
1855
|
+
return None
|
|
1856
|
+
|
|
1857
|
+
|
|
1858
|
+
@assistant_tool
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=2,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def search_organization_by_linkedin_or_domain(
    linkedin_url: Optional[str] = None,
    domain: Optional[str] = None,
    tool_config: Optional[List[Dict]] = None,
) -> Dict[str, Any]:
    """
    Search for an organization in Apollo using LinkedIn URL or domain and return
    standardized organization information.

    This function uses Apollo's mixed_companies/search endpoint to find companies
    by their LinkedIn URL or domain, then transforms the result into a standardized
    organization information format.

    Candidate selection, in priority order:

    1. exact (case-insensitive) primary-domain match            -> "high"
    2. LinkedIn company identifier match, or LinkedIn URL match
       after normalisation                                      -> "high"
    3. organization name contains the LinkedIn identifier        -> "medium"
    4. first search result (only when a domain was supplied)     -> "low"

    When only a LinkedIn URL was supplied and steps 2-3 find nothing, an error
    dict is returned rather than guessing. After a candidate is chosen, its full
    record is fetched with ``get_organization_details_from_apollo``; if that
    fails, the search result itself is used.

    Parameters:
    - **linkedin_url** (*str*, optional): LinkedIn company URL
      (e.g., https://www.linkedin.com/company/microsoft)
    - **domain** (*str*, optional): Company domain (e.g., microsoft.com)
    - **tool_config** (*list*, optional): Passed to ``get_apollo_access_token``
      and ``get_organization_details_from_apollo``.

    At least one of linkedin_url or domain must be provided.

    Returns:
    - **dict**: Standardized organization information containing:
        - name: Company name
        - domain: Primary domain
        - website: Company website URL
        - phone: Primary phone number
        - industry: Primary industry
        - company_size: Number of employees
        - founded_year: Year company was founded
        - annual_revenue: Annual revenue
        - organization_linkedin_url: LinkedIn company URL
        - billing_street, billing_city, billing_state, billing_zip, billing_country: Address info
        - description: Company description
        - keywords: List of keywords/tags
        - additional_properties: Additional Apollo-specific data
        - search_criteria: The linkedin_url / domain that were searched for
        - total_matches_found: Number of search results Apollo returned
        - match_confidence: "high", "medium" or "low"
        - match_reason: Short string describing which rule selected the match
        - error: Error message if search fails

    Raises:
    - **aiohttp.ClientResponseError**: On HTTP 429, after the retry configured
      by the ``backoff`` decorator has been exhausted.
    """
    logger.info("Entering search_organization_by_linkedin_or_domain")

    if not linkedin_url and not domain:
        logger.warning("No linkedin_url or domain provided. At least one is required.")
        return {'error': "At least one of linkedin_url or domain must be provided"}

    token, is_oauth = get_apollo_access_token(tool_config)

    headers = {
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    def _comparable_linkedin_url(raw: str) -> str:
        """Reduce a LinkedIn URL to a scheme-less, www-less, lowercase form."""
        text = raw.strip().lower().split('?', 1)[0].split('#', 1)[0]
        for scheme in ('https://', 'http://'):
            if text.startswith(scheme):
                text = text[len(scheme):]
                break
        if text.startswith('www.'):
            text = text[4:]
        return text.rstrip('/')

    # Derive the lookup keys once; they are needed both for the request
    # payload and for ranking the results.
    input_company_id = (
        _extract_linkedin_company_identifier(linkedin_url) if linkedin_url else None
    )
    # Clean the domain (remove http://, https://, www., etc.)
    clean_domain = (_extract_domain_from_url(domain) or domain) if domain else None

    # Build the search payload
    payload: Dict[str, Any] = {
        "page": 1,
        "per_page": 25,  # Get more results to improve matching
    }

    # The company identifier from the LinkedIn URL is usually the company
    # name, so it is sent as the organization-name query.
    if input_company_id:
        payload["q_organization_name"] = input_company_id

    if clean_domain:
        payload["q_organization_domains_list"] = [clean_domain]

    url = "https://api.apollo.io/api/v1/mixed_companies/search"
    logger.debug(f"Making POST request to Apollo organization search with payload: {json.dumps(payload, indent=2)}")

    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(url, headers=headers, json=payload) as response:
                logger.debug(f"Received response status: {response.status}")

                if response.status == 200:
                    result = await response.json()

                    # Extract organizations from response. "or []" also covers
                    # keys that are present but null.
                    organizations = result.get("organizations") or []
                    accounts = result.get("accounts") or []
                    all_results = organizations + accounts

                    if not all_results:
                        logger.info("No organizations found matching the criteria.")
                        return {
                            'error': 'No organizations found matching the provided criteria',
                            'search_criteria': {
                                'linkedin_url': linkedin_url,
                                'domain': domain
                            }
                        }

                    # Get the best matching organization with confidence tracking
                    best_match = None
                    match_confidence = None
                    match_reason = None

                    # If we have a domain, try to find exact match first (highest confidence)
                    if clean_domain:
                        wanted_domain = clean_domain.lower()
                        for org in all_results:
                            org_domain = org.get("primary_domain") or ""
                            if org_domain and org_domain.lower() == wanted_domain:
                                best_match = org
                                match_confidence = "high"
                                match_reason = f"exact_domain_match: {org_domain}"
                                logger.info(f"Found exact domain match: {org.get('name')} with domain {org_domain}")
                                break

                    if not best_match and linkedin_url:
                        # Pass 1: exact LinkedIn matches across ALL results, so
                        # a fuzzy name hit on an earlier result cannot pre-empt
                        # an exact hit on a later one.
                        wanted_url = _comparable_linkedin_url(linkedin_url)
                        for org in all_results:
                            org_linkedin = org.get("linkedin_url") or ""
                            if not org_linkedin:
                                continue

                            # Match by company identifier (e.g., 'walmart' matches 'walmart')
                            org_company_id = _extract_linkedin_company_identifier(org_linkedin)
                            if (
                                input_company_id
                                and org_company_id
                                and input_company_id.lower() == org_company_id.lower()
                            ):
                                best_match = org
                                match_confidence = "high"
                                match_reason = f"linkedin_identifier_match: {org_company_id}"
                                logger.info(f"Found LinkedIn identifier match: {org.get('name')} with identifier {org_company_id}")
                                break

                            # Also try direct URL comparison. Equality (not
                            # substring) so ".../company/foo" does not match
                            # ".../company/foo-bar".
                            org_url = _comparable_linkedin_url(org_linkedin)
                            if wanted_url and wanted_url == org_url:
                                best_match = org
                                match_confidence = "high"
                                match_reason = f"linkedin_url_match: {org_linkedin}"
                                logger.info(f"Found LinkedIn URL match: {org.get('name')}")
                                break

                        # Pass 2 (secondary match): company name contains the identifier
                        if not best_match and input_company_id:
                            input_id_lower = input_company_id.lower().replace('-', ' ').replace('_', ' ')
                            for org in all_results:
                                org_name = (org.get("name") or "").lower()
                                org_name_normalized = org_name.replace('-', ' ').replace('_', ' ')

                                if input_id_lower in org_name_normalized:
                                    best_match = org
                                    match_confidence = "medium"
                                    match_reason = f"name_contains_identifier: {org_name}"
                                    logger.info(f"Found name match: {org.get('name')} matches identifier {input_company_id}")
                                    break

                    # If still no match and we searched by LinkedIn only, return an
                    # error instead of guessing.
                    if not best_match and linkedin_url and not domain:
                        logger.warning(f"No organization found matching LinkedIn URL: {linkedin_url}")
                        # Log what we did find for debugging
                        found_orgs = [
                            {
                                "name": org.get("name"),
                                "linkedin": org.get("linkedin_url"),
                                "domain": org.get("primary_domain"),
                            }
                            for org in all_results[:5]
                        ]
                        logger.debug(f"Found organizations (first 5): {found_orgs}")
                        return {
                            'error': f'No organization found matching LinkedIn company: {input_company_id or linkedin_url}',
                            'search_criteria': {
                                'linkedin_url': linkedin_url,
                                'domain': domain
                            },
                            'total_results_returned': len(all_results)
                        }

                    # Fall back to first result only if we have other criteria (domain was provided)
                    if not best_match:
                        best_match = all_results[0]
                        match_confidence = "low"
                        match_reason = "fallback_to_first_result"
                        logger.warning(f"Using fallback match (first result): {best_match.get('name')}")

                    # Get the organization ID to fetch full details
                    organization_id = best_match.get("id")
                    full_org_details = best_match  # Default to search result

                    # Fetch full organization details using the organization ID.
                    # Best effort: any failure here keeps the search result.
                    if organization_id:
                        logger.info(f"Fetching full organization details for ID: {organization_id}")
                        try:
                            full_details = await get_organization_details_from_apollo(
                                organization_id=organization_id,
                                tool_config=tool_config,
                            )
                            if full_details and not full_details.get("error"):
                                full_org_details = full_details
                                logger.info(f"Successfully fetched full organization details for {full_org_details.get('name')}")
                            else:
                                # full_details may be None/empty, so guard the lookup.
                                detail_error = (full_details or {}).get('error', 'Unknown error')
                                logger.warning(f"Could not fetch full organization details: {detail_error}")
                        except Exception as e:
                            logger.warning(f"Error fetching full organization details: {e}")

                    # Transform to standardized format using the full details
                    standardized_org = fill_in_company_properties(full_org_details)

                    # Add logo_url to additional_properties if available
                    if full_org_details.get("logo_url"):
                        standardized_org["additional_properties"]["logo_url"] = full_org_details.get("logo_url")

                    # Add search metadata
                    standardized_org['search_criteria'] = {
                        'linkedin_url': linkedin_url,
                        'domain': domain
                    }
                    standardized_org['total_matches_found'] = len(all_results)
                    standardized_org['match_confidence'] = match_confidence
                    standardized_org['match_reason'] = match_reason

                    # Log the matched organization details for verification
                    logger.info(f"Successfully found organization: {standardized_org.get('name')} "
                                f"(domain: {standardized_org.get('domain')}, "
                                f"linkedin: {standardized_org.get('organization_linkedin_url')}, "
                                f"confidence: {match_confidence})")
                    return standardized_org

                elif response.status == 429:
                    msg = "Rate limit exceeded"
                    logger.warning(msg)
                    # Pause before raising; the backoff decorator then applies
                    # its own delay before the retry.
                    await asyncio.sleep(30)
                    raise aiohttp.ClientResponseError(
                        request_info=response.request_info,
                        history=response.history,
                        status=response.status,
                        message=msg,
                        headers=response.headers
                    )
                else:
                    # Error bodies are not always JSON. response.json() would
                    # raise ContentTypeError (a ClientResponseError), which the
                    # handler below re-raises instead of returning an error
                    # dict, so read the text and decode it leniently.
                    raw_body = await response.text()
                    try:
                        result = json.loads(raw_body)
                    except ValueError:
                        result = raw_body
                    logger.warning(f"search_organization_by_linkedin_or_domain error: {result}")
                    return {'error': result}

        except aiohttp.ClientResponseError:
            # Let the backoff decorator see rate-limit errors.
            raise
        except Exception as e:
            logger.exception("Exception occurred while searching for organization in Apollo.")
            return {'error': str(e)}
|
|
@@ -22,7 +22,7 @@ from dhisana.utils.field_validators import (
|
|
|
22
22
|
validation_organization_domain,
|
|
23
23
|
validate_website_url
|
|
24
24
|
)
|
|
25
|
-
from dhisana.utils.apollo_tools import enrich_user_info_with_apollo
|
|
25
|
+
from dhisana.utils.apollo_tools import enrich_user_info_with_apollo, enrich_person_info_from_apollo, search_organization_by_linkedin_or_domain
|
|
26
26
|
from dhisana.utils.assistant_tool_tag import assistant_tool
|
|
27
27
|
from dhisana.utils.domain_parser import get_domain_from_website, is_excluded_domain
|
|
28
28
|
from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
|
|
@@ -520,6 +520,36 @@ async def enrich_user_info(
|
|
|
520
520
|
|
|
521
521
|
# 1) If we do not have a user_linkedin_url, try Apollo (lookup by email) first, then GitHub
|
|
522
522
|
if not user_linkedin_url:
|
|
523
|
+
email = (input_properties.get("email") or "").strip()
|
|
524
|
+
|
|
525
|
+
# 1a) If email is present, first try Apollo lookup by email for more robust matching
|
|
526
|
+
if email:
|
|
527
|
+
logger.debug("Attempting Apollo lookup by email: %s", email)
|
|
528
|
+
apollo_result = await enrich_person_info_from_apollo(
|
|
529
|
+
email=email,
|
|
530
|
+
tool_config=tool_config,
|
|
531
|
+
)
|
|
532
|
+
if apollo_result and not apollo_result.get("error"):
|
|
533
|
+
person_data = apollo_result.get("person", {})
|
|
534
|
+
if person_data:
|
|
535
|
+
apollo_linkedin_url = person_data.get("linkedin_url", "")
|
|
536
|
+
if apollo_linkedin_url:
|
|
537
|
+
user_linkedin_url = apollo_linkedin_url
|
|
538
|
+
input_properties["user_linkedin_url"] = user_linkedin_url
|
|
539
|
+
input_properties["linkedin_url_match"] = True
|
|
540
|
+
logger.debug("Found LinkedIn URL via Apollo email lookup: %s", user_linkedin_url)
|
|
541
|
+
# Also populate other fields from Apollo if not already present
|
|
542
|
+
if not input_properties.get("first_name"):
|
|
543
|
+
input_properties["first_name"] = person_data.get("first_name", "")
|
|
544
|
+
if not input_properties.get("last_name"):
|
|
545
|
+
input_properties["last_name"] = person_data.get("last_name", "")
|
|
546
|
+
if not input_properties.get("job_title"):
|
|
547
|
+
input_properties["job_title"] = person_data.get("title", "")
|
|
548
|
+
if not input_properties.get("lead_location"):
|
|
549
|
+
input_properties["lead_location"] = person_data.get("city", "")
|
|
550
|
+
return input_properties
|
|
551
|
+
|
|
552
|
+
# 1b) If still no LinkedIn URL, try getting it from GitHub
|
|
523
553
|
if github_profile_id:
|
|
524
554
|
response = await get_user_linkedin_url_from_github_profile(
|
|
525
555
|
github_profile_id=github_profile_id,
|
|
@@ -551,7 +581,6 @@ async def enrich_user_info(
|
|
|
551
581
|
location = input_properties.get("lead_location", "") or ""
|
|
552
582
|
org_name = (input_properties.get("organization_name", "") or "").strip()
|
|
553
583
|
org_domain = (input_properties.get("primary_domain_of_organization", "") or "").strip()
|
|
554
|
-
email = (input_properties.get("email") or "").strip()
|
|
555
584
|
|
|
556
585
|
if full_name and (org_name or org_domain or title):
|
|
557
586
|
# This function does a google-based search for the user's LinkedIn
|
|
@@ -568,7 +597,7 @@ async def enrich_user_info(
|
|
|
568
597
|
user_linkedin_url = found_linkedin_url
|
|
569
598
|
input_properties["user_linkedin_url"] = user_linkedin_url
|
|
570
599
|
if not user_linkedin_url and email:
|
|
571
|
-
# If we have an email but no
|
|
600
|
+
# If we have an email but no LinkedIn URL yet, try searching by email via Google
|
|
572
601
|
email_lookup_result = await find_user_linkedin_url_by_email_google(
|
|
573
602
|
email=email,
|
|
574
603
|
user_name=full_name,
|
|
@@ -775,25 +804,91 @@ async def enrich_organization_info_from_company_url(
|
|
|
775
804
|
) -> Dict[str, Any]:
|
|
776
805
|
"""
|
|
777
806
|
Given an organization LinkedIn URL, attempt to enrich its data (e.g. name, website)
|
|
778
|
-
via
|
|
807
|
+
first via Apollo API, then fall back to ProxyCurl if Apollo doesn't return results.
|
|
808
|
+
Additional Proxycurl Company API boolean flags (categories, funding_data, etc.)
|
|
779
809
|
can be supplied to control the returned payload (True -> "include"). If data is found,
|
|
780
810
|
set domain, then return the dict. Otherwise, return {}.
|
|
781
811
|
"""
|
|
812
|
+
company_data = None
|
|
813
|
+
apollo_website = None
|
|
814
|
+
apollo_domain = None
|
|
782
815
|
|
|
783
|
-
#
|
|
784
|
-
|
|
785
|
-
organization_linkedin_url
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
816
|
+
# First, try Apollo API to get company information
|
|
817
|
+
try:
|
|
818
|
+
logger.debug(f"Attempting Apollo lookup for organization LinkedIn URL: {organization_linkedin_url}")
|
|
819
|
+
apollo_result = await search_organization_by_linkedin_or_domain(
|
|
820
|
+
linkedin_url=organization_linkedin_url,
|
|
821
|
+
tool_config=tool_config,
|
|
822
|
+
)
|
|
823
|
+
if apollo_result and not apollo_result.get("error"):
|
|
824
|
+
logger.debug(f"Apollo returned company data: {apollo_result.get('organization_name')}")
|
|
825
|
+
# Store Apollo's website and domain for later use
|
|
826
|
+
apollo_website = apollo_result.get("organization_website")
|
|
827
|
+
apollo_domain = apollo_result.get("primary_domain_of_organization")
|
|
828
|
+
|
|
829
|
+
# If Apollo returned valid data, use it directly
|
|
830
|
+
# Apollo now returns ProxyCurl-compatible field names
|
|
831
|
+
if apollo_result.get("organization_name"):
|
|
832
|
+
company_data = {
|
|
833
|
+
# Primary identifiers
|
|
834
|
+
"organization_name": apollo_result.get("organization_name", ""),
|
|
835
|
+
"organization_linkedin_url": apollo_result.get("organization_linkedin_url", organization_linkedin_url),
|
|
836
|
+
"organization_website": apollo_result.get("organization_website", ""),
|
|
837
|
+
"primary_domain_of_organization": apollo_result.get("primary_domain_of_organization", ""),
|
|
838
|
+
|
|
839
|
+
# Contact info
|
|
840
|
+
"phone": apollo_result.get("phone", ""),
|
|
841
|
+
"fax": apollo_result.get("fax", ""),
|
|
842
|
+
|
|
843
|
+
# Business details - use ProxyCurl-compatible names
|
|
844
|
+
"organization_industry": apollo_result.get("organization_industry", ""),
|
|
845
|
+
"industry": apollo_result.get("industry", ""), # Keep for backward compatibility
|
|
846
|
+
"organization_size": apollo_result.get("organization_size"),
|
|
847
|
+
"company_size": apollo_result.get("company_size"), # Keep for backward compatibility
|
|
848
|
+
"founded_year": apollo_result.get("founded_year"),
|
|
849
|
+
"annual_revenue": apollo_result.get("annual_revenue"),
|
|
850
|
+
"type": apollo_result.get("type", ""),
|
|
851
|
+
"ownership": apollo_result.get("ownership", ""),
|
|
852
|
+
"description": apollo_result.get("description", ""),
|
|
853
|
+
|
|
854
|
+
# Location info
|
|
855
|
+
"organization_hq_location": apollo_result.get("organization_hq_location", ""),
|
|
856
|
+
"billing_street": apollo_result.get("billing_street", ""),
|
|
857
|
+
"billing_city": apollo_result.get("billing_city", ""),
|
|
858
|
+
"billing_state": apollo_result.get("billing_state", ""),
|
|
859
|
+
"billing_zip": apollo_result.get("billing_zip", ""),
|
|
860
|
+
"billing_country": apollo_result.get("billing_country", ""),
|
|
861
|
+
|
|
862
|
+
# Other fields
|
|
863
|
+
"keywords": apollo_result.get("keywords", []),
|
|
864
|
+
"additional_properties": apollo_result.get("additional_properties", {}),
|
|
865
|
+
}
|
|
866
|
+
except Exception as e:
|
|
867
|
+
logger.warning(f"Apollo lookup failed for {organization_linkedin_url}: {e}")
|
|
868
|
+
|
|
869
|
+
# If Apollo didn't return data, fall back to ProxyCurl
|
|
870
|
+
if not company_data:
|
|
871
|
+
logger.debug(f"Falling back to ProxyCurl for organization LinkedIn URL: {organization_linkedin_url}")
|
|
872
|
+
company_data = await enrich_organization_info_from_proxycurl(
|
|
873
|
+
organization_linkedin_url=organization_linkedin_url,
|
|
874
|
+
tool_config=tool_config,
|
|
875
|
+
categories=categories,
|
|
876
|
+
funding_data=funding_data,
|
|
877
|
+
exit_data=exit_data,
|
|
878
|
+
acquisitions=acquisitions,
|
|
879
|
+
extra=extra,
|
|
880
|
+
use_cache=use_cache,
|
|
881
|
+
fallback_to_cache=fallback_to_cache,
|
|
882
|
+
)
|
|
883
|
+
|
|
884
|
+
# If company_data is missing a website or domain and Apollo supplied one, fill it in from Apollo
|
|
885
|
+
if company_data and isinstance(company_data, dict):
|
|
886
|
+
if apollo_website and not company_data.get("organization_website"):
|
|
887
|
+
company_data["organization_website"] = apollo_website
|
|
888
|
+
if apollo_domain and not company_data.get("primary_domain_of_organization"):
|
|
889
|
+
company_data["primary_domain_of_organization"] = apollo_domain
|
|
890
|
+
|
|
891
|
+
# If we have company data, set domain and get research summary
|
|
797
892
|
if company_data and isinstance(company_data, dict):
|
|
798
893
|
await set_organization_domain(company_data, use_strict_check, tool_config)
|
|
799
894
|
summary = await research_company_with_full_info_ai(company_data, "", tool_config=tool_config)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/check_email_validity_tools.py
RENAMED
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/check_linkedin_url_validity.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/compose_three_step_workflow.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/extract_email_content_for_llm.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/generate_linkedin_connect_message.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/openai_assistant_and_file_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/openapi_tool/openapi_tool.py
RENAMED
|
File without changes
|
{dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/parse_linkedin_messages_txt.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/serarch_router_local_business.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev276 → dhisana-0.0.1.dev278}/src/dhisana/utils/serpapi_local_business_search.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|