dhisana 0.0.1.dev249__py3-none-any.whl → 0.0.1.dev251__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- dhisana/utils/apollo_tools.py
+++ dhisana/utils/apollo_tools.py
@@ -1287,62 +1287,131 @@ def fill_in_company_properties(company_data: dict) -> dict:
         company_data: Raw company data from Apollo API

     Returns:
-        Dictionary with standardized company properties
+        Dictionary matching the SmartList `Account` schema shape.
     """
-    company_properties = {}
+    def _parse_keywords(value: Any) -> List[Any]:
+        if value is None:
+            return []
+        if isinstance(value, list):
+            return value
+        if isinstance(value, str):
+            text = value.strip()
+            if not text:
+                return []
+            if "," in text:
+                return [part.strip() for part in text.split(",") if part.strip()]
+            return [text]
+        return [value]

-    # Basic company information
-    company_properties["organization_name"] = company_data.get("name", "")
-    company_properties["primary_domain"] = company_data.get("primary_domain", "")
-    company_properties["website_url"] = company_data.get("website_url", "")
-    company_properties["organization_linkedin_url"] = company_data.get("linkedin_url", "")
-
-    # Location information
-    company_properties["organization_city"] = company_data.get("city", "")
-    company_properties["organization_state"] = company_data.get("state", "")
-    company_properties["organization_country"] = company_data.get("country", "")
-
-    # Create a combined location string
-    location_parts = [
-        company_data.get("city", ""),
-        company_data.get("state", ""),
-        company_data.get("country", "")
-    ]
-    company_properties["organization_location"] = ", ".join([part for part in location_parts if part])
-
-    # Company size and financial info
-    company_properties["employee_count"] = company_data.get("estimated_num_employees", 0)
-    company_properties["annual_revenue"] = company_data.get("annual_revenue", 0)
-
-    # Industry and business info
-    company_properties["industry"] = company_data.get("industry", "")
-    company_properties["keywords"] = ", ".join(company_data.get("keywords", []))
-    company_properties["description"] = company_data.get("description", "")
-
-    # Funding and growth
-    company_properties["founded_year"] = company_data.get("founded_year", "")
-    company_properties["funding_stage"] = company_data.get("latest_funding_stage", "")
-    company_properties["total_funding"] = company_data.get("total_funding", 0)
-
-    # Technology stack
-    tech_stack = company_data.get("technology_names", [])
-    if tech_stack:
-        company_properties["technology_stack"] = ", ".join(tech_stack)
-
-    # Apollo-specific IDs
-    company_properties["apollo_organization_id"] = company_data.get("id", "")
-
-    # Additional metadata
-    company_properties["phone"] = company_data.get("phone", "")
-    company_properties["facebook_url"] = company_data.get("facebook_url", "")
-    company_properties["twitter_url"] = company_data.get("twitter_url", "")
-
-    # Store raw data for reference
-    company_properties["additional_properties"] = {
-        "apollo_organization_data": json.dumps(cleanup_properties(company_data))
+    def _parse_compact_number(value: Any) -> Optional[float]:
+        if value is None:
+            return None
+        if isinstance(value, (int, float)):
+            return float(value)
+        text = str(value).strip()
+        if not text:
+            return None
+        text = text.replace("$", "").replace(",", "").strip()
+        multiplier = 1.0
+        suffix = text[-1:].upper()
+        if suffix in ("K", "M", "B"):
+            multiplier = {"K": 1e3, "M": 1e6, "B": 1e9}[suffix]
+            text = text[:-1].strip()
+        try:
+            return float(text) * multiplier
+        except ValueError:
+            return None
+
+    annual_revenue = (
+        company_data.get("organization_revenue")
+        if company_data.get("organization_revenue") is not None
+        else company_data.get("annual_revenue")
+    )
+    annual_revenue = _parse_compact_number(annual_revenue)
+    if annual_revenue is None:
+        annual_revenue = _parse_compact_number(company_data.get("organization_revenue_printed"))
+
+    company_size = company_data.get("estimated_num_employees")
+    if company_size is not None:
+        try:
+            company_size = int(company_size)
+        except (TypeError, ValueError):
+            company_size = None
+
+    founded_year = company_data.get("founded_year")
+    if founded_year is not None:
+        try:
+            founded_year = int(founded_year)
+        except (TypeError, ValueError):
+            founded_year = None
+
+    primary_phone = company_data.get("primary_phone")
+    primary_phone_number = None
+    if isinstance(primary_phone, dict):
+        primary_phone_number = primary_phone.get("number") or primary_phone.get(
+            "sanitized_number"
+        )
+
+    phone = (
+        primary_phone_number
+        or company_data.get("phone")
+        or company_data.get("primary_phone_number")
+        or company_data.get("sanitized_phone")
+    )
+
+    industry = company_data.get("industry")
+    if not industry and isinstance(company_data.get("industries"), list):
+        industries = [str(x).strip() for x in company_data["industries"] if str(x).strip()]
+        industry = industries[0] if industries else None
+
+    billing_street = (
+        company_data.get("street_address")
+        or company_data.get("billing_street")
+        or company_data.get("address")
+        or company_data.get("raw_address")
+    )
+
+    account: Dict[str, Any] = {
+        "name": company_data.get("name"),
+        "domain": company_data.get("primary_domain"),
+        "website": company_data.get("website_url"),
+        "phone": phone,
+        "fax": company_data.get("fax") or company_data.get("fax_number"),
+        "industry": industry,
+        "company_size": company_size,
+        "founded_year": founded_year,
+        "annual_revenue": annual_revenue,
+        "type": company_data.get("type") or company_data.get("organization_type"),
+        "ownership": company_data.get("ownership"),
+        "organization_linkedin_url": company_data.get("linkedin_url"),
+        "billing_street": billing_street,
+        "billing_city": company_data.get("city"),
+        "billing_state": company_data.get("state"),
+        "billing_zip": company_data.get("postal_code")
+        or company_data.get("zip")
+        or company_data.get("zipcode"),
+        "billing_country": company_data.get("country"),
+        "description": company_data.get("description"),
+        "keywords": _parse_keywords(company_data.get("keywords")),
+        "tags": [],
+        "notes": [],
+        "additional_properties": {
+            "apollo_organization_id": company_data.get("id"),
+            "facebook_url": company_data.get("facebook_url"),
+            "twitter_url": company_data.get("twitter_url"),
+            "funding_stage": company_data.get("latest_funding_stage"),
+            "total_funding": company_data.get("total_funding"),
+            "technology_names": company_data.get("technology_names"),
+            "primary_phone": primary_phone if isinstance(primary_phone, dict) else None,
+            "raw_address": company_data.get("raw_address"),
+            "organization_revenue_printed": company_data.get("organization_revenue_printed"),
+            "apollo_organization_data": json.dumps(cleanup_properties(company_data)),
+        },
+        "research_summary": None,
+        "enchrichment_status": None,
     }

-    return company_properties
+    return account


 @assistant_tool
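
The rewrite above replaces the flat `company_properties` dict with an `Account`-shaped dict and adds two tolerant parsers for loosely typed Apollo fields. A minimal sketch of what the new mapping produces, assuming `fill_in_company_properties` is importable from `dhisana.utils.apollo_tools` as the diff suggests (the payload below is made up, not real Apollo data):

    # Hypothetical usage sketch; field names come from the diff above.
    from dhisana.utils.apollo_tools import fill_in_company_properties

    sample = {
        "name": "Acme Corp",
        "primary_domain": "acme.com",
        "estimated_num_employees": "250",         # string coerced to int
        "organization_revenue_printed": "$1.2M",  # compact string -> 1_200_000.0
        "keywords": "crm, sales automation",      # comma string -> list of strings
        "primary_phone": {"number": "+1 555-0100"},
    }

    account = fill_in_company_properties(sample)
    assert account["company_size"] == 250
    assert account["annual_revenue"] == 1_200_000.0
    assert account["keywords"] == ["crm", "sales automation"]
    assert account["phone"] == "+1 555-0100"
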
--- dhisana/utils/cache_output_tools.py
+++ dhisana/utils/cache_output_tools.py
@@ -2,6 +2,7 @@ import os
 import hashlib
 import json
 import logging
+from datetime import datetime, timezone

 from azure.storage.blob import BlobServiceClient
 from azure.core.exceptions import ResourceNotFoundError, AzureError
@@ -49,10 +50,11 @@ def cache_output(tool_name: str, key: str, value, ttl: int = None) -> bool:
     # Construct the blob name using a virtual folder for the tool name
     blob_name = f"{tool_name}/{key_hash}.json"

-    # Prepare the cache data
+    # Prepare the cache data with timestamp for TTL expiration checking
     cache_data = {
         "value": value,
-        "ttl": ttl
+        "ttl": ttl,
+        "cached_at": datetime.now(timezone.utc).isoformat()
     }
     data = json.dumps(cache_data)

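With this change every cache entry records when it was written; entries created before this release have no `cached_at` key. An illustrative sketch of the JSON that now lands in the blob "{tool_name}/{key_hash}.json" (values are made up):

    cache_data = {
        "value": {"company": "Acme"},              # the tool's result, unchanged
        "ttl": 1209600,                            # seconds; None means "never expires"
        "cached_at": "2024-05-01T12:00:00+00:00",  # datetime.now(timezone.utc).isoformat()
    }
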
@@ -88,6 +90,24 @@ def retrieve_output(tool_name: str, key: str):
         download_stream = blob_client.download_blob()
         content = download_stream.readall()  # content is in bytes
         cache_data = json.loads(content.decode("utf-8"))
+
+        # Check if TTL has expired
+        ttl = cache_data.get("ttl")
+        cached_at = cache_data.get("cached_at")
+
+        if ttl is not None and cached_at is not None:
+            try:
+                cached_time = datetime.fromisoformat(cached_at)
+                now = datetime.now(timezone.utc)
+                elapsed_seconds = (now - cached_time).total_seconds()
+                if elapsed_seconds > ttl:
+                    logger.info(f"Cache expired for blob '{blob_name}' (elapsed: {elapsed_seconds}s, ttl: {ttl}s)")
+                    return None
+            except (ValueError, TypeError) as e:
+                logger.warning(f"Error parsing cached_at timestamp: {e}")
+                # If we can't parse the timestamp, treat as expired for safety
+                return None
+
         return cache_data.get("value")
     except ResourceNotFoundError:
         # Blob does not exist
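
Two consequences of the expiry check above are worth noting: legacy blobs without a `cached_at` key never expire (both fields must be present for the check to run), and a `cached_at` string that parses to a naive datetime makes the aware-minus-naive subtraction raise TypeError, which the except branch deliberately treats as expired. A standalone sketch of the same rule (`is_expired` is an illustrative helper name, not part of the package):

    from datetime import datetime, timezone

    def is_expired(cache_data: dict) -> bool:
        # Mirrors the retrieve_output logic above.
        ttl = cache_data.get("ttl")
        cached_at = cache_data.get("cached_at")
        if ttl is None or cached_at is None:
            return False  # legacy entries without cached_at never expire
        try:
            cached_time = datetime.fromisoformat(cached_at)
            elapsed = (datetime.now(timezone.utc) - cached_time).total_seconds()
            return elapsed > ttl
        except (ValueError, TypeError):
            return True  # unparseable or naive timestamp: treat as expired

    assert is_expired({"ttl": 60, "cached_at": "2000-01-01T00:00:00+00:00"})
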
--- dhisana/utils/proxy_curl_tools.py
+++ dhisana/utils/proxy_curl_tools.py
@@ -16,6 +16,9 @@ from urllib.parse import urlparse, urlunparse
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

+# Cache TTL for Proxycurl responses: 14 days in seconds
+PROXYCURL_CACHE_TTL = 14 * 24 * 60 * 60  # 1,209,600 seconds
+

 def get_proxycurl_access_token(tool_config: Optional[List[Dict]] = None) -> str:
     """
@@ -117,7 +120,7 @@ async def enrich_person_info_from_proxycurl(
             if response.status == 200:
                 result = await response.json()
                 if linkedin_url:
-                    cache_output("enrich_person_info_from_proxycurl", linkedin_url, result)
+                    cache_output("enrich_person_info_from_proxycurl", linkedin_url, result, ttl=PROXYCURL_CACHE_TTL)
                 logger.info("Successfully retrieved person info from Proxycurl.")
                 return result
             elif response.status == 404:
@@ -195,14 +198,14 @@ async def lookup_person_in_proxy_curl_by_name(
             logger.debug(f"Received response status: {response.status}")
             if response.status == 200:
                 result = await response.json()
-                cache_output("lookup_person_in_proxycurl_by_name", key, result)
+                cache_output("lookup_person_in_proxycurl_by_name", key, result, ttl=PROXYCURL_CACHE_TTL)
                 logger.info("Successfully retrieved person search info from Proxycurl.")
                 return result
             elif response.status == 404:
                 msg = "Person not found"
                 logger.warning(msg)
                 if key:
-                    cache_output("lookup_person_in_proxycurl_by_name", key, {'error': msg})
+                    cache_output("lookup_person_in_proxycurl_by_name", key, {'error': msg}, ttl=PROXYCURL_CACHE_TTL)
                 return {'error': msg}
             elif response.status == 429:
                 msg = "Rate limit exceeded"
@@ -415,7 +418,7 @@ async def enrich_organization_info_from_proxycurl(
             if response.status == 200:
                 result = await response.json()
                 transformed_result = transform_company_data(result)
-                cache_output("enrich_organization_info_from_proxycurl", cache_key, transformed_result)
+                cache_output("enrich_organization_info_from_proxycurl", cache_key, transformed_result, ttl=PROXYCURL_CACHE_TTL)
                 logger.info("Successfully retrieved and transformed organization info from Proxycurl by LinkedIn URL.")
                 return transformed_result
             elif response.status == 429:
@@ -429,7 +432,7 @@ async def enrich_organization_info_from_proxycurl(
                     f"Proxycurl organization profile not found for LinkedIn URL {standardized_url}: {error_text}"
                 )
                 cache_output(
-                    "enrich_organization_info_from_proxycurl", cache_key, {}
+                    "enrich_organization_info_from_proxycurl", cache_key, {}, ttl=PROXYCURL_CACHE_TTL
                 )
                 return {}
             else:
@@ -483,7 +486,7 @@ async def enrich_organization_info_from_proxycurl(
                 if profile_response.status == 200:
                     result = await profile_response.json()
                     transformed_result = transform_company_data(result)
-                    cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, transformed_result)
+                    cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, transformed_result, ttl=PROXYCURL_CACHE_TTL)
                     logger.info("Successfully retrieved and transformed organization info from Proxycurl by domain.")
                     return transformed_result
                 elif profile_response.status == 429:
@@ -509,7 +512,7 @@ async def enrich_organization_info_from_proxycurl(
             elif response.status == 404:
                 msg = "Item not found"
                 logger.warning(msg)
-                cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, {})
+                cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, {}, ttl=PROXYCURL_CACHE_TTL)
                 return {}
             else:
                 error_text = await response.text()
@@ -572,7 +575,7 @@ async def enrich_job_info_from_proxycurl(
             logger.debug(f"Received response status: {response.status}")
             if response.status == 200:
                 result = await response.json()
-                cache_output("enrich_job_info_from_proxycurl", job_url, result)
+                cache_output("enrich_job_info_from_proxycurl", job_url, result, ttl=PROXYCURL_CACHE_TTL)
                 logger.info("Successfully retrieved job info from Proxycurl.")
                 return result
             elif response.status == 429:
@@ -583,7 +586,7 @@ async def enrich_job_info_from_proxycurl(
             elif response.status == 404:
                 msg = "Job not found"
                 logger.warning(msg)
-                cache_output("enrich_job_info_from_proxycurl", job_url, {'error': msg})
+                cache_output("enrich_job_info_from_proxycurl", job_url, {'error': msg}, ttl=PROXYCURL_CACHE_TTL)
                 return {'error': msg}
             else:
                 error_text = await response.text()
--- dhisana-0.0.1.dev249.dist-info/METADATA
+++ dhisana-0.0.1.dev251.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dhisana
-Version: 0.0.1.dev249
+Version: 0.0.1.dev251
 Summary: A Python SDK for Dhisana AI Platform
 Home-page: https://github.com/dhisana-ai/dhisana-python-sdk
 Author: Admin
--- dhisana-0.0.1.dev249.dist-info/RECORD
+++ dhisana-0.0.1.dev251.dist-info/RECORD
@@ -12,10 +12,10 @@ dhisana/ui/components.py,sha256=4NXrAyl9tx2wWwoVYyABO-EOGnreGMvql1AkXWajIIo,1431
 dhisana/utils/__init__.py,sha256=jv2YF__bseklT3OWEzlqJ5qE24c4aWd5F4r0TTjOrWQ,65
 dhisana/utils/add_mapping.py,sha256=oq_QNqag86DhgdwINBRRXNx7SOb8Q9M-V0QLP6pTzr8,13837
 dhisana/utils/agent_tools.py,sha256=pzBFvfhU4wfSB4zv1eiRzjmnteJnfhC5V32r_v1m38Y,2321
-dhisana/utils/apollo_tools.py,sha256=_UDaYi_exIqe84z9Tdr1Qzn5cgYAZmK-aUwArTOTYp8,67436
+dhisana/utils/apollo_tools.py,sha256=1b9FaL_3spQKUsOP1k8-kD1kcFxCkG4KJHoN71SjOkU,69796
 dhisana/utils/assistant_tool_tag.py,sha256=rYRl8ubLI7fUUIjg30XTefHBkFgRqNEVC12lF6U6Z-8,119
 dhisana/utils/built_with_api_tools.py,sha256=TFNGhnPb2vFdveVCpjiCvE1WKe_eK95UPpR0Ha5NgMQ,10260
-dhisana/utils/cache_output_tools.py,sha256=sSAruvUZn-WAJQ0lB9T1QjSmkm-_14AuxC9xKmcCQ0k,3428
+dhisana/utils/cache_output_tools.py,sha256=q-d-WR_pkIUQyCJk8T-u9sfTy1TvvWoD2kJlZfqY-vA,4392
 dhisana/utils/cache_output_tools_local.py,sha256=okVIY54Xs5avTLu5Sv8neEPsPBce501m-6E_UhQkCAg,2447
 dhisana/utils/check_email_validity_tools.py,sha256=s2x1O3U97C4JcxIVuVh3sbgYTZzG7eGJCJT1s3NmApY,26609
 dhisana/utils/check_for_intent_signal.py,sha256=pC9k1_2fuUXBUxmikfxmoILlqhGMsJDVxrX0m73XQzA,4517
@@ -62,7 +62,7 @@ dhisana/utils/openai_helpers.py,sha256=ZK9S5-jcLCpiiD6XBLkCqYcNz-AGYmO9xh4e2H-FD
 dhisana/utils/openapi_spec_to_tools.py,sha256=oBLVq3WeDWvW9O02NCvY8bxQURQdKwHJHGcX8bC_b2I,1926
 dhisana/utils/parse_linkedin_messages_txt.py,sha256=g3N_ac70mAEuDDQ7Ott6mkOaBwI3ZvcsJD3R9RlYwPQ,3320
 dhisana/utils/profile.py,sha256=12IhefaLp3j74zzBzVRe50_KWqtWZ_cdzUKlYNy9T2Y,1192
-dhisana/utils/proxy_curl_tools.py,sha256=nHm3K_EHX8I-VGwONzCVPIA-OzbVMRXSA3VzmT_-uWk,52632
+dhisana/utils/proxy_curl_tools.py,sha256=3i7Qpk0POME5OU5-lHQ6BlnOi88tIZl-Z0oMrphnCFQ,52975
 dhisana/utils/proxycurl_search_leads.py,sha256=6PlraPNYQ4fIDzTYnY-T2g_ip5fPkqHigbGoPD8ZosQ,16131
 dhisana/utils/python_function_to_tools.py,sha256=jypddM6WTlIQmRWnqAugYJXvaPYaXaMgWAZRYeeGlj4,2682
 dhisana/utils/research_lead.py,sha256=L6w2fK5in8z2xmWnXBjbkvTdrwPf8ZfvAXq3gb7-S6s,7009
@@ -95,8 +95,8 @@ dhisana/workflow/agent.py,sha256=esv7_i_XuMkV2j1nz_UlsHov_m6X5WZZiZm_tG4OBHU,565
 dhisana/workflow/flow.py,sha256=xWE3qQbM7j2B3FH8XnY3zOL_QXX4LbTW4ArndnEYJE0,1638
 dhisana/workflow/task.py,sha256=HlWz9mtrwLYByoSnePOemBUBrMEcj7KbgNjEE1oF5wo,1830
 dhisana/workflow/test.py,sha256=E7lRnXK0PguTNzyasHytLzTJdkqIPxG5_4qk4hMEeKc,3399
-dhisana-0.0.1.dev249.dist-info/METADATA,sha256=7jNYNMhElbz3HxRJiME1__0_1sRC_IIcFkb-RnpinYc,1190
-dhisana-0.0.1.dev249.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dhisana-0.0.1.dev249.dist-info/entry_points.txt,sha256=jujxteZmNI9EkEaK-pOCoWuBujU8TCevdkfl9ZcKHek,49
-dhisana-0.0.1.dev249.dist-info/top_level.txt,sha256=NETTHt6YifG_P7XtRHbQiXZlgSFk9Qh9aR-ng1XTf4s,8
-dhisana-0.0.1.dev249.dist-info/RECORD,,
+dhisana-0.0.1.dev251.dist-info/METADATA,sha256=LbbiZTFJYpwScudL5nQgt_Seem5CqgywicGKDcJkBYI,1190
+dhisana-0.0.1.dev251.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dhisana-0.0.1.dev251.dist-info/entry_points.txt,sha256=jujxteZmNI9EkEaK-pOCoWuBujU8TCevdkfl9ZcKHek,49
+dhisana-0.0.1.dev251.dist-info/top_level.txt,sha256=NETTHt6YifG_P7XtRHbQiXZlgSFk9Qh9aR-ng1XTf4s,8
+dhisana-0.0.1.dev251.dist-info/RECORD,,