dhisana 0.0.1.dev235.tar.gz → 0.0.1.dev236.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/PKG-INFO +1 -1
  2. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/setup.py +1 -1
  3. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/enrich_lead_information.py +18 -2
  4. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/google_oauth_tools.py +51 -35
  5. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/google_workspace_tools.py +52 -35
  6. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/proxy_curl_tools.py +79 -13
  7. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana.egg-info/PKG-INFO +1 -1
  8. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/README.md +0 -0
  9. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/pyproject.toml +0 -0
  10. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/setup.cfg +0 -0
  11. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/__init__.py +0 -0
  12. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/cli/__init__.py +0 -0
  13. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/cli/cli.py +0 -0
  14. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/cli/datasets.py +0 -0
  15. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/cli/models.py +0 -0
  16. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/cli/predictions.py +0 -0
  17. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/schemas/__init__.py +0 -0
  18. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/schemas/common.py +0 -0
  19. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/schemas/sales.py +0 -0
  20. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/ui/__init__.py +0 -0
  21. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/ui/components.py +0 -0
  22. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/__init__.py +0 -0
  23. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/add_mapping.py +0 -0
  24. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/agent_tools.py +0 -0
  25. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/apollo_tools.py +0 -0
  26. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/assistant_tool_tag.py +0 -0
  27. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/built_with_api_tools.py +0 -0
  28. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/cache_output_tools.py +0 -0
  29. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/cache_output_tools_local.py +0 -0
  30. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/check_email_validity_tools.py +0 -0
  31. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/check_for_intent_signal.py +0 -0
  32. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/check_linkedin_url_validity.py +0 -0
  33. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/clay_tools.py +0 -0
  34. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/clean_properties.py +0 -0
  35. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/company_utils.py +0 -0
  36. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/compose_salesnav_query.py +0 -0
  37. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/compose_search_query.py +0 -0
  38. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/compose_three_step_workflow.py +0 -0
  39. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/composite_tools.py +0 -0
  40. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/dataframe_tools.py +0 -0
  41. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/domain_parser.py +0 -0
  42. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/email_body_utils.py +0 -0
  43. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/email_parse_helpers.py +0 -0
  44. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/email_provider.py +0 -0
  45. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/extract_email_content_for_llm.py +0 -0
  46. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/fetch_openai_config.py +0 -0
  47. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/field_validators.py +0 -0
  48. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/g2_tools.py +0 -0
  49. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/generate_content.py +0 -0
  50. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/generate_email.py +0 -0
  51. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/generate_email_response.py +0 -0
  52. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/generate_flow.py +0 -0
  53. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/generate_leads_salesnav.py +0 -0
  54. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/generate_linkedin_connect_message.py +0 -0
  55. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/generate_linkedin_response_message.py +0 -0
  56. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/generate_structured_output_internal.py +0 -0
  57. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/google_custom_search.py +0 -0
  58. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/hubspot_clearbit.py +0 -0
  59. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/hubspot_crm_tools.py +0 -0
  60. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/instantly_tools.py +0 -0
  61. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/linkedin_crawler.py +0 -0
  62. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/lusha_tools.py +0 -0
  63. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/mailgun_tools.py +0 -0
  64. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/microsoft365_tools.py +0 -0
  65. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/openai_assistant_and_file_utils.py +0 -0
  66. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/openai_helpers.py +0 -0
  67. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/openapi_spec_to_tools.py +0 -0
  68. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/openapi_tool/__init__.py +0 -0
  69. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/openapi_tool/api_models.py +0 -0
  70. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +0 -0
  71. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/openapi_tool/openapi_tool.py +0 -0
  72. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/parse_linkedin_messages_txt.py +0 -0
  73. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/profile.py +0 -0
  74. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/proxycurl_search_leads.py +0 -0
  75. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/python_function_to_tools.py +0 -0
  76. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/research_lead.py +0 -0
  77. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/sales_navigator_crawler.py +0 -0
  78. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/salesforce_crm_tools.py +0 -0
  79. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/search_router.py +0 -0
  80. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/search_router_jobs.py +0 -0
  81. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/sendgrid_tools.py +0 -0
  82. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/serarch_router_local_business.py +0 -0
  83. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/serpapi_additional_tools.py +0 -0
  84. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/serpapi_google_jobs.py +0 -0
  85. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/serpapi_google_search.py +0 -0
  86. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/serpapi_local_business_search.py +0 -0
  87. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/serpapi_search_tools.py +0 -0
  88. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/serperdev_google_jobs.py +0 -0
  89. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/serperdev_local_business.py +0 -0
  90. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/serperdev_search.py +0 -0
  91. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/smtp_email_tools.py +0 -0
  92. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/test_connect.py +0 -0
  93. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/trasform_json.py +0 -0
  94. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/web_download_parse_tools.py +0 -0
  95. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/workflow_code_model.py +0 -0
  96. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/utils/zoominfo_tools.py +0 -0
  97. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/workflow/__init__.py +0 -0
  98. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/workflow/agent.py +0 -0
  99. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/workflow/flow.py +0 -0
  100. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/workflow/task.py +0 -0
  101. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana/workflow/test.py +0 -0
  102. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana.egg-info/SOURCES.txt +0 -0
  103. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana.egg-info/dependency_links.txt +0 -0
  104. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana.egg-info/entry_points.txt +0 -0
  105. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana.egg-info/requires.txt +0 -0
  106. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/src/dhisana.egg-info/top_level.txt +0 -0
  107. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/tests/test_agent_tools.py +0 -0
  108. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/tests/test_apollo_company_search.py +0 -0
  109. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/tests/test_apollo_lead_search.py +0 -0
  110. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/tests/test_connectivity.py +0 -0
  111. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/tests/test_email_body_utils.py +0 -0
  112. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/tests/test_google_document.py +0 -0
  113. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/tests/test_hubspot_call_logs.py +0 -0
  114. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/tests/test_linkedin_serper.py +0 -0
  115. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/tests/test_mcp_connectivity.py +0 -0
  116. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/tests/test_proxycurl_get_company_search_id.py +0 -0
  117. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/tests/test_proxycurl_job_count.py +0 -0
  118. {dhisana-0.0.1.dev235 → dhisana-0.0.1.dev236}/tests/test_structured_output_with_mcp.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dhisana
-Version: 0.0.1.dev235
+Version: 0.0.1.dev236
 Summary: A Python SDK for Dhisana AI Platform
 Home-page: https://github.com/dhisana-ai/dhisana-python-sdk
 Author: Admin
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='dhisana',
-    version='0.0.1-dev235',
+    version='0.0.1-dev236',
     description='A Python SDK for Dhisana AI Platform',
     author='Admin',
     author_email='contact@dhisana.ai',
src/dhisana/utils/enrich_lead_information.py
@@ -765,16 +765,32 @@ async def enrich_organization_info_from_company_url(
     organization_linkedin_url: str,
     use_strict_check: bool = True,
     tool_config: Optional[List[Dict[str, Any]]] = None,
+    categories: Optional[bool] = None,
+    funding_data: Optional[bool] = None,
+    exit_data: Optional[bool] = None,
+    acquisitions: Optional[bool] = None,
+    extra: Optional[bool] = None,
+    use_cache: Optional[str] = "if-present",
+    fallback_to_cache: Optional[str] = "on-error",
 ) -> Dict[str, Any]:
     """
     Given an organization LinkedIn URL, attempt to enrich its data (e.g. name, website)
-    via ProxyCurl. If data is found, set domain, then return the dict. Otherwise, return {}.
+    via ProxyCurl. Additional Proxycurl Company API boolean flags (categories, funding_data, etc.)
+    can be supplied to control the returned payload (True -> "include"). If data is found,
+    set domain, then return the dict. Otherwise, return {}.
     """
 
     # Call ProxyCurl to enrich
     company_data = await enrich_organization_info_from_proxycurl(
         organization_linkedin_url=organization_linkedin_url,
-        tool_config=tool_config
+        tool_config=tool_config,
+        categories=categories,
+        funding_data=funding_data,
+        exit_data=exit_data,
+        acquisitions=acquisitions,
+        extra=extra,
+        use_cache=use_cache,
+        fallback_to_cache=fallback_to_cache,
     )
 
     # If ProxyCurl returned any data, set domain, then return
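
The widened signature above simply forwards the new flags to enrich_organization_info_from_proxycurl. A hypothetical caller sketch, not part of this release: the import path follows the src layout shown in the file list, the LinkedIn URL is a placeholder, and credentials are assumed to be configured elsewhere (e.g. via tool_config or environment variables).

# Hypothetical usage sketch of the widened signature (illustrative only).
import asyncio

from dhisana.utils.enrich_lead_information import enrich_organization_info_from_company_url

async def main() -> None:
    org = await enrich_organization_info_from_company_url(
        organization_linkedin_url="https://www.linkedin.com/company/example/",  # placeholder URL
        categories=True,          # forwarded as categories=include
        funding_data=True,        # forwarded as funding_data=include
        acquisitions=False,       # forwarded as acquisitions=exclude
        exit_data=None,           # omitted from the request entirely
        use_cache="if-present",
        fallback_to_cache="on-error",
    )
    print(org or "no data found")

if __name__ == "__main__":
    asyncio.run(main())
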
src/dhisana/utils/google_oauth_tools.py
@@ -194,46 +194,62 @@ async def list_emails_in_time_range_google_oauth_async(
         q_parts.extend([f"label:{lbl}" for lbl in context.labels])
     query = " ".join(q_parts)
 
-    params = {"q": query}
+    params = {"q": query, "maxResults": 100}
 
     items: List[MessageItem] = []
+    max_fetch = 500  # defensive cap to avoid excessive paging
     async with httpx.AsyncClient(timeout=30) as client:
         try:
-            list_resp = await client.get(base_url, headers=headers, params=params)
-            list_resp.raise_for_status()
-            list_data = list_resp.json() or {}
-            for m in list_data.get("messages", []) or []:
-                mid = m.get("id")
-                tid = m.get("threadId")
-                if not mid:
-                    continue
-                get_url = f"{base_url}/{mid}"
-                get_resp = await client.get(get_url, headers=headers)
-                get_resp.raise_for_status()
-                mdata = get_resp.json() or {}
-
-                headers_list = (mdata.get("payload") or {}).get("headers", [])
-                from_header = find_header(headers_list, "From") or ""
-                subject_header = find_header(headers_list, "Subject") or ""
-                date_header = find_header(headers_list, "Date") or ""
-
-                iso_dt = convert_date_to_iso(date_header)
-                s_name, s_email = parse_single_address(from_header)
-                r_name, r_email = find_all_recipients_in_headers(headers_list)
-
-                items.append(
-                    MessageItem(
-                        message_id=mdata.get("id", ""),
-                        thread_id=tid or "",
-                        sender_name=s_name,
-                        sender_email=s_email,
-                        receiver_name=r_name,
-                        receiver_email=r_email,
-                        iso_datetime=iso_dt,
-                        subject=subject_header,
-                        body=extract_email_body_in_plain_text(mdata),
+            next_page_token = None
+            while True:
+                page_params = dict(params)
+                if next_page_token:
+                    page_params["pageToken"] = next_page_token
+
+                list_resp = await client.get(base_url, headers=headers, params=page_params)
+                list_resp.raise_for_status()
+                list_data = list_resp.json() or {}
+                for m in list_data.get("messages", []) or []:
+                    if len(items) >= max_fetch:
+                        break
+                    mid = m.get("id")
+                    tid = m.get("threadId")
+                    if not mid:
+                        continue
+                    get_url = f"{base_url}/{mid}"
+                    get_resp = await client.get(get_url, headers=headers)
+                    get_resp.raise_for_status()
+                    mdata = get_resp.json() or {}
+
+                    headers_list = (mdata.get("payload") or {}).get("headers", [])
+                    from_header = find_header(headers_list, "From") or ""
+                    subject_header = find_header(headers_list, "Subject") or ""
+                    date_header = find_header(headers_list, "Date") or ""
+
+                    iso_dt = convert_date_to_iso(date_header)
+                    s_name, s_email = parse_single_address(from_header)
+                    r_name, r_email = find_all_recipients_in_headers(headers_list)
+
+                    items.append(
+                        MessageItem(
+                            message_id=mdata.get("id", ""),
+                            thread_id=tid or "",
+                            sender_name=s_name,
+                            sender_email=s_email,
+                            receiver_name=r_name,
+                            receiver_email=r_email,
+                            iso_datetime=iso_dt,
+                            subject=subject_header,
+                            body=extract_email_body_in_plain_text(mdata),
+                        )
                     )
-                )
+
+                if len(items) >= max_fetch:
+                    break
+
+                next_page_token = list_data.get("nextPageToken")
+                if not next_page_token:
+                    break
         except httpx.HTTPStatusError as exc:
             _rethrow_with_google_message(exc, "Gmail List OAuth")
 
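
The change above replaces a single Gmail users.messages.list call with a paged loop driven by nextPageToken, capped at 100 results per page and 500 messages overall. A minimal standalone sketch of that paging pattern, assuming only the public Gmail REST API and httpx; the access token, query, and cap values are placeholders, and synchronous httpx is used here only to keep the example short.

# Standalone sketch of the Gmail list-paging pattern adopted in this release.
from typing import List, Optional

import httpx

GMAIL_MESSAGES_URL = "https://gmail.googleapis.com/gmail/v1/users/me/messages"

def list_message_ids(access_token: str, query: str, max_fetch: int = 500) -> List[str]:
    headers = {"Authorization": f"Bearer {access_token}"}
    ids: List[str] = []
    page_token: Optional[str] = None
    with httpx.Client(timeout=30) as client:
        while True:
            params = {"q": query, "maxResults": 100}
            if page_token:
                params["pageToken"] = page_token
            resp = client.get(GMAIL_MESSAGES_URL, headers=headers, params=params)
            resp.raise_for_status()
            data = resp.json() or {}
            for message in data.get("messages", []) or []:
                if len(ids) >= max_fetch:
                    return ids              # stop once the defensive cap is hit
                if message.get("id"):
                    ids.append(message["id"])
            page_token = data.get("nextPageToken")
            if not page_token:
                return ids                  # no more pages
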
src/dhisana/utils/google_workspace_tools.py
@@ -241,47 +241,64 @@ async def list_emails_in_time_range_async(
         query += f' {label_query}'
 
     headers = {'Authorization': f'Bearer {access_token}'}
-    params = {'q': query}
+    params = {'q': query, 'maxResults': 100}
 
     message_items: List[MessageItem] = []
+    max_fetch = 500  # defensive cap
     async with httpx.AsyncClient() as client:
-        response = await client.get(gmail_api_url, headers=headers, params=params)
-        response.raise_for_status()
-        messages = response.json().get('messages', [])
-
-        for msg in messages:
-            message_id = msg['id']
-            thread_id = msg['threadId']
-            message_url = f'{gmail_api_url}/{message_id}'
-            message_response = await client.get(message_url, headers=headers)
-            message_response.raise_for_status()
-            message_data = message_response.json()
-
-            headers_list = message_data['payload']['headers']
-            from_header = find_header(headers_list, 'From') or ""
-            subject_header = find_header(headers_list, 'Subject') or ""
-            date_header = find_header(headers_list, 'Date') or ""
-
-            iso_datetime_str = convert_date_to_iso(date_header)
+        next_page_token = None
+        while True:
+            page_params = dict(params)
+            if next_page_token:
+                page_params["pageToken"] = next_page_token
 
-            # Parse the "From" into (sender_name, sender_email)
-            s_name, s_email = parse_single_address(from_header)
+            response = await client.get(gmail_api_url, headers=headers, params=page_params)
+            response.raise_for_status()
+            resp_json = response.json() or {}
+            messages = resp_json.get('messages', [])
+
+            for msg in messages:
+                if len(message_items) >= max_fetch:
+                    break
+                message_id = msg['id']
+                thread_id = msg.get('threadId', "")
+                message_url = f'{gmail_api_url}/{message_id}'
+                message_response = await client.get(message_url, headers=headers)
+                message_response.raise_for_status()
+                message_data = message_response.json()
+
+                headers_list = message_data['payload']['headers']
+                from_header = find_header(headers_list, 'From') or ""
+                subject_header = find_header(headers_list, 'Subject') or ""
+                date_header = find_header(headers_list, 'Date') or ""
+
+                iso_datetime_str = convert_date_to_iso(date_header)
+
+                # Parse the "From" into (sender_name, sender_email)
+                s_name, s_email = parse_single_address(from_header)
+
+                # Parse the recipients
+                r_name, r_email = find_all_recipients_in_headers(headers_list)
+
+                msg_item = MessageItem(
+                    message_id=message_data['id'],
+                    thread_id=thread_id,
+                    sender_name=s_name,
+                    sender_email=s_email,
+                    receiver_name=r_name,
+                    receiver_email=r_email,
+                    iso_datetime=iso_datetime_str,
+                    subject=subject_header,
+                    body=extract_email_body_in_plain_text(message_data)
+                )
+                message_items.append(msg_item)
 
-            # Parse the recipients
-            r_name, r_email = find_all_recipients_in_headers(headers_list)
+            if len(message_items) >= max_fetch:
+                break
 
-            msg_item = MessageItem(
-                message_id=message_data['id'],
-                thread_id=thread_id,
-                sender_name=s_name,
-                sender_email=s_email,
-                receiver_name=r_name,
-                receiver_email=r_email,
-                iso_datetime=iso_datetime_str,
-                subject=subject_header,
-                body=extract_email_body_in_plain_text(message_data)
-            )
-            message_items.append(msg_item)
+            next_page_token = resp_json.get("nextPageToken")
+            if not next_page_token:
+                break
 
     return message_items
 
src/dhisana/utils/proxy_curl_tools.py
@@ -273,6 +273,46 @@ def transform_company_data(data: dict) -> dict:
     return transformed
 
 
+def _build_company_profile_params(
+    company_url: str,
+    profile_flags: Dict[str, Optional[str]],
+) -> Dict[str, str]:
+    """
+    Build request params for the Enrichlayer company profile endpoint,
+    ensuring we only forward flags that were explicitly provided.
+    """
+    params: Dict[str, str] = {'url': company_url}
+    for key, value in profile_flags.items():
+        if value is not None:
+            params[key] = value
+    return params
+
+
+def _build_company_cache_key(identifier: str, profile_flags: Dict[str, Optional[str]]) -> str:
+    """
+    Builds a cache key that is unique for the combination of identifier
+    (LinkedIn URL or domain) and the optional enrichment flags.
+    """
+    suffix_bits = [
+        f"{key}={value}"
+        for key, value in sorted(profile_flags.items())
+        if value is not None
+    ]
+    if suffix_bits:
+        return f"{identifier}|{'&'.join(suffix_bits)}"
+    return identifier
+
+
+def _bool_to_include_exclude(value: Optional[bool]) -> Optional[str]:
+    """
+    Convert a boolean flag into the string literals expected by Proxycurl.
+    True -> "include", False -> "exclude", None -> None (omit parameter).
+    """
+    if value is None:
+        return None
+    return "include" if value else "exclude"
+
+
 @backoff.on_exception(
     backoff.expo,
     aiohttp.ClientResponseError,
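
The three helpers above convert optional booleans into Proxycurl's include/exclude strings, forward only explicitly set flags, and fold the flag combination into the cache key. A small worked example with illustrative values (it mirrors the helpers rather than importing the private functions; the company URL is a placeholder):

# Illustrative values only; mirrors the helpers added in this release.
from typing import Dict, Optional

def _bool_to_include_exclude(value: Optional[bool]) -> Optional[str]:
    # True -> "include", False -> "exclude", None -> omit the parameter
    if value is None:
        return None
    return "include" if value else "exclude"

profile_flags: Dict[str, Optional[str]] = {
    "categories": _bool_to_include_exclude(True),     # "include"
    "funding_data": _bool_to_include_exclude(False),  # "exclude"
    "exit_data": _bool_to_include_exclude(None),      # None -> dropped below
    "use_cache": "if-present",
    "fallback_to_cache": "on-error",
}

company_url = "https://www.linkedin.com/company/example/"  # placeholder identifier

# Request params: only non-None flags are forwarded alongside the URL.
params = {"url": company_url, **{k: v for k, v in profile_flags.items() if v is not None}}

# Cache key: identifier plus the sorted, non-None flags, so the same company
# fetched with a different flag combination gets its own cache entry.
suffix = "&".join(f"{k}={v}" for k, v in sorted(profile_flags.items()) if v is not None)
cache_key = f"{company_url}|{suffix}" if suffix else company_url

print(params)     # {'url': ..., 'categories': 'include', 'funding_data': 'exclude', 'use_cache': 'if-present', 'fallback_to_cache': 'on-error'}
print(cache_key)  # ...example/|categories=include&fallback_to_cache=on-error&funding_data=exclude&use_cache=if-present
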
@@ -283,10 +323,27 @@ def transform_company_data(data: dict) -> dict:
 async def enrich_organization_info_from_proxycurl(
     organization_domain: Optional[str] = None,
     organization_linkedin_url: Optional[str] = None,
-    tool_config: Optional[List[Dict]] = None
+    tool_config: Optional[List[Dict]] = None,
+    categories: Optional[bool] = None,
+    funding_data: Optional[bool] = None,
+    exit_data: Optional[bool] = None,
+    acquisitions: Optional[bool] = None,
+    extra: Optional[bool] = None,
+    use_cache: Optional[str] = "if-present",
+    fallback_to_cache: Optional[str] = "on-error",
 ) -> Dict:
     """
     Fetch an organization's details from Proxycurl using either the organization domain or LinkedIn URL.
+    Additional keyword parameters map directly to the Enrichlayer Company Profile endpoint.
+
+    Args:
+        organization_domain: Organization's domain name to resolve via Proxycurl.
+        organization_linkedin_url: LinkedIn company profile URL.
+        tool_config: Optional tool configuration metadata for credential lookup.
+        categories/funding_data/exit_data/acquisitions/extra: Set True to request
+            "include", False for "exclude", or None to omit.
+        use_cache: Controls Proxycurl caching behaviour (e.g. "if-present").
+        fallback_to_cache: Controls Proxycurl cache fallback behaviour (e.g. "on-error").
 
     Returns:
         dict: Transformed JSON response containing organization information,
@@ -308,6 +365,16 @@ async def enrich_organization_info_from_proxycurl(
         logger.warning("No organization domain or LinkedIn URL provided.")
         return {}
 
+    profile_flags: Dict[str, Optional[str]] = {
+        "categories": _bool_to_include_exclude(categories),
+        "funding_data": _bool_to_include_exclude(funding_data),
+        "exit_data": _bool_to_include_exclude(exit_data),
+        "acquisitions": _bool_to_include_exclude(acquisitions),
+        "extra": _bool_to_include_exclude(extra),
+        "use_cache": use_cache,
+        "fallback_to_cache": fallback_to_cache,
+    }
+
     # If LinkedIn URL is provided, standardize it and fetch data
     if organization_linkedin_url:
         logger.debug(f"Organization LinkedIn URL provided: {organization_linkedin_url}")
@@ -330,8 +397,9 @@ async def enrich_organization_info_from_proxycurl(
         if standardized_url and not standardized_url.endswith('/'):
             standardized_url += '/'
 
+        cache_key = _build_company_cache_key(standardized_url, profile_flags)
         # Check cache for standardized LinkedIn URL
-        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", standardized_url)
+        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", cache_key)
         if cached_response is not None:
             logger.info(f"Cache hit for organization LinkedIn URL: {standardized_url}")
             cached_response = transform_company_data(cached_response)
@@ -339,11 +407,7 @@ async def enrich_organization_info_from_proxycurl(
 
         # Fetch details using standardized LinkedIn URL
         url = 'https://enrichlayer.com/api/v2/company'
-        params = {
-            'url': standardized_url,
-            'use_cache': 'if-present',
-            'fallback_to_cache': 'on-error',
-        }
+        params = _build_company_profile_params(standardized_url, profile_flags)
         logger.debug(f"Making request to Proxycurl with params: {params}")
 
         async with aiohttp.ClientSession() as session:
@@ -353,7 +417,7 @@ async def enrich_organization_info_from_proxycurl(
                 if response.status == 200:
                     result = await response.json()
                     transformed_result = transform_company_data(result)
-                    cache_output("enrich_organization_info_from_proxycurl", standardized_url, transformed_result)
+                    cache_output("enrich_organization_info_from_proxycurl", cache_key, transformed_result)
                     logger.info("Successfully retrieved and transformed organization info from Proxycurl by LinkedIn URL.")
                     return transformed_result
                 elif response.status == 429:
@@ -367,7 +431,7 @@ async def enrich_organization_info_from_proxycurl(
                         f"Proxycurl organization profile not found for LinkedIn URL {standardized_url}: {error_text}"
                     )
                     cache_output(
-                        "enrich_organization_info_from_proxycurl", standardized_url, {}
+                        "enrich_organization_info_from_proxycurl", cache_key, {}
                    )
                    return {}
                else:
@@ -383,7 +447,8 @@ async def enrich_organization_info_from_proxycurl(
     # If organization domain is provided, resolve domain to LinkedIn URL and fetch data
     if organization_domain:
         logger.debug(f"Organization domain provided: {organization_domain}")
-        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", organization_domain)
+        domain_cache_key = _build_company_cache_key(organization_domain, profile_flags)
+        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", domain_cache_key)
         if cached_response is not None:
             logger.info(f"Cache hit for organization domain: {organization_domain}")
             return cached_response
@@ -414,12 +479,13 @@ async def enrich_organization_info_from_proxycurl(
 
                     profile_url = 'https://enrichlayer.com/api/v2/company'
                     try:
-                        async with session.get(profile_url, headers=HEADERS, params={'url': standardized_url}) as profile_response:
+                        profile_params = _build_company_profile_params(standardized_url, profile_flags)
+                        async with session.get(profile_url, headers=HEADERS, params=profile_params) as profile_response:
                             logger.debug(f"Received profile response status: {profile_response.status}")
                             if profile_response.status == 200:
                                 result = await profile_response.json()
                                 transformed_result = transform_company_data(result)
-                                cache_output("enrich_organization_info_from_proxycurl", organization_domain, transformed_result)
+                                cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, transformed_result)
                                 logger.info("Successfully retrieved and transformed organization info from Proxycurl by domain.")
                                 return transformed_result
                             elif profile_response.status == 429:
@@ -445,7 +511,7 @@ async def enrich_organization_info_from_proxycurl(
                 elif response.status == 404:
                     msg = "Item not found"
                     logger.warning(msg)
-                    cache_output("enrich_organization_info_from_proxycurl", organization_domain, {})
+                    cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, {})
                     return {}
                 else:
                     error_text = await response.text()
src/dhisana.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dhisana
-Version: 0.0.1.dev235
+Version: 0.0.1.dev236
 Summary: A Python SDK for Dhisana AI Platform
 Home-page: https://github.com/dhisana-ai/dhisana-python-sdk
 Author: Admin