dhisana 0.0.1.dev85__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. dhisana/schemas/common.py +33 -0
  2. dhisana/schemas/sales.py +224 -23
  3. dhisana/utils/add_mapping.py +72 -63
  4. dhisana/utils/apollo_tools.py +739 -109
  5. dhisana/utils/built_with_api_tools.py +4 -2
  6. dhisana/utils/cache_output_tools.py +23 -23
  7. dhisana/utils/check_email_validity_tools.py +456 -458
  8. dhisana/utils/check_for_intent_signal.py +1 -2
  9. dhisana/utils/check_linkedin_url_validity.py +34 -8
  10. dhisana/utils/clay_tools.py +3 -2
  11. dhisana/utils/clean_properties.py +3 -1
  12. dhisana/utils/compose_salesnav_query.py +0 -1
  13. dhisana/utils/compose_search_query.py +7 -3
  14. dhisana/utils/composite_tools.py +0 -1
  15. dhisana/utils/dataframe_tools.py +2 -2
  16. dhisana/utils/email_body_utils.py +72 -0
  17. dhisana/utils/email_provider.py +375 -0
  18. dhisana/utils/enrich_lead_information.py +585 -85
  19. dhisana/utils/fetch_openai_config.py +129 -0
  20. dhisana/utils/field_validators.py +1 -1
  21. dhisana/utils/g2_tools.py +0 -1
  22. dhisana/utils/generate_content.py +0 -1
  23. dhisana/utils/generate_email.py +69 -16
  24. dhisana/utils/generate_email_response.py +298 -41
  25. dhisana/utils/generate_flow.py +0 -1
  26. dhisana/utils/generate_linkedin_connect_message.py +19 -6
  27. dhisana/utils/generate_linkedin_response_message.py +156 -65
  28. dhisana/utils/generate_structured_output_internal.py +351 -131
  29. dhisana/utils/google_custom_search.py +150 -44
  30. dhisana/utils/google_oauth_tools.py +721 -0
  31. dhisana/utils/google_workspace_tools.py +391 -25
  32. dhisana/utils/hubspot_clearbit.py +3 -1
  33. dhisana/utils/hubspot_crm_tools.py +771 -167
  34. dhisana/utils/instantly_tools.py +3 -1
  35. dhisana/utils/lusha_tools.py +10 -7
  36. dhisana/utils/mailgun_tools.py +150 -0
  37. dhisana/utils/microsoft365_tools.py +447 -0
  38. dhisana/utils/openai_assistant_and_file_utils.py +121 -177
  39. dhisana/utils/openai_helpers.py +19 -16
  40. dhisana/utils/parse_linkedin_messages_txt.py +2 -3
  41. dhisana/utils/profile.py +37 -0
  42. dhisana/utils/proxy_curl_tools.py +507 -206
  43. dhisana/utils/proxycurl_search_leads.py +426 -0
  44. dhisana/utils/research_lead.py +121 -68
  45. dhisana/utils/sales_navigator_crawler.py +1 -6
  46. dhisana/utils/salesforce_crm_tools.py +323 -50
  47. dhisana/utils/search_router.py +131 -0
  48. dhisana/utils/search_router_jobs.py +51 -0
  49. dhisana/utils/sendgrid_tools.py +126 -91
  50. dhisana/utils/serarch_router_local_business.py +75 -0
  51. dhisana/utils/serpapi_additional_tools.py +290 -0
  52. dhisana/utils/serpapi_google_jobs.py +117 -0
  53. dhisana/utils/serpapi_google_search.py +188 -0
  54. dhisana/utils/serpapi_local_business_search.py +129 -0
  55. dhisana/utils/serpapi_search_tools.py +363 -432
  56. dhisana/utils/serperdev_google_jobs.py +125 -0
  57. dhisana/utils/serperdev_local_business.py +154 -0
  58. dhisana/utils/serperdev_search.py +233 -0
  59. dhisana/utils/smtp_email_tools.py +576 -0
  60. dhisana/utils/test_connect.py +1765 -92
  61. dhisana/utils/trasform_json.py +95 -16
  62. dhisana/utils/web_download_parse_tools.py +0 -1
  63. dhisana/utils/zoominfo_tools.py +2 -3
  64. dhisana/workflow/test.py +1 -1
  65. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +5 -2
  66. dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
  67. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
  68. dhisana-0.0.1.dev85.dist-info/RECORD +0 -81
  69. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
  70. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,14 @@
1
1
  import base64
2
2
  import csv
3
3
  import datetime
4
+ import html as html_lib
4
5
  import io
5
6
  import json
6
7
  import logging
7
8
  import os
8
9
  import re
9
10
  import uuid
11
+ from email.mime.multipart import MIMEMultipart
10
12
  from email.mime.text import MIMEText
11
13
  from typing import Any, Dict, List, Optional
12
14
 
@@ -22,6 +24,10 @@ from googleapiclient.http import MediaFileUpload, MediaIoBaseDownload
22
24
  from dhisana.schemas.sales import MessageItem
23
25
  from dhisana.utils.assistant_tool_tag import assistant_tool
24
26
  from dhisana.utils.email_parse_helpers import *
27
+ from dhisana.utils.email_body_utils import body_variants
28
+ import asyncio
29
+ from dhisana.schemas.common import (SendEmailContext, QueryEmailContext, ReplyEmailContext, BodyFormat)
30
+
25
31
 
26
32
  ################################################################################
27
33
  # HELPER FUNCTIONS
@@ -40,7 +46,7 @@ def get_google_workspace_token(tool_config: Optional[List[Dict]] = None) -> str:
40
46
  str: The base64-encoded JSON string for the service account credentials.
41
47
 
42
48
  Raises:
43
- ValueError: If the key is not found in the tool configuration or environment variable.
49
+ ValueError: If the Google Workspace integration has not been configured.
44
50
  """
45
51
  if tool_config:
46
52
  google_workspace_config = next(
@@ -58,12 +64,14 @@ def get_google_workspace_token(tool_config: Optional[List[Dict]] = None) -> str:
58
64
  else:
59
65
  GOOGLE_SERVICE_KEY = None
60
66
 
61
- if not GOOGLE_SERVICE_KEY:
67
+ if not GOOGLE_SERVICE_KEY:
62
68
  env_service_key = os.getenv("GOOGLE_SERVICE_KEY")
63
- if env_service_key:
69
+ if env_service_key:
64
70
  GOOGLE_SERVICE_KEY = base64.b64decode(env_service_key).decode("utf-8")
65
71
  if not GOOGLE_SERVICE_KEY:
66
- raise ValueError("GOOGLE_SERVICE_KEY not found in tool_config or environment variable.")
72
+ raise ValueError(
73
+ "Google Workspace integration is not configured. Please configure the connection to Google Workspace in Integrations."
74
+ )
67
75
  return GOOGLE_SERVICE_KEY
68
76
 
69
77
 
@@ -104,6 +112,197 @@ def get_google_credentials(
104
112
  return credentials
105
113
 
106
114
 
115
+ def _looks_like_html(text: str) -> bool:
116
+ """Heuristically determine whether the body contains HTML markup."""
117
+ return bool(text and re.search(r"<[a-zA-Z][^>]*>", text))
118
+
119
+
120
+ def _html_to_plain_text(html: str) -> str:
121
+ """
122
+ Produce a very lightweight plain-text version of an HTML fragment.
123
+ This keeps newlines on block boundaries and strips tags.
124
+ """
125
+ if not html:
126
+ return ""
127
+ text = re.sub(r"(?is)<(script|style).*?>.*?</\1>", " ", html)
128
+ text = re.sub(r"(?i)<br\s*/?>", "\n", text)
129
+ text = re.sub(r"(?i)</(p|div|li|h[1-6])\s*>", "\n", text)
130
+ text = re.sub(r"(?is)<.*?>", "", text)
131
+ text = html_lib.unescape(text)
132
+ text = re.sub(r"\s+\n", "\n", text)
133
+ text = re.sub(r"\n{3,}", "\n\n", text)
134
+ return text.strip()
135
+
136
+
137
+
138
+ @assistant_tool
139
+ async def send_email_using_service_account_async(
140
+ send_email_context: SendEmailContext,
141
+ tool_config: Optional[List[Dict]] = None
142
+ ) -> str:
143
+ """
144
+ Asynchronously sends an email using the Gmail API with a service account.
145
+ The service account must have domain-wide delegation to impersonate the sender_email.
146
+
147
+ Args:
148
+ send_email_context (SendEmailContext): The context with recipient, subject,
149
+ body, sender_name, sender_email,
150
+ and an optional labels list.
151
+ tool_config (Optional[List[Dict]]): Tool configuration for credentials (if any).
152
+
153
+ Returns:
154
+ str: The ID of the sent message.
155
+ """
156
+ if not send_email_context.sender_email:
157
+ raise ValueError("sender_email is required to impersonate for sending.")
158
+
159
+ SCOPES = ['https://mail.google.com/']
160
+ credentials = get_google_credentials(send_email_context.sender_email, SCOPES, tool_config)
161
+ access_token = credentials.token
162
+
163
+ gmail_api_url = 'https://gmail.googleapis.com/gmail/v1/users/me/messages/send'
164
+
165
+ plain_body, html_body, resolved_fmt = body_variants(
166
+ send_email_context.body,
167
+ getattr(send_email_context, "body_format", None),
168
+ )
169
+
170
+ if resolved_fmt == "text":
171
+ message = MIMEText(plain_body, _subtype="plain", _charset="utf-8")
172
+ else:
173
+ # Gmail prefers multipart/alternative when HTML is present.
174
+ message = MIMEMultipart("alternative")
175
+ message.attach(MIMEText(plain_body, "plain", _charset="utf-8"))
176
+ message.attach(MIMEText(html_body, "html", _charset="utf-8"))
177
+
178
+ message['to'] = send_email_context.recipient
179
+ message['from'] = f"{send_email_context.sender_name} <{send_email_context.sender_email}>"
180
+ message['subject'] = send_email_context.subject
181
+
182
+ # Base64-encode the message
183
+ raw_message = base64.urlsafe_b64encode(message.as_bytes()).decode()
184
+
185
+ # Build the payload (with optional label IDs)
186
+ payload = {
187
+ 'raw': raw_message
188
+ }
189
+ if send_email_context.labels:
190
+ payload['labelIds'] = send_email_context.labels
191
+
192
+ headers = {
193
+ 'Authorization': f'Bearer {access_token}',
194
+ 'Content-Type': 'application/json'
195
+ }
196
+
197
+ async with httpx.AsyncClient() as client:
198
+ response = await client.post(gmail_api_url, headers=headers, json=payload)
199
+ response.raise_for_status()
200
+ sent_message = response.json()
201
+ await asyncio.sleep(20)
202
+
203
+ return sent_message.get('id', 'No ID returned')
204
+
205
+
206
+
207
+
208
+ @assistant_tool
209
+ async def list_emails_in_time_range_async(
210
+ context: QueryEmailContext,
211
+ tool_config: Optional[List[Dict]] = None
212
+ ) -> List[MessageItem]:
213
+ """
214
+ Asynchronously lists emails in a given time range using the Gmail API with a service account.
215
+ Returns a list of MessageItem objects, with iso_datetime, and separate sender/receiver fields.
216
+ """
217
+ if context.labels is None:
218
+ context.labels = []
219
+
220
+ if not context.sender_email:
221
+ raise ValueError("sender_email is required to impersonate for listing emails.")
222
+
223
+ SCOPES = ['https://mail.google.com/']
224
+ credentials = get_google_credentials(context.sender_email, SCOPES, tool_config)
225
+ access_token = credentials.token
226
+
227
+ gmail_api_url = 'https://gmail.googleapis.com/gmail/v1/users/me/messages'
228
+
229
+ # Convert RFC 3339 times to Unix epoch timestamps for the search query
230
+ start_dt = datetime.datetime.fromisoformat(context.start_time.replace('Z', '+00:00'))
231
+ end_dt = datetime.datetime.fromisoformat(context.end_time.replace('Z', '+00:00'))
232
+ start_timestamp = int(start_dt.timestamp())
233
+ end_timestamp = int(end_dt.timestamp())
234
+
235
+ # Build the search query
236
+ query = f'after:{start_timestamp} before:{end_timestamp}'
237
+ if context.unread_only:
238
+ query += ' is:unread'
239
+ if context.labels:
240
+ label_query = ' '.join([f'label:{lbl}' for lbl in context.labels])
241
+ query += f' {label_query}'
242
+
243
+ headers = {'Authorization': f'Bearer {access_token}'}
244
+ params = {'q': query, 'maxResults': 100}
245
+
246
+ message_items: List[MessageItem] = []
247
+ max_fetch = 500 # defensive cap
248
+ async with httpx.AsyncClient() as client:
249
+ next_page_token = None
250
+ while True:
251
+ page_params = dict(params)
252
+ if next_page_token:
253
+ page_params["pageToken"] = next_page_token
254
+
255
+ response = await client.get(gmail_api_url, headers=headers, params=page_params)
256
+ response.raise_for_status()
257
+ resp_json = response.json() or {}
258
+ messages = resp_json.get('messages', [])
259
+
260
+ for msg in messages:
261
+ if len(message_items) >= max_fetch:
262
+ break
263
+ message_id = msg['id']
264
+ thread_id = msg.get('threadId', "")
265
+ message_url = f'{gmail_api_url}/{message_id}'
266
+ message_response = await client.get(message_url, headers=headers)
267
+ message_response.raise_for_status()
268
+ message_data = message_response.json()
269
+
270
+ headers_list = message_data['payload']['headers']
271
+ from_header = find_header(headers_list, 'From') or ""
272
+ subject_header = find_header(headers_list, 'Subject') or ""
273
+ date_header = find_header(headers_list, 'Date') or ""
274
+
275
+ iso_datetime_str = convert_date_to_iso(date_header)
276
+
277
+ # Parse the "From" into (sender_name, sender_email)
278
+ s_name, s_email = parse_single_address(from_header)
279
+
280
+ # Parse the recipients
281
+ r_name, r_email = find_all_recipients_in_headers(headers_list)
282
+
283
+ msg_item = MessageItem(
284
+ message_id=message_data['id'],
285
+ thread_id=thread_id,
286
+ sender_name=s_name,
287
+ sender_email=s_email,
288
+ receiver_name=r_name,
289
+ receiver_email=r_email,
290
+ iso_datetime=iso_datetime_str,
291
+ subject=subject_header,
292
+ body=extract_email_body_in_plain_text(message_data)
293
+ )
294
+ message_items.append(msg_item)
295
+
296
+ if len(message_items) >= max_fetch:
297
+ break
298
+
299
+ next_page_token = resp_json.get("nextPageToken")
300
+ if not next_page_token:
301
+ break
302
+
303
+ return message_items
304
+
305
+
107
306
  ################################################################################
108
307
  # GOOGLE DRIVE FILE OPERATIONS
109
308
  ################################################################################
@@ -331,6 +530,7 @@ class SendEmailContext(BaseModel):
331
530
  sender_name: str
332
531
  sender_email: str
333
532
  labels: Optional[List[str]]
533
+ body_format: BodyFormat = BodyFormat.AUTO
334
534
 
335
535
  @assistant_tool
336
536
  async def send_email_using_service_account_async(
@@ -359,8 +559,18 @@ async def send_email_using_service_account_async(
359
559
 
360
560
  gmail_api_url = 'https://gmail.googleapis.com/gmail/v1/users/me/messages/send'
361
561
 
562
+ plain_body, html_body, resolved_fmt = body_variants(
563
+ send_email_context.body,
564
+ getattr(send_email_context, "body_format", None),
565
+ )
566
+
362
567
  # Construct the MIME text message
363
- message = MIMEText(send_email_context.body)
568
+ if resolved_fmt == "text":
569
+ message = MIMEText(plain_body, _subtype="plain", _charset="utf-8")
570
+ else:
571
+ message = MIMEMultipart("alternative")
572
+ message.attach(MIMEText(plain_body, "plain", _charset="utf-8"))
573
+ message.attach(MIMEText(html_body, "html", _charset="utf-8"))
364
574
  message['to'] = send_email_context.recipient
365
575
  message['from'] = f"{send_email_context.sender_name} <{send_email_context.sender_email}>"
366
576
  message['subject'] = send_email_context.subject
@@ -384,6 +594,7 @@ async def send_email_using_service_account_async(
384
594
  response = await client.post(gmail_api_url, headers=headers, json=payload)
385
595
  response.raise_for_status()
386
596
  sent_message = response.json()
597
+ await asyncio.sleep(20)
387
598
 
388
599
  return sent_message.get('id', 'No ID returned')
389
600
 
@@ -443,6 +654,7 @@ async def list_emails_in_time_range_async(
443
654
 
444
655
  for msg in messages:
445
656
  message_id = msg['id']
657
+ thread_id = msg['threadId']
446
658
  message_url = f'{gmail_api_url}/{message_id}'
447
659
  message_response = await client.get(message_url, headers=headers)
448
660
  message_response.raise_for_status()
@@ -462,7 +674,8 @@ async def list_emails_in_time_range_async(
462
674
  r_name, r_email = find_all_recipients_in_headers(headers_list)
463
675
 
464
676
  msg_item = MessageItem(
465
- message_id=message_data['id'],
677
+ message_id=message_data['id'],
678
+ thread_id=thread_id,
466
679
  sender_name=s_name,
467
680
  sender_email=s_email,
468
681
  receiver_name=r_name,
@@ -526,6 +739,7 @@ async def fetch_last_n_sent_messages(
526
739
 
527
740
  msg_item = MessageItem(
528
741
  message_id=message_data['id'],
742
+ thread_id=message_data['threadId'],
529
743
  sender_name=s_name,
530
744
  sender_email=s_email,
531
745
  receiver_name=r_name,
@@ -589,6 +803,7 @@ async def fetch_last_n_received_messages(
589
803
 
590
804
  msg_item = MessageItem(
591
805
  message_id=message_data['id'],
806
+ thread_id=message_data['threadId'],
592
807
  sender_name=s_name,
593
808
  sender_email=s_email,
594
809
  receiver_name=r_name,
@@ -641,6 +856,7 @@ async def get_email_details_async(
641
856
 
642
857
  msg_item = MessageItem(
643
858
  message_id=message_data['id'],
859
+ thread_id=message_data['threadId'],
644
860
  sender_name=s_name,
645
861
  sender_email=s_email,
646
862
  receiver_name=r_name,
@@ -654,14 +870,6 @@ async def get_email_details_async(
654
870
 
655
871
 
656
872
 
657
- class ReplyEmailContext(BaseModel):
658
- message_id: str
659
- reply_body: str
660
- sender_email: str
661
- sender_name: str
662
- mark_as_read: str = "True"
663
- add_labels: Optional[List[str]] = None
664
-
665
873
  @assistant_tool
666
874
  async def reply_to_email_async(
667
875
  reply_email_context: ReplyEmailContext,
@@ -704,20 +912,55 @@ async def reply_to_email_async(
704
912
  original_message = response.json()
705
913
 
706
914
  headers_list = original_message.get('payload', {}).get('headers', [])
707
- headers_dict = {h['name']: h['value'] for h in headers_list}
708
- thread_id = original_message.get('threadId')
709
-
710
- # 2. Prepare reply headers
711
- subject = headers_dict.get('Subject', '')
915
+ # Case-insensitive header lookup and resilient recipient fallback to avoid Gmail 400s.
916
+ subject = find_header(headers_list, 'Subject') or ''
712
917
  if not subject.startswith('Re:'):
713
918
  subject = f'Re: {subject}'
919
+ reply_to_header = find_header(headers_list, 'Reply-To') or ''
920
+ from_header = find_header(headers_list, 'From') or ''
921
+ to_header = find_header(headers_list, 'To') or ''
922
+ cc_header = find_header(headers_list, 'Cc') or ''
923
+ message_id_header = find_header(headers_list, 'Message-ID') or ''
924
+ thread_id = original_message.get('threadId')
925
+
926
+ sender_email_lc = (reply_email_context.sender_email or '').lower()
927
+
928
+ def _is_self(addr: str) -> bool:
929
+ return bool(sender_email_lc) and sender_email_lc in addr.lower()
714
930
 
715
- to_addresses = headers_dict.get('From', '')
716
- cc_addresses = headers_dict.get('Cc', '')
717
- message_id_header = headers_dict.get('Message-ID', '')
931
+ cc_addresses = cc_header or ''
932
+ if reply_to_header and not _is_self(reply_to_header):
933
+ to_addresses = reply_to_header
934
+ elif from_header and not _is_self(from_header):
935
+ to_addresses = from_header
936
+ elif to_header and not _is_self(to_header):
937
+ to_addresses = to_header
938
+ else:
939
+ combined = ", ".join([v for v in (to_header, cc_header, from_header) if v])
940
+ to_addresses = combined
941
+ cc_addresses = ''
942
+
943
+ if (not to_addresses or _is_self(to_addresses)) and reply_email_context.fallback_recipient:
944
+ if not _is_self(reply_email_context.fallback_recipient):
945
+ to_addresses = reply_email_context.fallback_recipient
946
+ cc_addresses = ''
947
+
948
+ if not to_addresses or _is_self(to_addresses):
949
+ raise ValueError(
950
+ "No valid recipient found in the original message; refusing to reply to sender."
951
+ )
718
952
 
719
953
  # 3. Create the reply email message
720
- msg = MIMEText(reply_email_context.reply_body)
954
+ plain_reply, html_reply, resolved_reply_fmt = body_variants(
955
+ reply_email_context.reply_body,
956
+ getattr(reply_email_context, "reply_body_format", None),
957
+ )
958
+ if resolved_reply_fmt == "text":
959
+ msg = MIMEText(plain_reply, _subtype="plain", _charset="utf-8")
960
+ else:
961
+ msg = MIMEMultipart("alternative")
962
+ msg.attach(MIMEText(plain_reply, "plain", _charset="utf-8"))
963
+ msg.attach(MIMEText(html_reply, "html", _charset="utf-8"))
721
964
  msg['To'] = to_addresses
722
965
  if cc_addresses:
723
966
  msg['Cc'] = cc_addresses
@@ -836,6 +1079,34 @@ async def get_calendar_events_using_service_account_async(
836
1079
 
837
1080
  return events
838
1081
 
1082
+ def get_google_sheet_token(tool_config: Optional[List[Dict]] = None) -> str:
1083
+ """
1084
+ Retrieves the Google Sheets API key from the provided tool configuration or
1085
+ the environment variable ``GOOGLE_SHEETS_API_KEY``.
1086
+
1087
+ Raises:
1088
+ ValueError: If the Google Sheets integration has not been configured.
1089
+ """
1090
+ GOOGLE_SHEETS_API_KEY = None
1091
+ if tool_config:
1092
+ google_sheet_config = next(
1093
+ (item for item in tool_config if item.get("name") == "google_sheets"), None
1094
+ )
1095
+ if google_sheet_config:
1096
+ config_map = {
1097
+ item["name"]: item["value"]
1098
+ for item in google_sheet_config.get("configuration", [])
1099
+ if item
1100
+ }
1101
+ GOOGLE_SHEETS_API_KEY = config_map.get("apiKey")
1102
+
1103
+ GOOGLE_SHEETS_API_KEY = GOOGLE_SHEETS_API_KEY or os.getenv("GOOGLE_SHEETS_API_KEY")
1104
+ if not GOOGLE_SHEETS_API_KEY:
1105
+ raise ValueError(
1106
+ "Google Sheets integration is not configured. Please configure the connection to Google Sheets in Integrations."
1107
+ )
1108
+ return GOOGLE_SHEETS_API_KEY
1109
+
839
1110
  def get_sheet_id_from_url(sheet_url: str) -> str:
840
1111
  """
841
1112
  Extract the spreadsheet ID from a typical Google Sheets URL.
@@ -848,6 +1119,53 @@ def get_sheet_id_from_url(sheet_url: str) -> str:
848
1119
  raise ValueError("Could not extract spreadsheet ID from the provided URL.")
849
1120
  return match.group(1)
850
1121
 
1122
+
1123
+ def get_document_id_from_url(doc_url: str) -> str:
1124
+ """Extract the document ID from a typical Google Docs URL.
1125
+
1126
+ Example URL format:
1127
+ https://docs.google.com/document/d/<DOCUMENT_ID>/edit
1128
+ """
1129
+ match = re.search(r"/d/([a-zA-Z0-9-_]+)/", doc_url)
1130
+ if not match:
1131
+ raise ValueError("Could not extract document ID from the provided URL.")
1132
+ return match.group(1)
1133
+
1134
+ async def read_google_sheet_with_api_token(
1135
+ sheet_url: str,
1136
+ range_name: str,
1137
+ sender_email: str, # kept for signature compatibility – not used
1138
+ tool_config: Optional[List[Dict]] = None
1139
+ ) -> List[List[str]]:
1140
+ """
1141
+ Read data from a *public* Google Sheet (shared “Anyone with the link → Viewer”)
1142
+ using an API key instead of OAuth credentials.
1143
+ """
1144
+
1145
+ # 1️⃣ Spreadsheet ID from the URL
1146
+ spreadsheet_id = get_sheet_id_from_url(sheet_url)
1147
+
1148
+ # 2️⃣ Grab the API key (tool_config ➜ googlesheet › apiKey, or env var)
1149
+ api_key = get_google_sheet_token(tool_config)
1150
+
1151
+ # 3️⃣ Build the Sheets service with the key
1152
+ service = build("sheets", "v4", developerKey=api_key)
1153
+ sheet = service.spreadsheets()
1154
+
1155
+ # 4️⃣ Default range to the first sheet if none supplied
1156
+ if not range_name:
1157
+ metadata = sheet.get(spreadsheetId=spreadsheet_id).execute()
1158
+ range_name = metadata["sheets"][0]["properties"]["title"]
1159
+
1160
+ # 5️⃣ Fetch the values
1161
+ result = sheet.values().get(
1162
+ spreadsheetId=spreadsheet_id,
1163
+ range=range_name
1164
+ ).execute()
1165
+
1166
+ return result.get("values", [])
1167
+
1168
+
851
1169
  async def read_google_sheet(
852
1170
  sheet_url: str,
853
1171
  range_name: str,
@@ -896,7 +1214,56 @@ async def read_google_sheet(
896
1214
  except HttpError as e:
897
1215
  logging.error(f"An error occurred while reading the Google Sheet: {e}")
898
1216
  raise
899
-
1217
+
1218
+
1219
+ async def read_google_document(
1220
+ doc_url: str,
1221
+ sender_email: str,
1222
+ tool_config: Optional[List[Dict]] = None,
1223
+ ) -> str:
1224
+ """Read text content from a Google Doc using a service account.
1225
+
1226
+ Args:
1227
+ doc_url (str): Full URL of the Google Document.
1228
+ sender_email (str): The email address to impersonate.
1229
+ tool_config (Optional[List[Dict]]): Tool configuration for credentials.
1230
+
1231
+ Returns:
1232
+ str: The concatenated text content of the document.
1233
+
1234
+ Raises:
1235
+ HttpError: If there's an error calling the Docs API.
1236
+ """
1237
+
1238
+ # --- 1. Extract Document ID from URL ---
1239
+ document_id = get_document_id_from_url(doc_url)
1240
+
1241
+ # --- 2. Set up credentials ---
1242
+ SCOPES = ['https://www.googleapis.com/auth/documents.readonly']
1243
+ credentials = get_google_credentials(sender_email, SCOPES, tool_config)
1244
+
1245
+ # --- 3. Build the Docs service and fetch the document ---
1246
+ try:
1247
+ service = build('docs', 'v1', credentials=credentials)
1248
+ document = service.documents().get(documentId=document_id).execute()
1249
+
1250
+ content = document.get('body', {}).get('content', [])
1251
+ text_parts: List[str] = []
1252
+ for element in content:
1253
+ paragraph = element.get('paragraph')
1254
+ if not paragraph:
1255
+ continue
1256
+ for elem in paragraph.get('elements', []):
1257
+ text_run = elem.get('textRun')
1258
+ if text_run:
1259
+ text_parts.append(text_run.get('content', ''))
1260
+
1261
+ return ''.join(text_parts)
1262
+
1263
+ except HttpError as e:
1264
+ logging.error(f"An error occurred while reading the Google Document: {e}")
1265
+ raise
1266
+
900
1267
  def save_values_to_csv(values: List[List[str]], output_filename: str) -> str:
901
1268
  """
902
1269
  Saves a list of row values (list of lists) to a CSV file.
@@ -918,4 +1285,3 @@ def save_values_to_csv(values: List[List[str]], output_filename: str) -> str:
918
1285
  writer.writerows(values)
919
1286
 
920
1287
  return local_file_path
921
-
@@ -27,7 +27,9 @@ async def get_company_domain_from_breeze(company_name: str):
27
27
  """
28
28
  HUBSPOT_API_KEY = os.environ.get('HUBSPOT_API_KEY')
29
29
  if not HUBSPOT_API_KEY:
30
- return {'error': "HubSpot API key not found in environment variables"}
30
+ return {
31
+ 'error': "HubSpot integration is not configured. Please configure the connection to HubSpot in Integrations."
32
+ }
31
33
 
32
34
  if not company_name:
33
35
  return {'error': "Company name must be provided"}