connectonion 0.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. connectonion/__init__.py +78 -0
  2. connectonion/address.py +320 -0
  3. connectonion/agent.py +450 -0
  4. connectonion/announce.py +84 -0
  5. connectonion/asgi.py +287 -0
  6. connectonion/auto_debug_exception.py +181 -0
  7. connectonion/cli/__init__.py +3 -0
  8. connectonion/cli/browser_agent/__init__.py +5 -0
  9. connectonion/cli/browser_agent/browser.py +243 -0
  10. connectonion/cli/browser_agent/prompt.md +107 -0
  11. connectonion/cli/commands/__init__.py +1 -0
  12. connectonion/cli/commands/auth_commands.py +527 -0
  13. connectonion/cli/commands/browser_commands.py +27 -0
  14. connectonion/cli/commands/create.py +511 -0
  15. connectonion/cli/commands/deploy_commands.py +220 -0
  16. connectonion/cli/commands/doctor_commands.py +173 -0
  17. connectonion/cli/commands/init.py +469 -0
  18. connectonion/cli/commands/project_cmd_lib.py +828 -0
  19. connectonion/cli/commands/reset_commands.py +149 -0
  20. connectonion/cli/commands/status_commands.py +168 -0
  21. connectonion/cli/docs/co-vibecoding-principles-docs-contexts-all-in-one.md +2010 -0
  22. connectonion/cli/docs/connectonion.md +1256 -0
  23. connectonion/cli/docs.md +123 -0
  24. connectonion/cli/main.py +148 -0
  25. connectonion/cli/templates/meta-agent/README.md +287 -0
  26. connectonion/cli/templates/meta-agent/agent.py +196 -0
  27. connectonion/cli/templates/meta-agent/prompts/answer_prompt.md +9 -0
  28. connectonion/cli/templates/meta-agent/prompts/docs_retrieve_prompt.md +15 -0
  29. connectonion/cli/templates/meta-agent/prompts/metagent.md +71 -0
  30. connectonion/cli/templates/meta-agent/prompts/think_prompt.md +18 -0
  31. connectonion/cli/templates/minimal/README.md +56 -0
  32. connectonion/cli/templates/minimal/agent.py +40 -0
  33. connectonion/cli/templates/playwright/README.md +118 -0
  34. connectonion/cli/templates/playwright/agent.py +336 -0
  35. connectonion/cli/templates/playwright/prompt.md +102 -0
  36. connectonion/cli/templates/playwright/requirements.txt +3 -0
  37. connectonion/cli/templates/web-research/agent.py +122 -0
  38. connectonion/connect.py +128 -0
  39. connectonion/console.py +539 -0
  40. connectonion/debug_agent/__init__.py +13 -0
  41. connectonion/debug_agent/agent.py +45 -0
  42. connectonion/debug_agent/prompts/debug_assistant.md +72 -0
  43. connectonion/debug_agent/runtime_inspector.py +406 -0
  44. connectonion/debug_explainer/__init__.py +10 -0
  45. connectonion/debug_explainer/explain_agent.py +114 -0
  46. connectonion/debug_explainer/explain_context.py +263 -0
  47. connectonion/debug_explainer/explainer_prompt.md +29 -0
  48. connectonion/debug_explainer/root_cause_analysis_prompt.md +43 -0
  49. connectonion/debugger_ui.py +1039 -0
  50. connectonion/decorators.py +208 -0
  51. connectonion/events.py +248 -0
  52. connectonion/execution_analyzer/__init__.py +9 -0
  53. connectonion/execution_analyzer/execution_analysis.py +93 -0
  54. connectonion/execution_analyzer/execution_analysis_prompt.md +47 -0
  55. connectonion/host.py +579 -0
  56. connectonion/interactive_debugger.py +342 -0
  57. connectonion/llm.py +801 -0
  58. connectonion/llm_do.py +307 -0
  59. connectonion/logger.py +300 -0
  60. connectonion/prompt_files/__init__.py +1 -0
  61. connectonion/prompt_files/analyze_contact.md +62 -0
  62. connectonion/prompt_files/eval_expected.md +12 -0
  63. connectonion/prompt_files/react_evaluate.md +11 -0
  64. connectonion/prompt_files/react_plan.md +16 -0
  65. connectonion/prompt_files/reflect.md +22 -0
  66. connectonion/prompts.py +144 -0
  67. connectonion/relay.py +200 -0
  68. connectonion/static/docs.html +688 -0
  69. connectonion/tool_executor.py +279 -0
  70. connectonion/tool_factory.py +186 -0
  71. connectonion/tool_registry.py +105 -0
  72. connectonion/trust.py +166 -0
  73. connectonion/trust_agents.py +71 -0
  74. connectonion/trust_functions.py +88 -0
  75. connectonion/tui/__init__.py +57 -0
  76. connectonion/tui/divider.py +39 -0
  77. connectonion/tui/dropdown.py +251 -0
  78. connectonion/tui/footer.py +31 -0
  79. connectonion/tui/fuzzy.py +56 -0
  80. connectonion/tui/input.py +278 -0
  81. connectonion/tui/keys.py +35 -0
  82. connectonion/tui/pick.py +130 -0
  83. connectonion/tui/providers.py +155 -0
  84. connectonion/tui/status_bar.py +163 -0
  85. connectonion/usage.py +161 -0
  86. connectonion/useful_events_handlers/__init__.py +16 -0
  87. connectonion/useful_events_handlers/reflect.py +116 -0
  88. connectonion/useful_plugins/__init__.py +20 -0
  89. connectonion/useful_plugins/calendar_plugin.py +163 -0
  90. connectonion/useful_plugins/eval.py +139 -0
  91. connectonion/useful_plugins/gmail_plugin.py +162 -0
  92. connectonion/useful_plugins/image_result_formatter.py +127 -0
  93. connectonion/useful_plugins/re_act.py +78 -0
  94. connectonion/useful_plugins/shell_approval.py +159 -0
  95. connectonion/useful_tools/__init__.py +44 -0
  96. connectonion/useful_tools/diff_writer.py +192 -0
  97. connectonion/useful_tools/get_emails.py +183 -0
  98. connectonion/useful_tools/gmail.py +1596 -0
  99. connectonion/useful_tools/google_calendar.py +613 -0
  100. connectonion/useful_tools/memory.py +380 -0
  101. connectonion/useful_tools/microsoft_calendar.py +604 -0
  102. connectonion/useful_tools/outlook.py +488 -0
  103. connectonion/useful_tools/send_email.py +205 -0
  104. connectonion/useful_tools/shell.py +97 -0
  105. connectonion/useful_tools/slash_command.py +201 -0
  106. connectonion/useful_tools/terminal.py +285 -0
  107. connectonion/useful_tools/todo_list.py +241 -0
  108. connectonion/useful_tools/web_fetch.py +216 -0
  109. connectonion/xray.py +467 -0
  110. connectonion-0.5.8.dist-info/METADATA +741 -0
  111. connectonion-0.5.8.dist-info/RECORD +113 -0
  112. connectonion-0.5.8.dist-info/WHEEL +4 -0
  113. connectonion-0.5.8.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,1596 @@
1
+ """
2
+ Purpose: Gmail integration tool for reading, sending, and managing emails via Google API
3
+ LLM-Note:
4
+ Dependencies: imports from [os, base64, google.oauth2.credentials, googleapiclient.discovery, googleapiclient.errors] | imported by [useful_tools/__init__.py] | requires OAuth tokens from 'co auth google' | tested by [tests/unit/test_gmail.py]
5
+ Data flow: Agent calls Gmail methods → _get_service() loads tokens from env (refreshing via the backend when close to expiry) → builds Gmail API service → API calls to Gmail REST endpoints → returns formatted results (email summaries, bodies, send confirmations)
6
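+ State/Effects: reads GOOGLE_* env vars for OAuth tokens | makes HTTP calls to Gmail API | can modify mailbox state (mark read/unread, archive, star, send emails) | on token refresh updates os.environ and rewrites the GOOGLE_ACCESS_TOKEN / GOOGLE_TOKEN_EXPIRES_AT lines of keys.env when that file exists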
7
+ Integration: exposes Gmail class with read_inbox(), get_sent_emails(), search_emails(), get_email_body(), send(), reply(), mark_read(), mark_unread(), archive_email(), star_email(), get_labels(), add_label(), count_unread(), get_all_contacts(), analyze_contact(), get_unanswered_emails(), update_contact() | used as agent tool via Agent(tools=[Gmail()])
8
+ Performance: network I/O per API call | batch fetching for list operations | email body fetched separately (lazy loading)
9
+ Errors: raises ValueError if OAuth not configured | HttpError from Google API propagates | returns error strings for display to user
10
+
11
+ Gmail tool for reading and managing Gmail emails.
12
+
13
+ Usage:
14
+ from connectonion import Agent, Gmail
15
+
16
+ gmail = Gmail()
17
+ agent = Agent("assistant", tools=[gmail])
18
+
19
+ # Agent can now use:
20
+ # - read_inbox(last, unread)
21
+ # - get_sent_emails(max_results)
22
+ # - get_all_emails(max_results)
23
+ # - search_emails(query, max_results)
24
+ # - get_email_body(email_id)
25
+ # - get_email_attachments(email_id)
26
+ # - send(to, subject, body, cc, bcc)
27
+ # - reply(email_id, body)
28
+ # - mark_read(email_id)
29
+ # - mark_unread(email_id)
30
+ # - archive_email(email_id)
31
+ # - star_email(email_id)
32
+ # - get_labels()
33
+ # - add_label(email_id, label)
34
+ # - get_emails_with_label(label, max_results)
35
+ # - count_unread()
36
+ # - get_all_contacts(max_emails)
37
+ # - analyze_contact(email, max_emails)
38
+ # - get_unanswered_emails(older_than_days, max_results)
39
+ # - update_contact(email, type, priority, deal, next_contact_date, ...)
40
+
41
+ Example:
42
+ from connectonion import Agent, Gmail
43
+
44
+ gmail = Gmail()
45
+ agent = Agent(
46
+ name="gmail-assistant",
47
+ system_prompt="You are a Gmail assistant.",
48
+ tools=[gmail]
49
+ )
50
+
51
+ agent.input("Show me my recent emails")
52
+ agent.input("Search for emails from alice@example.com")
53
+ """
54
+
55
+ import os
56
+ import base64
57
+ from google.oauth2.credentials import Credentials
58
+ from googleapiclient.discovery import build
59
+ from googleapiclient.errors import HttpError
60
+
61
+
62
+ class Gmail:
63
+ """Gmail tool for reading and managing emails."""
64
+
65
def __init__(self, emails_csv: str = "data/emails.csv", contacts_csv: str = "data/contacts.csv"):
    """Set up the Gmail tool after checking the granted OAuth scopes.

    Args:
        emails_csv: Path to CSV file for email caching (default: "data/emails.csv")
        contacts_csv: Path to CSV file for contact caching (default: "data/contacts.csv")

    Raises:
        ValueError: If the GOOGLE_SCOPES environment variable does not mention
            'gmail.readonly', or (checked second) 'gmail.send'.
    """
    granted = os.getenv("GOOGLE_SCOPES", "")

    # (required scope, wording of the "Please authorize ..." line), checked in order.
    required = (
        ("gmail.readonly", "Gmail access"),
        ("gmail.send", "Gmail send access"),
    )
    for scope, access in required:
        if scope in granted:
            continue
        raise ValueError(
            f"Missing '{scope}' scope.\n"
            f"Current scopes: {granted}\n"
            f"Please authorize {access}:\n"
            " co auth google"
        )

    # The API service is built lazily on first use.
    self._service = None
    self.emails_csv = emails_csv
    self.contacts_csv = contacts_csv
94
+
95
def _get_service(self):
    """Get Gmail API service (lazy load with token refresh).

    Reads GOOGLE_ACCESS_TOKEN, GOOGLE_REFRESH_TOKEN and GOOGLE_TOKEN_EXPIRES_AT
    from the environment. When the expiry is less than five minutes away (or
    already past) the token is refreshed through ``_refresh_via_backend`` and
    the cached service is rebuilt with the new token.

    Returns:
        The Gmail API service object created by ``build('gmail', 'v1', ...)``.

    Raises:
        ValueError: If the access token or refresh token is missing.
    """
    from datetime import datetime, timedelta, timezone

    access_token = os.getenv("GOOGLE_ACCESS_TOKEN")
    refresh_token = os.getenv("GOOGLE_REFRESH_TOKEN")
    expires_at_str = os.getenv("GOOGLE_TOKEN_EXPIRES_AT")

    if not access_token or not refresh_token:
        raise ValueError(
            "Google OAuth credentials not found.\n"
            "Run: co auth google"
        )

    # Always check expiry before returning the cached service.
    if expires_at_str:
        expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
        if expires_at.tzinfo is None:
            # A timestamp without an offset is interpreted as UTC (the previous
            # code compared it against the naive UTC clock).
            expires_at = expires_at.replace(tzinfo=timezone.utc)

        # Compare real instants. Stamping the UTC wall clock with the expiry's
        # own offset (as before) shifted "now" by that offset, so a timestamp
        # such as "...+05:00" was judged hours too early or too late.
        now = datetime.now(timezone.utc)

        if now >= expires_at - timedelta(minutes=5):
            # Token expired or about to expire, refresh via backend
            access_token = self._refresh_via_backend(refresh_token)
            # Clear cached service so it is rebuilt with the new token
            self._service = None

    if self._service:
        return self._service

    # Credentials are created without client_id/client_secret: the backend
    # handles token refresh, so google-auth's own auto-refresh is not needed.
    creds = Credentials(
        token=access_token,
        refresh_token=refresh_token,
        token_uri="https://oauth2.googleapis.com/token",
        client_id=None,
        client_secret=None,
        scopes=["https://www.googleapis.com/auth/gmail.readonly",
                "https://www.googleapis.com/auth/gmail.modify",
                "https://www.googleapis.com/auth/gmail.send"]
    )

    self._service = build('gmail', 'v1', credentials=creds)
    return self._service
139
+
140
def _refresh_via_backend(self, refresh_token: str) -> str:
    """Exchange the refresh token for a new access token through the backend.

    Posts to ``{OPENONION_API_URL}/api/v1/oauth/google/refresh`` (default host
    https://oo.openonion.ai) authenticated with OPENONION_API_KEY. On success
    the new token and expiry are stored in ``os.environ`` and, when
    ``keys.env`` exists under AGENT_CONFIG_PATH (default ``~/.co``), the
    matching lines of that file are rewritten.

    Args:
        refresh_token: The refresh token

    Returns:
        New access token

    Raises:
        ValueError: If OPENONION_API_KEY is unset or the backend does not
            answer with HTTP 200.
    """
    import httpx

    api_key = os.getenv("OPENONION_API_KEY")
    backend_url = os.getenv("OPENONION_API_URL", "https://oo.openonion.ai")

    if not api_key:
        raise ValueError(
            "OPENONION_API_KEY not found.\n"
            "This is needed to refresh tokens via backend."
        )

    response = httpx.post(
        f"{backend_url}/api/v1/oauth/google/refresh",
        headers={"Authorization": f"Bearer {api_key}"},
        json={"refresh_token": refresh_token}
    )
    if response.status_code != 200:
        raise ValueError(
            f"Failed to refresh token via backend: {response.text}"
        )

    payload = response.json()
    new_access_token = payload["access_token"]
    expires_at = payload["expires_at"]

    # Make the new values visible to the rest of this process.
    os.environ["GOOGLE_ACCESS_TOKEN"] = new_access_token
    os.environ["GOOGLE_TOKEN_EXPIRES_AT"] = expires_at

    # Persist to keys.env only if the file is already there; only lines that
    # already define one of the two keys are replaced, nothing is appended.
    config_dir = os.getenv("AGENT_CONFIG_PATH", os.path.expanduser("~/.co"))
    env_file = os.path.join(config_dir, "keys.env")
    if os.path.exists(env_file):
        with open(env_file, 'r') as f:
            existing = f.readlines()

        rewrites = (
            ("GOOGLE_ACCESS_TOKEN=", f"GOOGLE_ACCESS_TOKEN={new_access_token}\n"),
            ("GOOGLE_TOKEN_EXPIRES_AT=", f"GOOGLE_TOKEN_EXPIRES_AT={expires_at}\n"),
        )
        with open(env_file, 'w') as f:
            for line in existing:
                replacement = next(
                    (new_line for prefix, new_line in rewrites if line.startswith(prefix)),
                    line,
                )
                f.write(replacement)

    return new_access_token
197
+
198
def _format_emails(self, messages, max_results=10):
    """Fetch metadata for a list of message stubs and format it for display.

    Args:
        messages: Items from a ``messages().list`` response; each needs an 'id'.
        max_results: Maximum number of messages to fetch and show.

    Returns:
        "No emails found." for an empty list, otherwise a numbered listing with
        sender, subject, date, preview and ID of each message.
    """
    if not messages:
        return "No emails found."

    service = self._get_service()

    def header_value(headers, name, default):
        # Header field names are case-insensitive in email, so "subject" or
        # "SUBJECT" must match as well as "Subject". First match wins.
        wanted = name.lower()
        return next(
            (h['value'] for h in headers if h.get('name', '').lower() == wanted),
            default,
        )

    emails = []
    for msg in messages[:max_results]:
        # One metadata request per message.
        message = service.users().messages().get(
            userId='me',
            id=msg['id'],
            format='metadata',
            metadataHeaders=['From', 'Subject', 'Date']
        ).execute()

        # Tolerate a response without payload/headers instead of raising KeyError.
        headers = (message.get('payload') or {}).get('headers', [])

        emails.append({
            'id': msg['id'],
            'from': header_value(headers, 'From', 'Unknown'),
            'subject': header_value(headers, 'Subject', 'No Subject'),
            'date': header_value(headers, 'Date', 'Unknown'),
            'snippet': message.get('snippet', ''),
            'unread': 'UNREAD' in message.get('labelIds', [])
        })

    # Format output
    output = [f"Found {len(emails)} email(s):\n"]
    for i, email in enumerate(emails, 1):
        status = "[UNREAD]" if email['unread'] else ""
        output.append(f"{i}. {status} From: {email['from']}")
        output.append(f" Subject: {email['subject']}")
        output.append(f" Date: {email['date']}")
        output.append(f" Preview: {email['snippet'][:80]}...")
        output.append(f" ID: {email['id']}\n")

    return "\n".join(output)
242
+
243
+ # === Reading ===
244
+
245
def read_inbox(self, last: int = 10, unread: bool = False) -> str:
    """List the most recent inbox messages.

    Args:
        last: Number of emails to retrieve (default: 10)
        unread: Only get unread emails (default: False)

    Returns:
        Formatted string with email list
    """
    query = "in:inbox"
    if unread:
        query = "is:unread in:inbox"

    listing = self._get_service().users().messages().list(
        userId='me',
        q=query,
        maxResults=last
    ).execute()

    return self._format_emails(listing.get('messages', []), last)
267
+
268
def get_sent_emails(self, max_results: int = 10) -> str:
    """List messages matching the ``in:sent`` query.

    Args:
        max_results: Number of emails to retrieve (default: 10)

    Returns:
        Formatted string with sent email list
    """
    request = self._get_service().users().messages().list(
        userId='me',
        q="in:sent",
        maxResults=max_results
    )
    sent = request.execute().get('messages', [])
    return self._format_emails(sent, max_results)
287
+
288
def get_all_emails(self, max_results: int = 50) -> str:
    """List messages without any search query (not limited to one folder).

    Args:
        max_results: Number of emails to retrieve (default: 50)

    Returns:
        Formatted string with email list
    """
    request = self._get_service().users().messages().list(
        userId='me',
        maxResults=max_results
    )
    everything = request.execute().get('messages', [])
    return self._format_emails(everything, max_results)
306
+
307
+ # === Search ===
308
+
309
def search_emails(self, query: str, max_results: int = 10) -> str:
    """Run a Gmail search and format the matches.

    Args:
        query: Gmail search query (e.g., "from:alice@example.com", "subject:meeting")
        max_results: Number of results to return (default: 10)

    Returns:
        Formatted string with matching emails, or a message naming the query
        when nothing matched
    """
    response = self._get_service().users().messages().list(
        userId='me',
        q=query,
        maxResults=max_results
    ).execute()

    matches = response.get('messages', [])
    if matches:
        return self._format_emails(matches, max_results)
    return f"No emails found matching query: {query}"
333
+
334
+ # === Content ===
335
+
336
def _extract_body(self, payload) -> str:
    """Extract body text from an email payload.

    A payload with its own body data is decoded directly (HTML is stripped to
    text). Otherwise the parts are scanned: text/plain is preferred, text/html
    is the fallback, and nested multipart containers are searched recursively.
    Parts that carry a filename are treated as attachments and skipped, so an
    attached .txt/.html file is not mistaken for the message body.

    Args:
        payload: A message payload / part dict as returned by the Gmail API.

    Returns:
        The decoded body text, or '' when none is found.
    """
    import re
    from html import unescape

    def strip_html(html: str) -> str:
        # Drop <style>/<script> blocks entirely, then all tags, then collapse whitespace.
        html = re.sub(r'<style[^>]*>.*?</style>', '', html, flags=re.DOTALL | re.IGNORECASE)
        html = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE)
        text = re.sub(r'<[^>]+>', '', html)
        text = unescape(text)
        return re.sub(r'\s+', ' ', text).strip()

    def decode(data: str) -> str:
        # URL-safe base64 may arrive without '=' padding, which
        # urlsafe_b64decode rejects; restore it before decoding.
        padded = data + '=' * (-len(data) % 4)
        return base64.urlsafe_b64decode(padded).decode('utf-8', errors='replace')

    # Single part email
    own_data = (payload.get('body') or {}).get('data')
    if own_data:
        text = decode(own_data)
        if payload.get('mimeType') == 'text/html':
            return strip_html(text)
        return text

    # Multipart email
    if 'parts' in payload:
        plain_body = None
        html_body = None
        for part in payload['parts']:
            if part.get('filename'):
                continue  # attachment, not body text
            mime = part.get('mimeType', '')
            data = (part.get('body') or {}).get('data')
            if mime == 'text/plain' and data:
                plain_body = decode(data)
            elif mime == 'text/html' and data:
                html_body = decode(data)
            elif 'parts' in part:
                nested = self._extract_body(part)
                if nested:
                    return nested
        if plain_body:
            return plain_body
        if html_body:
            return strip_html(html_body)
    return ''
374
+
375
def get_email_body(self, email_id: str) -> str:
    """Get full email body.

    Args:
        email_id: Gmail message ID

    Returns:
        From/To/Subject/Date lines followed by the body text. When no body can
        be extracted, the message snippet (or 'No body content') is shown.
    """
    service = self._get_service()

    message = service.users().messages().get(
        userId='me',
        id=email_id,
        format='full'
    ).execute()

    # Header field names are case-insensitive in email; index them lower-cased
    # and keep the first occurrence of each, matching first-match lookup.
    headers = {}
    for h in message['payload'].get('headers', []):
        headers.setdefault(h['name'].lower(), h['value'])

    subject = headers.get('subject', 'No Subject')
    from_email = headers.get('from', 'Unknown')
    to_email = headers.get('to', 'Unknown')
    date = headers.get('date', 'Unknown')

    body = self._extract_body(message['payload'])
    if not body:
        body = message.get('snippet', 'No body content')

    output = [
        f"From: {from_email}",
        f"To: {to_email}",
        f"Subject: {subject}",
        f"Date: {date}",
        "\n--- Email Body ---\n",
        body
    ]

    return "\n".join(output)
412
+
413
def get_email_attachments(self, email_id: str) -> str:
    """List attachments in email.

    Every part with a filename counts as an attachment. The part tree is
    walked recursively, so attachments nested inside multipart containers are
    reported too (previously only top-level parts were inspected).

    Args:
        email_id: Gmail message ID

    Returns:
        List of attachment names, sizes (KB) and attachment IDs, or a message
        saying there are none
    """
    service = self._get_service()

    message = service.users().messages().get(
        userId='me',
        id=email_id,
        format='full'
    ).execute()

    attachments = []

    def collect(part):
        # Depth-first, in document order; a flat message lists as before.
        for child in part.get('parts', []):
            if child.get('filename'):
                body = child.get('body') or {}
                attachments.append({
                    'filename': child['filename'],
                    'size': body.get('size', 0),
                    'id': body.get('attachmentId', '')
                })
            collect(child)

    collect(message.get('payload') or {})

    if not attachments:
        return "No attachments in this email."

    output = [f"Found {len(attachments)} attachment(s):\n"]
    for i, att in enumerate(attachments, 1):
        size_kb = att['size'] / 1024
        output.append(f"{i}. {att['filename']} ({size_kb:.1f} KB)")
        output.append(f" ID: {att['id']}\n")

    return "\n".join(output)
452
+
453
def send(self, to: str, subject: str, body: str, cc: str = None, bcc: str = None) -> str:
    """Compose a plain-text message and send it through the Gmail API.

    Args:
        to: Recipient email address
        subject: Email subject
        body: Email body (plain text)
        cc: Optional CC recipients (comma-separated)
        bcc: Optional BCC recipients (comma-separated)

    Returns:
        Confirmation message with sent message ID
    """
    from email.mime.text import MIMEText

    service = self._get_service()

    mail = MIMEText(body)
    mail['To'] = to
    mail['Subject'] = subject
    # Optional recipient headers are added only when a value was given.
    for header_name, recipients in (('Cc', cc), ('Bcc', bcc)):
        if recipients:
            mail[header_name] = recipients

    # The API expects the whole RFC 822 message as URL-safe base64 text.
    encoded = base64.urlsafe_b64encode(mail.as_bytes()).decode('utf-8')

    response = service.users().messages().send(
        userId='me',
        body={'raw': encoded}
    ).execute()

    return f"Email sent successfully to {to}. Message ID: {response['id']}"
490
+
491
def reply(self, email_id: str, body: str) -> str:
    """Reply to an email via Gmail API.

    The reply goes to the original sender (From header), reuses the subject
    with a "Re: " prefix, and carries In-Reply-To/References headers plus the
    original thread ID so it is threaded with the original message.

    Args:
        email_id: Gmail message ID to reply to
        body: Reply message body (plain text)

    Returns:
        Confirmation message with sent message ID
    """
    from email.mime.text import MIMEText

    service = self._get_service()

    # Get original message to extract headers
    original = service.users().messages().get(
        userId='me',
        id=email_id,
        format='metadata',
        metadataHeaders=['From', 'To', 'Subject', 'Message-ID']
    ).execute()

    # Header field names are case-insensitive and the message ID header is
    # commonly spelled "Message-Id"; an exact-case lookup silently dropped the
    # threading headers for such mail. Index lower-cased, first value wins.
    headers = {}
    for h in (original.get('payload') or {}).get('headers', []):
        headers.setdefault(h['name'].lower(), h['value'])

    original_subject = headers.get('subject', '')
    original_from = headers.get('from', '')
    original_message_id = headers.get('message-id', '')
    thread_id = original.get('threadId', '')

    # Create reply
    message = MIMEText(body)
    message['To'] = original_from
    # Do not stack a second prefix on subjects that already start with
    # "Re:" in any letter case (e.g. "RE: ...").
    if original_subject.lower().startswith('re:'):
        message['Subject'] = original_subject
    else:
        message['Subject'] = f"Re: {original_subject}"

    if original_message_id:
        message['In-Reply-To'] = original_message_id
        message['References'] = original_message_id

    # Encode message
    raw_message = base64.urlsafe_b64encode(message.as_bytes()).decode('utf-8')

    # Send as reply in same thread; omit threadId when the original has none
    # rather than sending an empty value.
    send_body = {'raw': raw_message}
    if thread_id:
        send_body['threadId'] = thread_id

    sent_message = service.users().messages().send(
        userId='me',
        body=send_body
    ).execute()

    return f"Reply sent successfully. Message ID: {sent_message['id']}"
538
+
539
+ # === Actions ===
540
+
541
def mark_read(self, email_id: str) -> str:
    """Remove the UNREAD label from a message.

    Args:
        email_id: Gmail message ID

    Returns:
        Confirmation message
    """
    label_change = {'removeLabelIds': ['UNREAD']}
    request = self._get_service().users().messages().modify(
        userId='me',
        id=email_id,
        body=label_change
    )
    request.execute()
    return f"Marked email as read: {email_id}"
559
+
560
def mark_unread(self, email_id: str) -> str:
    """Add the UNREAD label to a message.

    Args:
        email_id: Gmail message ID

    Returns:
        Confirmation message
    """
    label_change = {'addLabelIds': ['UNREAD']}
    request = self._get_service().users().messages().modify(
        userId='me',
        id=email_id,
        body=label_change
    )
    request.execute()
    return f"Marked email as unread: {email_id}"
578
+
579
def archive_email(self, email_id: str) -> str:
    """Archive a message by removing its INBOX label.

    Args:
        email_id: Gmail message ID

    Returns:
        Confirmation message
    """
    label_change = {'removeLabelIds': ['INBOX']}
    request = self._get_service().users().messages().modify(
        userId='me',
        id=email_id,
        body=label_change
    )
    request.execute()
    return f"Archived email: {email_id}"
597
+
598
def star_email(self, email_id: str) -> str:
    """Star a message by adding the STARRED label.

    Args:
        email_id: Gmail message ID

    Returns:
        Confirmation message
    """
    label_change = {'addLabelIds': ['STARRED']}
    request = self._get_service().users().messages().modify(
        userId='me',
        id=email_id,
        body=label_change
    )
    request.execute()
    return f"Starred email: {email_id}"
616
+
617
+ # === Labels ===
618
+
619
def get_labels(self) -> str:
    """List all Gmail labels of the account.

    Returns:
        One line per label with its name, ID and type ('user' when the API
        response carries no type), or "No labels found."
    """
    response = self._get_service().users().labels().list(userId='me').execute()
    labels = response.get('labels', [])
    if not labels:
        return "No labels found."

    heading = f"Found {len(labels)} label(s):\n"
    rows = [
        f"- {entry['name']} (ID: {entry['id']}, Type: {entry.get('type', 'user')})"
        for entry in labels
    ]
    return "\n".join([heading, *rows])
639
+
640
def add_label(self, email_id: str, label: str) -> str:
    """Attach a label to a message.

    The label is first looked up by name (case-insensitive) among the
    account's labels; when no name matches, the given value is sent as the
    label ID unchanged.

    Args:
        email_id: Gmail message ID
        label: Label name or ID

    Returns:
        Confirmation message
    """
    service = self._get_service()

    known = service.users().labels().list(userId='me').execute().get('labels', [])
    # First label whose name matches wins; otherwise fall back to the raw value.
    label_id = next(
        (entry['id'] for entry in known if entry['name'].lower() == label.lower()),
        label,
    )

    service.users().messages().modify(
        userId='me',
        id=email_id,
        body={'addLabelIds': [label_id]}
    ).execute()

    return f"Added label '{label}' to email: {email_id}"
669
+
670
def get_emails_with_label(self, label: str, max_results: int = 10) -> str:
    """List messages carrying a given label.

    The label name is matched case-insensitively against the account's labels.

    Args:
        label: Label name (e.g., "Important", "Work")
        max_results: Number of emails to retrieve (default: 10)

    Returns:
        Formatted string with email list, or a message when the label does not
        exist or has no emails
    """
    service = self._get_service()

    known = service.users().labels().list(userId='me').execute().get('labels', [])
    label_id = next(
        (entry['id'] for entry in known if entry['name'].lower() == label.lower()),
        None,
    )
    if not label_id:
        return f"Label not found: {label}"

    listing = service.users().messages().list(
        userId='me',
        labelIds=[label_id],
        maxResults=max_results
    ).execute()

    tagged = listing.get('messages', [])
    if tagged:
        return self._format_emails(tagged, max_results)
    return f"No emails with label: {label}"
707
+
708
+ # === Stats ===
709
+
710
def count_unread(self) -> str:
    """Report how many unread emails there are.

    The number is taken from the ``resultSizeEstimate`` field of an
    ``is:unread`` listing (0 when the field is absent).

    Returns:
        Number of unread emails
    """
    listing = self._get_service().users().messages().list(
        userId='me',
        q="is:unread",
        maxResults=1  # only the size field is read, not the messages
    ).execute()

    unread_total = listing.get('resultSizeEstimate', 0)
    return f"You have {unread_total} unread email(s)."
728
+
729
def get_my_identity(self) -> str:
    """Get the user's email address and aliases (who am I?).

    Returns:
        The primary email, any send-as aliases that differ from it, and the
        domains of the send-as addresses other than gmail.com, hotmail.com,
        outlook.com and yahoo.com (reported as organization domains, together
        with an ``exclude_domains`` hint).
    """
    service = self._get_service()

    primary_email = service.users().getProfile(userId='me').execute().get('emailAddress', '')

    send_as = service.users().settings().sendAs().list(userId='me').execute()
    addresses = [entry.get('sendAsEmail', '') for entry in send_as.get('sendAs', [])]

    aliases = [addr for addr in addresses if addr and addr != primary_email]

    # NOTE(review): domains are collected from every send-as address, not only
    # from the aliases — confirm against the original indentation.
    public_providers = ['gmail.com', 'hotmail.com', 'outlook.com', 'yahoo.com']
    domains = set()
    for addr in addresses:
        if '@' not in addr:
            continue
        domain = addr.split('@')[1]
        if domain not in public_providers:
            domains.add(domain)

    pieces = [f"Primary email: {primary_email}\n"]
    if aliases:
        pieces.append(f"Aliases: {', '.join(aliases)}\n")
    if domains:
        ordered = sorted(domains)
        pieces.append(f"Organization domains: {', '.join(ordered)}\n")
        pieces.append(f"\nUse exclude_domains=\"{','.join(ordered)}\" to exclude your own addresses from contact lists.")

    return "".join(pieces)
763
+
764
def detect_all_my_emails(self, max_emails: int = 100) -> str:
    """Summarize every address that appears to deliver mail to this account.

    Delegates detection to ``_detect_routed_addresses`` and formats its result:
    primary address, send-as aliases, domains of the routed addresses, the
    routed addresses themselves, and the combined list.

    Args:
        max_emails: Number of recent emails to scan (default 100)

    Returns:
        All detected email addresses (primary + aliases + routed addresses)
    """
    routed_emails, primary_email, known_emails = self._detect_routed_addresses(max_emails)

    aliases = known_emails - {primary_email}
    routed_domains = {addr.split('@')[1] for addr in routed_emails if '@' in addr}

    sections = [f"Primary email: {primary_email}\n"]
    if aliases:
        sections.append(f"Send-as aliases: {', '.join(sorted(aliases))}\n")
    if routed_domains:
        sections.append(f"Organization domains: {', '.join(sorted(routed_domains))}\n")
    if routed_emails:
        sections.append(f"Routed addresses detected: {', '.join(sorted(routed_emails))}\n")

    everything = known_emails | routed_emails
    sections.append(f"\nAll your addresses ({len(everything)}): {', '.join(sorted(everything))}")

    return "".join(sections)
793
+
794
def _detect_routed_addresses(self, max_emails: int = 100) -> tuple:
    """Internal: find addresses whose mail is forwarded into this Gmail account.

    Rule: for an inbox message whose X-Forwarded-To header contains the
    primary address, the FIRST address in its To header is taken as a routed
    address unless it is already the primary address or a send-as alias.

    Args:
        max_emails: Maximum number of inbox messages to list and inspect.

    Returns:
        tuple: (routed_emails set, primary_email str, known_emails set);
        all addresses are lower-cased.
    """
    import re
    service = self._get_service()

    # Primary address plus configured send-as aliases form the known set.
    profile = service.users().getProfile(userId='me').execute()
    primary_email = profile.get('emailAddress', '').lower()

    send_as = service.users().settings().sendAs().list(userId='me').execute()
    alias_addresses = (entry.get('sendAsEmail', '').lower() for entry in send_as.get('sendAs', []))
    known_emails = {primary_email} | {addr for addr in alias_addresses if addr}

    listing = service.users().messages().list(
        userId='me',
        maxResults=max_emails,
        q='in:inbox'
    ).execute()

    routed_emails = set()
    for stub in listing.get('messages', []):
        detail = service.users().messages().get(
            userId='me',
            id=stub['id'],
            format='metadata',
            metadataHeaders=['To', 'X-Forwarded-To']
        ).execute()

        by_name = {}
        for header in detail.get('payload', {}).get('headers', []):
            by_name[header['name']] = header['value']

        # Only messages forwarded to the primary address are of interest.
        if primary_email not in by_name.get('X-Forwarded-To', '').lower():
            continue

        recipients = re.findall(r'[\w.+-]+@[\w.-]+\.\w+', by_name.get('To', '').lower())
        if not recipients:
            continue

        first_recipient = recipients[0]
        if first_recipient not in known_emails:
            routed_emails.add(first_recipient)

    return routed_emails, primary_email, known_emails
851
+
852
def get_all_my_emails(self, max_emails: int = 100) -> set:
    """Return every email address associated with this account.

    The result is the union of the known addresses (primary address plus
    send-as aliases) and the routed addresses reported by
    ``_detect_routed_addresses``.

    Args:
        max_emails: Number of emails to scan for detecting routed addresses

    Returns:
        Set of email addresses (lowercase)
    """
    routed, _primary, known = self._detect_routed_addresses(max_emails)
    return set().union(known, routed)
870
+
871
+ # === CRM ===
872
+
873
+ def _scan_contacts(self, max_emails: int = 500, exclude_automated: bool = True, exclude_domains: str = "") -> tuple:
874
+ """Internal helper: scan emails and return contact data (no CSV writing).
875
+
876
+ Returns:
877
+ tuple: (contacts_dict, email_records) where contacts_dict maps email -> {name, frequency, last_contact}
878
+ """
879
+ import re
880
+ from collections import defaultdict
881
+
882
+ service = self._get_service()
883
+
884
+ # Get user's email addresses to exclude self
885
+ profile = service.users().getProfile(userId='me').execute()
886
+ user_email = profile.get('emailAddress', '').lower()
887
+
888
+ # Get user's send-as addresses (aliases)
889
+ user_addresses = {user_email}
890
+ user_domains = set()
891
+
892
+ # Add explicitly excluded domains
893
+ if exclude_domains:
894
+ for domain in exclude_domains.split(','):
895
+ user_domains.add(domain.strip().lower())
896
+
897
+ send_as = service.users().settings().sendAs().list(userId='me').execute()
898
+ for alias in send_as.get('sendAs', []):
899
+ alias_email = alias.get('sendAsEmail', '').lower()
900
+ user_addresses.add(alias_email)
901
+ if '@' in alias_email:
902
+ domain = alias_email.split('@')[1]
903
+ if domain not in ['gmail.com', 'hotmail.com', 'outlook.com', 'yahoo.com']:
904
+ user_domains.add(domain)
905
+
906
+ automated_patterns = [
907
+ 'mailer-daemon', 'postmaster@', 'bounce@', 'bounces@',
908
+ 'unsubscribe', 'unsub-', 'optout@', 'opt-out@',
909
+ 'noreply@', 'no-reply@', 'donotreply@', 'do-not-reply@',
910
+ 'mailchimp.com', 'sendgrid.net', 'amazonses.com', 'mailjet.com',
911
+ 'customer.io', 'responsys', 'oraclecloud.com',
912
+ ]
913
+
914
+ def is_automated(email_addr: str) -> bool:
915
+ return any(pattern in email_addr.lower() for pattern in automated_patterns)
916
+
917
+ # Get emails with pagination
918
+ messages = []
919
+ page_token = None
920
+ while len(messages) < max_emails:
921
+ results = service.users().messages().list(
922
+ userId='me',
923
+ maxResults=min(100, max_emails - len(messages)),
924
+ pageToken=page_token
925
+ ).execute()
926
+ messages.extend(results.get('messages', []))
927
+ page_token = results.get('nextPageToken')
928
+ if not page_token:
929
+ break
930
+
931
+ contacts = defaultdict(lambda: {'name': '', 'threads': set(), 'last_contact': None})
932
+ email_records = []
933
+
934
+ for msg in messages:
935
+ message = service.users().messages().get(
936
+ userId='me',
937
+ id=msg['id'],
938
+ format='metadata',
939
+ metadataHeaders=['From', 'To', 'Cc', 'Subject', 'Date']
940
+ ).execute()
941
+
942
+ headers = message['payload']['headers']
943
+ headers_dict = {h['name']: h['value'] for h in headers}
944
+
945
+ email_records.append({
946
+ 'id': msg['id'],
947
+ 'thread_id': message.get('threadId', ''),
948
+ 'from_email': headers_dict.get('From', ''),
949
+ 'to_email': headers_dict.get('To', ''),
950
+ 'subject': headers_dict.get('Subject', ''),
951
+ 'date': headers_dict.get('Date', ''),
952
+ 'snippet': message.get('snippet', '')[:200]
953
+ })
954
+
955
+ seen_in_msg = set()
956
+ for header in headers:
957
+ if header['name'] in ['From', 'To', 'Cc']:
958
+ value = header['value']
959
+ email_pattern = r'<([^>]+)>|([^\s<>,]+@[^\s<>,]+)'
960
+ matches = re.findall(email_pattern, value)
961
+
962
+ for match in matches:
963
+ email = match[0] or match[1]
964
+ email = email.strip('"\'<> ')
965
+ if email and '@' in email and email not in seen_in_msg:
966
+ email_lower = email.lower()
967
+
968
+ if email_lower in user_addresses:
969
+ continue
970
+ email_domain = email_lower.split('@')[1] if '@' in email_lower else ''
971
+ if email_domain in user_domains:
972
+ continue
973
+ if exclude_automated and is_automated(email):
974
+ continue
975
+
976
+ seen_in_msg.add(email)
977
+
978
+ name_match = re.search(rf'([^<>]+)<{re.escape(email)}>', value)
979
+ name = name_match.group(1).strip() if name_match else email.split('@')[0]
980
+
981
+ thread_id = message.get('threadId', msg['id'])
982
+ if not contacts[email]['name']:
983
+ contacts[email]['name'] = name
984
+ contacts[email]['threads'].add(thread_id)
985
+
986
+ email_date = headers_dict.get('Date', '')
987
+ if email_date and not contacts[email]['last_contact']:
988
+ from email.utils import parsedate_to_datetime
989
+ try:
990
+ dt = parsedate_to_datetime(email_date)
991
+ contacts[email]['last_contact'] = dt.strftime('%Y-%m-%d')
992
+ except:
993
+ contacts[email]['last_contact'] = email_date[:10]
994
+
995
+ # Convert to simple dict format
996
+ result = {}
997
+ for email, info in contacts.items():
998
+ result[email] = {
999
+ 'email': email,
1000
+ 'name': info['name'],
1001
+ 'frequency': len(info['threads']),
1002
+ 'last_contact': info.get('last_contact', ''),
1003
+ 'type': '',
1004
+ 'company': '',
1005
+ 'relationship': '',
1006
+ 'priority': '',
1007
+ 'deal': '',
1008
+ 'next_contact_date': '',
1009
+ 'tags': '',
1010
+ 'notes': ''
1011
+ }
1012
+
1013
+ return result, email_records
1014
+
1015
def get_all_contacts(self, max_emails: int = 500, exclude_automated: bool = True, exclude_domains: str = "") -> str:
    """Collect unique contacts with thread counts. OVERWRITES contacts.csv.

    Intended for initial setup. For regular updates that preserve CRM data,
    use sync_contacts().

    Args:
        max_emails: Maximum emails to scan (default: 500)
        exclude_automated: Filter out no-reply, system, and automated senders (default: True)
        exclude_domains: Comma-separated domains to exclude (e.g. "mycompany.com,myorg.ai")

    Returns:
        List of contacts sorted by frequency with email, name, and count
    """
    import csv

    contacts, email_records = self._scan_contacts(max_emails, exclude_automated, exclude_domains)

    # Highest thread count first; ties keep scan order (stable sort).
    ranked = list(contacts.values())
    ranked.sort(key=lambda contact: int(contact['frequency']), reverse=True)

    # Email cache is rewritten from scratch when a path is configured.
    if self.emails_csv:
        email_columns = ['id', 'thread_id', 'from_email', 'to_email', 'subject', 'date', 'snippet']
        with open(self.emails_csv, 'w', newline='') as handle:
            email_writer = csv.DictWriter(handle, fieldnames=email_columns)
            email_writer.writeheader()
            email_writer.writerows(email_records)

    # Contacts file is rewritten too (use sync_contacts to preserve CRM data).
    if self.contacts_csv:
        contact_columns = ['email', 'name', 'frequency', 'last_contact', 'type', 'company', 'relationship', 'priority', 'deal', 'next_contact_date', 'tags', 'notes']
        with open(self.contacts_csv, 'w', newline='') as handle:
            contact_writer = csv.DictWriter(handle, fieldnames=contact_columns)
            contact_writer.writeheader()
            contact_writer.writerows(ranked)

    lines = [f"Found {len(contacts)} unique contacts (sorted by thread count):\n"]
    lines.extend(
        f"- {contact['name']} <{contact['email']}> ({contact['frequency']} threads)"
        for contact in ranked
    )
    return "\n".join(lines)
1057
+
1058
def analyze_contact(self, email: str, max_emails: int = 50) -> str:
    """Ask the LLM to analyze the correspondence with one contact.

    Searches for mail sent from or to the address via ``search_emails`` and
    passes the result to ``llm_do`` with the ``analyze_contact.md`` prompt
    file as system prompt.

    Args:
        email: Contact's email address to analyze
        max_emails: Number of recent emails to analyze (default: 50)

    Returns:
        The LLM output prefixed with an "Analysis for <email>" header
    """
    from connectonion.llm_do import llm_do
    from pathlib import Path

    # The prompt lives in a "prompt_files" directory one level above this
    # module's own directory.
    prompt_path = Path(__file__).parent.parent / "prompt_files" / "analyze_contact.md"

    emails_result = self.search_emails(
        query=f"from:{email} OR to:{email}",
        max_results=max_emails,
    )
    input_data = f"Contact: {email}\n\nEmails:\n{emails_result}"

    analysis = llm_do(input_data, system_prompt=prompt_path)
    return f"Analysis for {email}:\n\n{analysis}"
1091
+
1092
def get_unanswered_emails(self, within_days: int = 120, max_results: int = 20) -> str:
    """Find inbox threads from the last N days that we haven't replied to.

    A thread is reported when its last message is not from one of our
    addresses, its first message was not sent by us, and neither the last
    sender nor the subject looks automated.

    Args:
        within_days: Look back this many days (default: 120 = ~4 months)
        max_results: Maximum emails to return (default: 20)

    Returns:
        List of unanswered emails with sender, subject, age and thread id,
        or a "No unanswered emails" message.
    """
    import re
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    service = self._get_service()

    # All of our addresses (primary + aliases + routed). Empty strings are
    # dropped because "" is a substring of every sender address.
    user_emails = {addr for addr in self.get_all_my_emails(max_emails=50) if addr}

    # Patterns matched as plain substrings of the sender address.
    automated_email_patterns = (
        # Generic automated prefixes
        'noreply', 'no-reply', 'donotreply', 'do-not-reply',
        'notifications@', 'notification@', 'newsletter@', 'news@',
        'alerts@', 'alert@', 'updates@', 'update@',
        'security@', 'team@', 'support@', 'help@', 'info@',
        'marketing@', 'promo@', 'promotions@', 'offers@',
        'billing@', 'invoice@', 'receipt@', 'order@',
        'feedback@', 'survey@', 'announce@', 'digest@',
        'hello@',  # Common marketing prefix
        # Common automated domains/subdomains
        'mail.instagram.com', 'mail.linkedin.com', 'mail.facebook.com',
        'mail.twitter.com', 'mail.x.com', 'mail.google.com',
        'facebookmail.com', 'linkedin.com', 'glassdoor.com',
        'calendly.com', 'zoom.us', 'mailchimp', 'sendgrid',
        'amazonses', 'postmark', 'intercom', 'hubspot',
        'mailgun', 'sparkpost', 'constantcontact', 'campaign-archive',
        'vimeo.com', 'vimeo@',  # Video platforms
        'mongodb.com', 'mongodb@', 'atlassian.com', 'github.com',
        'aws.amazon.com', 'cloud.google.com', 'azure.microsoft.com',
    )
    # Sub-domain labels. These must match a whole DNS label: as raw
    # substrings, 'mail.' would also match "gmail.com" and "hotmail.com" and
    # hide every real person using those providers.
    automated_subdomain_labels = (
        'mail.', 'send.', 'email.', 'mailer.', 'bounce.',
        'notify.', 'msg.', 'campaigns.',
    )
    automated_subject_patterns = (
        'your job', 'job alert', 'new jobs', 'jobs for you',
        'password reset', 'verify your', 'confirm your',
        'security alert', 'new sign-in', 'new login', 'login attempt',
        'weekly digest', 'daily digest', 'monthly digest',
        'newsletter', 'unsubscribe', 'subscription',
        'receipt for', 'invoice', 'payment confirmation', 'order confirmation',
        'your order', 'shipping confirmation', 'delivery update',
        'welcome to', 'thanks for signing up', 'account created',
        'is active', 'expiring soon', 'expires', 'renew',
        # Calendar/meeting related
        'invitation:', 'invitation from', 'canceled event', 'accepted:', 'declined:',
        'updated invitation', 'event canceled', 'meeting canceled',
        'from an unknown sender',
        # Account related
        'account registration', 'registration complete', 'verify your email',
        'confirm your email', 'activate your account', 'action required',
        'build your first', 'getting started with', 'complete your setup',
        # Monthly/periodic reports
        'in january', 'in february', 'in march', 'in april', 'in may',
        'in june', 'in july', 'in august', 'in september', 'in october',
        'in november', 'in december', 'this month', 'last month',
        'pro tips', 'tips to', 'getting started',
    )

    def header_value(message, name, default=''):
        """Return the first header called *name* (case-insensitive)."""
        wanted = name.lower()
        for header in message.get('payload', {}).get('headers', []):
            if header.get('name', '').lower() == wanted:
                return header.get('value', default)
        return default

    def sender_address(from_header):
        """Extract the lowercase address from a 'Name <email>' header."""
        match = re.search(r'<([^>]+)>', from_header)
        return (match.group(1) if match else from_header).lower()

    def is_mine(address):
        return any(mine in address for mine in user_emails)

    def is_automated_sender(address):
        if any(pattern in address for pattern in automated_email_patterns):
            return True
        domain = address.rpartition('@')[2]
        return any(
            domain.startswith(label) or f".{label}" in domain
            for label in automated_subdomain_labels
        )

    def age_in_days(date_str):
        """Days since *date_str*; falls back to within_days if unusable."""
        if not date_str:
            return within_days
        try:
            sent = parsedate_to_datetime(date_str)
        except (TypeError, ValueError):
            return within_days
        if sent.tzinfo is None:
            # A "-0000" zone yields a naive datetime; treat it as UTC so the
            # subtraction from an aware "now" is valid.
            sent = sent.replace(tzinfo=timezone.utc)
        return (datetime.now(timezone.utc) - sent).days

    query = f"in:inbox newer_than:{within_days}d"
    unanswered = []
    seen_threads = set()
    page_token = None
    max_pages = 10  # Safety limit on the number of listing pages
    pages_fetched = 0

    while len(unanswered) < max_results and pages_fetched < max_pages:
        results = service.users().messages().list(
            userId='me',
            q=query,
            maxResults=100,  # Fetch in larger batches for efficiency
            pageToken=page_token
        ).execute()

        messages = results.get('messages', [])
        if not messages:
            break

        for msg in messages:
            # Each thread is inspected once, however many messages list it.
            thread_id = msg.get('threadId')
            if thread_id in seen_threads:
                continue
            seen_threads.add(thread_id)

            thread = service.users().threads().get(
                userId='me',
                id=thread_id,
                format='metadata',
                metadataHeaders=['From', 'Subject', 'Date']
            ).execute()

            thread_messages = thread.get('messages', [])
            if not thread_messages:
                continue

            # Last message from us means we already replied.
            last_from_email = sender_address(header_value(thread_messages[-1], 'From'))
            if is_mine(last_from_email):
                continue

            # First message from us means we initiated the thread.
            first_msg = thread_messages[0]
            first_from = header_value(first_msg, 'From')
            if is_mine(sender_address(first_from)):
                continue

            if is_automated_sender(last_from_email):
                continue

            subject = header_value(first_msg, 'Subject', 'No Subject')
            subject_lower = subject.lower()
            if any(pattern in subject_lower for pattern in automated_subject_patterns):
                continue

            date_str = header_value(first_msg, 'Date')
            unanswered.append({
                'thread_id': thread_id,
                'from': first_from or 'Unknown',
                'subject': subject,
                'date': date_str,
                'age_days': age_in_days(date_str),
                'messages_in_thread': len(thread_messages)
            })

            if len(unanswered) >= max_results:
                break

        # Pagination: get next page
        page_token = results.get('nextPageToken')
        pages_fetched += 1
        if not page_token:
            break

    if not unanswered:
        return f"No unanswered emails found in the last {within_days} days."

    output = [f"Found {len(unanswered)} unanswered email(s) from the last {within_days} days:\n"]
    for i, item in enumerate(unanswered, 1):
        output.append(f"{i}. From: {item['from']}")
        output.append(f"   Subject: {item['subject']}")
        output.append(f"   Age: {item['age_days']} days ({item['messages_in_thread']} messages in thread)")
        output.append(f"   Thread ID: {item['thread_id']}\n")

    return "\n".join(output)
1277
+
1278
+ # === CSV Caching ===
1279
+
1280
def sync_emails(self, days_back: int = 300) -> str:
    """Sync emails to the CSV cache file with full content (incremental).

    First run: fetches all emails from the last N days with full body.
    Subsequent runs: only fetches messages whose id is not already cached.

    If the existing cache was written with a different column layout (for
    example without the ``body`` column), it is rewritten with the full
    layout before new rows are added, so columns never become misaligned.

    Args:
        days_back: How many days of email history to sync (default: 300)

    Returns:
        Summary of sync operation
    """
    if not self.emails_csv:
        return "No emails_csv path configured. Initialize Gmail with emails_csv parameter."

    import csv
    import base64
    from datetime import datetime, timedelta

    service = self._get_service()

    fieldnames = ['id', 'thread_id', 'from_email', 'to_email', 'subject', 'date', 'body', 'snippet']

    # Collect cached ids and remember the cache's header. newline='' is
    # required so quoted fields with embedded line breaks (email bodies)
    # are parsed correctly.
    existing_ids = set()
    cached_header = None
    if os.path.exists(self.emails_csv):
        with open(self.emails_csv, 'r', newline='') as f:
            reader = csv.DictReader(f)
            for row in reader:
                if row.get('id'):
                    existing_ids.add(row['id'])
            cached_header = reader.fieldnames

    # Build query for date range
    after_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y/%m/%d')
    query = f"after:{after_date}"

    # Fetch ALL message ids in the date range with pagination
    messages = []
    page_token = None
    while True:
        results = service.users().messages().list(
            userId='me',
            q=query,
            maxResults=100,
            pageToken=page_token
        ).execute()
        messages.extend(results.get('messages', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    # Filter out already cached emails
    new_msg_ids = [msg for msg in messages if msg['id'] not in existing_ids]

    def decode_part(data):
        # Tolerate base64url payloads whose '=' padding was stripped.
        padded = data + '=' * (-len(data) % 4)
        return base64.urlsafe_b64decode(padded).decode('utf-8', errors='replace')

    def get_email_body(payload):
        """Extract the body text from an email payload (depth-first)."""
        data = payload.get('body', {}).get('data')
        if data:
            return decode_part(data)

        for part in payload.get('parts', []):
            part_data = part.get('body', {}).get('data')
            if part.get('mimeType') == 'text/plain' and part_data:
                return decode_part(part_data)
            # Recurse into nested multipart containers
            if 'parts' in part:
                nested = get_email_body(part)
                if nested:
                    return nested
        return ''

    new_emails = []
    for msg in new_msg_ids:
        message = service.users().messages().get(
            userId='me',
            id=msg['id'],
            format='full'
        ).execute()

        payload = message.get('payload', {})
        headers = {h['name']: h['value'] for h in payload.get('headers', [])}

        new_emails.append({
            'id': msg['id'],
            'thread_id': message.get('threadId', ''),
            'from_email': headers.get('From', ''),
            'to_email': headers.get('To', ''),
            'subject': headers.get('Subject', ''),
            'date': headers.get('Date', ''),
            'body': get_email_body(payload),
            'snippet': message.get('snippet', '')
        })

    has_content = os.path.exists(self.emails_csv) and os.path.getsize(self.emails_csv) > 0
    needs_upgrade = has_content and list(cached_header or []) != fieldnames

    carried_rows = []
    if needs_upgrade:
        # Reload the old rows so they can be rewritten under the full header;
        # columns the old layout lacked are left empty.
        with open(self.emails_csv, 'r', newline='') as f:
            carried_rows = list(csv.DictReader(f))

    mode = 'w' if needs_upgrade else 'a'
    with open(self.emails_csv, mode, newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval='', extrasaction='ignore')
        if needs_upgrade or not has_content:
            writer.writeheader()
        writer.writerows(carried_rows)
        writer.writerows(new_emails)

    return f"Synced {len(new_emails)} new emails (from {len(messages)} total in last {days_back} days). Cache now has {len(existing_ids) + len(new_emails)} emails."
1379
+
1380
def sync_contacts(self, max_emails: int = 500, exclude_domains: str = "") -> str:
    """Sync contacts - adds new, updates existing, KEEPS all contacts, PRESERVES CRM data.

    Unlike get_all_contacts() which overwrites everything, sync_contacts():
    - Adds NEW contacts with empty CRM fields
    - Updates frequency and last_contact for existing contacts
    - KEEPS contacts not in recent scan
    - PRESERVES existing CRM fields and any extra columns already in the file

    Args:
        max_emails: Maximum emails to scan (default: 500)
        exclude_domains: Comma-separated domains to exclude

    Returns:
        Summary of sync operation
    """
    if not self.contacts_csv:
        return "No contacts_csv path configured. Initialize Gmail with contacts_csv parameter."

    import csv

    fieldnames = ['email', 'name', 'frequency', 'last_contact', 'type', 'company',
                  'relationship', 'priority', 'deal', 'next_contact_date', 'tags', 'notes']

    # Step 1: Load existing contacts with ALL data. newline='' lets quoted
    # fields with embedded line breaks (e.g. notes) round-trip correctly.
    existing = {}
    if os.path.exists(self.contacts_csv):
        with open(self.contacts_csv, 'r', newline='') as f:
            reader = csv.DictReader(f)
            # Keep columns the user added by hand instead of failing on write.
            for column in reader.fieldnames or []:
                if column not in fieldnames:
                    fieldnames.append(column)
            for row in reader:
                existing[(row.get('email') or '').lower()] = dict(row)

    old_count = len(existing)

    # Step 2: Scan emails using helper (no CSV write)
    fresh, _ = self._scan_contacts(max_emails, True, exclude_domains)

    # Step 3: Merge - update existing, add new, KEEP contacts not in scan
    new_count = 0
    updated_count = 0
    for email, data in fresh.items():
        email_key = email.lower()
        current = existing.get(email_key)
        if current is None:
            existing[email_key] = data
            new_count += 1
            continue
        # Refresh scan-derived fields only; CRM fields stay untouched.
        current['frequency'] = str(data['frequency'])
        current['last_contact'] = data['last_contact']
        if not current.get('name'):
            current['name'] = data['name']
        updated_count += 1

    def frequency_of(contact):
        # Blank or hand-edited cells must not abort the whole sync.
        try:
            return int(contact.get('frequency') or 0)
        except (TypeError, ValueError):
            return 0

    # Step 4: Write merged data (sorted by frequency)
    sorted_contacts = sorted(existing.values(), key=frequency_of, reverse=True)

    with open(self.contacts_csv, 'w', newline='') as f:
        # extrasaction='ignore' only drops the unnamed overflow cells that
        # DictReader stores under the key None; every named column is kept.
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval='', extrasaction='ignore')
        writer.writeheader()
        writer.writerows(sorted_contacts)

    return f"Synced {len(existing)} contacts ({new_count} new, {updated_count} updated, {old_count - updated_count} unchanged)"
1443
+
1444
def get_cached_contacts(self) -> str:
    """Read contacts from the CSV cache (no API call).

    Rows are listed in file order; at most the first 50 are shown, while
    the header line reports the total number of cached rows.

    Returns:
        Formatted contact list, or a hint to run sync_contacts() when the
        cache is missing or empty
    """
    path = self.contacts_csv
    if not path or not os.path.exists(path):
        return "No cached contacts. Run sync_contacts() first."

    import csv

    with open(path, 'r') as handle:
        rows = list(csv.DictReader(handle))

    if not rows:
        return "No contacts in cache. Run sync_contacts() first."

    lines = [f"Cached contacts ({len(rows)}):\n"]
    lines.extend(
        f"- {row['name']} <{row['email']}> ({row['frequency']} emails)"
        for row in rows[:50]
    )
    return "\n".join(lines)
1469
+
1470
def update_contact(self, email: str, type: str = None, company: str = None,
                   relationship: str = None, priority: str = None, deal: str = None,
                   next_contact_date: str = None, tags: str = None, notes: str = None,
                   last_contact: str = None) -> str:
    """Update CRM fields for a contact in contacts.csv.

    The contact is matched case-insensitively on its email address. Only
    arguments that are not None are written; every one of them is listed
    in the confirmation message.

    Args:
        email: Contact email address (required)
        type: Contact type - PERSON, SERVICE, or NOTIFICATION
        company: Company/organization name
        relationship: e.g., "applicant", "vendor", "investor", "friend"
        priority: high, medium, or low
        deal: Active opportunity/project name
        next_contact_date: When to follow up (YYYY-MM-DD)
        tags: Comma-separated tags
        notes: Additional context
        last_contact: Date of last contact (YYYY-MM-DD)

    Returns:
        Confirmation message, or a not-found message
    """
    if not self.contacts_csv or not os.path.exists(self.contacts_csv):
        return f"Contact {email} not found. Run sync_contacts() first."

    import csv

    # Requested changes, in the order they are reported to the caller.
    requested = {
        'type': type,
        'priority': priority,
        'deal': deal,
        'next_contact_date': next_contact_date,
        'relationship': relationship,
        'company': company,
        'last_contact': last_contact,
        'tags': tags,
        'notes': notes,
    }
    changes = {field: value for field, value in requested.items() if value is not None}

    target = email.lower()
    contacts = []
    found = False
    with open(self.contacts_csv, 'r', newline='') as f:
        reader = csv.DictReader(f)
        fieldnames = list(reader.fieldnames or [])
        for row in reader:
            # The other contact helpers key rows by lowercase address, so
            # match the same way here.
            if (row.get('email') or '').lower() == target:
                found = True
                row.update(changes)
            contacts.append(row)

    if not found:
        return f"Contact {email} not found in contacts.csv"

    # A file created with fewer columns gains the ones being set now.
    for field in changes:
        if field not in fieldnames:
            fieldnames.append(field)

    with open(self.contacts_csv, 'w', newline='') as f:
        # 'ignore' only affects unnamed overflow cells (DictReader's None key).
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval='', extrasaction='ignore')
        writer.writeheader()
        writer.writerows(contacts)

    summary = ', '.join(f"{field}={value}" for field, value in changes.items())
    return f"Updated {email}: {summary if changes else 'no changes'}"
1551
+
1552
def bulk_update_contacts(self, updates: list) -> str:
    """Update multiple contacts in one read/write pass over contacts.csv.

    Contacts are matched case-insensitively by email. Entries that are not
    dicts or lack a string 'email' are ignored. The summary counts only the
    contacts that were actually found in the file.

    Args:
        updates: List of dicts, each with 'email' (required) and optional fields:
            type, company, relationship, priority, deal, next_contact_date,
            tags, notes, last_contact
            Example: [{"email": "foo@bar.com", "type": "PERSON", "priority": "high"},
                      {"email": "baz@qux.com", "type": "NOTIFICATION", "priority": "low"}]

    Returns:
        Summary of updates made, naming any addresses that were not found
    """
    if not self.contacts_csv or not os.path.exists(self.contacts_csv):
        return "No contacts.csv found. Run sync_contacts() first."

    import csv

    updatable_fields = ('type', 'company', 'relationship', 'priority', 'deal',
                        'next_contact_date', 'tags', 'notes', 'last_contact')

    # Build lookup map from updates list (a later entry for the same
    # address replaces an earlier one).
    updates_map = {}
    for entry in updates or []:
        address = entry.get('email') if isinstance(entry, dict) else None
        if isinstance(address, str) and address:
            updates_map[address.lower()] = entry

    contacts = []
    matched = set()
    with open(self.contacts_csv, 'r', newline='') as f:
        reader = csv.DictReader(f)
        fieldnames = list(reader.fieldnames or [])
        for row in reader:
            email_key = (row.get('email') or '').lower()
            update = updates_map.get(email_key)
            if update is not None:
                matched.add(email_key)
                for field in updatable_fields:
                    if update.get(field) is not None:
                        row[field] = update[field]
                        # A file created with fewer columns gains this one.
                        if field not in fieldnames:
                            fieldnames.append(field)
            contacts.append(row)

    # Only rewrite the file when something was actually changed.
    if matched:
        with open(self.contacts_csv, 'w', newline='') as f:
            # 'ignore' only affects unnamed overflow cells (DictReader's None key).
            writer = csv.DictWriter(f, fieldnames=fieldnames, restval='', extrasaction='ignore')
            writer.writeheader()
            writer.writerows(contacts)

    summary = f"Bulk updated {len(matched)} contacts"
    missing = sorted(set(updates_map) - matched)
    if missing:
        summary += f" ({len(missing)} not found: {', '.join(missing)})"
    return summary