devscontext 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,580 @@
1
+ """Gmail adapter for fetching email context.
2
+
3
+ This adapter connects to Gmail API to search for emails mentioning
4
+ ticket IDs or keywords, groups by thread, and extracts content.
5
+
6
+ Implements the Adapter interface for the plugin system.
7
+
8
+ OAuth Notes:
9
+ - First run requires browser authentication
10
+ - Subsequent runs use stored refresh token
11
+ - Token is stored at config.token_path
12
+
13
+ Example:
14
+ config = GmailConfig(credentials_path="credentials.json", enabled=True)
15
+ adapter = GmailAdapter(config)
16
+ context = await adapter.fetch_task_context("PROJ-123", ticket)
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import asyncio
22
+ import base64
23
+ import re
24
+ from datetime import UTC, datetime
25
+ from email.utils import parseaddr, parsedate_to_datetime
26
+ from pathlib import Path
27
+ from typing import TYPE_CHECKING, Any, ClassVar
28
+
29
+ from devscontext.constants import (
30
+ ADAPTER_GMAIL,
31
+ GMAIL_API_SCOPES,
32
+ GMAIL_BODY_MAX_CHARS,
33
+ GMAIL_MAX_RESULTS_PER_QUERY,
34
+ SOURCE_TYPE_EMAIL,
35
+ )
36
+ from devscontext.logging import get_logger
37
+ from devscontext.models import (
38
+ GmailConfig,
39
+ GmailContext,
40
+ GmailMessage,
41
+ GmailThread,
42
+ )
43
+ from devscontext.plugins.base import Adapter, SearchResult, SourceContext
44
+ from devscontext.utils import extract_keywords, truncate_text
45
+
46
+ if TYPE_CHECKING:
47
+ from devscontext.models import JiraTicket
48
+
49
+ logger = get_logger(__name__)
50
+
51
+
52
class GmailAdapter(Adapter):
    """Plugin adapter that surfaces Gmail conversations as task context.

    Satisfies the plugin system's Adapter interface. Emails that mention a
    ticket ID or related keywords are looked up, grouped by conversation
    thread, and reduced to their text content.

    Class Attributes:
        name: Adapter identifier ("gmail").
        source_type: Source category ("email").
        config_schema: Configuration model (GmailConfig).

    OAuth Notes:
        - The first run goes through a browser-based OAuth flow.
        - Later runs reuse the refresh token stored at token_path.
        - Only read-only access is requested (gmail.readonly scope).
    """

    name: ClassVar[str] = ADAPTER_GMAIL
    source_type: ClassVar[str] = SOURCE_TYPE_EMAIL
    config_schema: ClassVar[type[GmailConfig]] = GmailConfig
73
+
74
+ def __init__(self, config: GmailConfig) -> None:
75
+ """Initialize the Gmail adapter.
76
+
77
+ Args:
78
+ config: Gmail configuration with credentials path.
79
+ """
80
+ self._config = config
81
+ self._service: Any = None
82
+ self._credentials: Any = None
83
+
84
+ def _get_service(self) -> Any:
85
+ """Get or create the Gmail API service (lazy initialization).
86
+
87
+ Returns:
88
+ Gmail API service object.
89
+
90
+ Raises:
91
+ ImportError: If google-api-python-client is not installed.
92
+ ValueError: If credentials cannot be loaded.
93
+ """
94
+ if self._service is not None:
95
+ return self._service
96
+
97
+ try:
98
+ from google.auth.transport.requests import Request
99
+ from google.oauth2.credentials import Credentials
100
+ from google_auth_oauthlib.flow import InstalledAppFlow
101
+ from googleapiclient.discovery import build
102
+ except ImportError as e:
103
+ raise ImportError(
104
+ "google-api-python-client not installed. "
105
+ "Install with: pip install devscontext[gmail]"
106
+ ) from e
107
+
108
+ creds = None
109
+ token_path = Path(self._config.token_path)
110
+ credentials_path = Path(self._config.credentials_path)
111
+
112
+ # Load existing token if available
113
+ if token_path.exists():
114
+ try:
115
+ creds = Credentials.from_authorized_user_file(str(token_path), GMAIL_API_SCOPES)
116
+ except Exception as e:
117
+ logger.warning(f"Failed to load token: {e}")
118
+
119
+ # Refresh or get new credentials
120
+ if creds is None or not creds.valid:
121
+ if creds and creds.expired and creds.refresh_token:
122
+ try:
123
+ creds.refresh(Request())
124
+ except Exception as e:
125
+ logger.warning(f"Failed to refresh token: {e}")
126
+ creds = None
127
+
128
+ if creds is None:
129
+ if not credentials_path.exists():
130
+ raise ValueError(f"Gmail credentials file not found: {credentials_path}")
131
+
132
+ # This will open browser for auth on first run
133
+ flow = InstalledAppFlow.from_client_secrets_file(
134
+ str(credentials_path),
135
+ GMAIL_API_SCOPES,
136
+ )
137
+ creds = flow.run_local_server(port=0)
138
+
139
+ # Save token for future use
140
+ token_path.parent.mkdir(parents=True, exist_ok=True)
141
+ token_path.write_text(creds.to_json())
142
+
143
+ self._credentials = creds
144
+ self._service = build("gmail", "v1", credentials=creds)
145
+ return self._service
146
+
147
async def close(self) -> None:
    """Forget the cached service and credentials."""
    self._service = self._credentials = None
151
+
152
async def _search_emails(
    self,
    query: str,
    max_results: int = GMAIL_MAX_RESULTS_PER_QUERY,
) -> list[dict[str, Any]]:
    """Search Gmail for messages matching query.

    The caller's query is combined with the configured search scope and,
    when labels are configured, with an OR-group of ``label:`` filters.
    The query and the label group are each parenthesised so the grouping
    is explicit and does not rely on how Gmail ranks ``OR`` against the
    implicit AND between clauses.

    Args:
        query: Gmail search query (supports Gmail search operators).
        max_results: Maximum number of messages to return.

    Returns:
        List of message metadata dicts with id and threadId. Empty when
        nothing matches, when ``max_results`` is not positive, or when
        the search fails (failures are logged, not raised).
    """
    if max_results <= 0:
        return []

    try:
        service = self._get_service()

        clauses: list[str] = []
        if query.strip():
            clauses.append(f"({query.strip()})")

        # Add scope filter from config (skipped when unset or empty)
        if self._config.search_scope:
            clauses.append(str(self._config.search_scope))

        # Add label filter if configured
        if self._config.labels:
            label_query = " OR ".join(f"label:{label}" for label in self._config.labels)
            clauses.append(f"({label_query})")

        full_query = " ".join(clauses)

        def run_search() -> dict[str, Any]:
            request = (
                service.users()
                .messages()
                .list(userId="me", q=full_query, maxResults=max_results)
            )
            return request.execute()

        # The client library call is blocking; run it off the event loop.
        result = await asyncio.to_thread(run_search)

        messages: list[dict[str, Any]] = result.get("messages") or []
        return messages

    except ImportError:
        logger.warning("Gmail dependencies not installed")
        return []
    except Exception as e:
        logger.warning(f"Gmail search failed: {e}")
        return []
202
+
203
+ async def _get_message(self, message_id: str) -> dict[str, Any] | None:
204
+ """Fetch full message content by ID.
205
+
206
+ Args:
207
+ message_id: Gmail message ID.
208
+
209
+ Returns:
210
+ Message dict or None if fetch fails.
211
+ """
212
+ try:
213
+ service = self._get_service()
214
+
215
+ loop = asyncio.get_event_loop()
216
+ msg: dict[str, Any] = await loop.run_in_executor(
217
+ None,
218
+ lambda: (
219
+ service.users()
220
+ .messages()
221
+ .get(
222
+ userId="me",
223
+ id=message_id,
224
+ format="full",
225
+ )
226
+ .execute()
227
+ ),
228
+ )
229
+
230
+ return msg
231
+
232
+ except Exception as e:
233
+ logger.warning(f"Failed to fetch message {message_id}: {e}")
234
+ return None
235
+
236
+ async def _get_thread(self, thread_id: str) -> dict[str, Any] | None:
237
+ """Fetch full thread with all messages.
238
+
239
+ Args:
240
+ thread_id: Gmail thread ID.
241
+
242
+ Returns:
243
+ Thread dict or None if fetch fails.
244
+ """
245
+ try:
246
+ service = self._get_service()
247
+
248
+ loop = asyncio.get_event_loop()
249
+ thread: dict[str, Any] = await loop.run_in_executor(
250
+ None,
251
+ lambda: (
252
+ service.users()
253
+ .threads()
254
+ .get(
255
+ userId="me",
256
+ id=thread_id,
257
+ format="full",
258
+ )
259
+ .execute()
260
+ ),
261
+ )
262
+
263
+ return thread
264
+
265
+ except Exception as e:
266
+ logger.warning(f"Failed to fetch thread {thread_id}: {e}")
267
+ return None
268
+
269
def _parse_message(self, msg: dict[str, Any]) -> GmailMessage:
    """Parse Gmail API message into our model.

    Header names are matched case-insensitively. ``To`` and ``Cc`` are
    parsed as address lists, so a display name containing a comma (for
    example ``"Doe, Jane" <jane@example.com>``) stays one entry, and each
    entry is reduced to its bare address, the same form used for
    ``sender``.

    Args:
        msg: Raw message dict from Gmail API.

    Returns:
        GmailMessage instance.
    """
    # Local import: getaddresses is not among the module-level imports.
    from email.utils import getaddresses

    payload = msg.get("payload", {})
    headers = {h["name"].lower(): h["value"] for h in payload.get("headers", [])}

    def address_list(header_name: str) -> list[str]:
        pairs = getaddresses([headers.get(header_name, "")])
        return [address for _display, address in pairs if address]

    # Parse sender
    sender_raw = headers.get("from", "")
    sender_name, sender_email = parseaddr(sender_raw)

    # Parse date
    date = None
    try:
        date = parsedate_to_datetime(headers.get("date", ""))
        if date.tzinfo is None:
            date = date.replace(tzinfo=UTC)
    except (ValueError, TypeError):
        date = None

    if date is None:
        # Missing or unparsable Date header: prefer Gmail's own timestamp
        # (epoch milliseconds) over "now", which would make an old message
        # look like the newest one in its thread.
        try:
            date = datetime.fromtimestamp(int(msg["internalDate"]) / 1000, tz=UTC)
        except (KeyError, ValueError, TypeError, OverflowError, OSError):
            date = datetime.now(UTC)

    # Extract body
    body_text = self._extract_body(payload)

    return GmailMessage(
        message_id=msg.get("id", ""),
        thread_id=msg.get("threadId", ""),
        subject=headers.get("subject") or "(no subject)",
        sender=sender_email or sender_raw,
        sender_name=sender_name or None,
        recipients=address_list("to"),
        cc=address_list("cc"),
        date=date,
        snippet=msg.get("snippet", ""),
        body_text=truncate_text(body_text, GMAIL_BODY_MAX_CHARS),
        labels=msg.get("labelIds", []),
    )
316
+
317
+ def _extract_body(self, payload: dict[str, Any]) -> str:
318
+ """Extract plain text body from message payload.
319
+
320
+ Recursively searches for text/plain parts, falling back to
321
+ HTML with tags stripped if no plain text is found.
322
+
323
+ Args:
324
+ payload: Message payload dict.
325
+
326
+ Returns:
327
+ Extracted body text.
328
+ """
329
+ # Try to find plain text part
330
+ if payload.get("mimeType") == "text/plain":
331
+ data = payload.get("body", {}).get("data", "")
332
+ if data:
333
+ return base64.urlsafe_b64decode(data).decode("utf-8", errors="replace")
334
+
335
+ # Check parts recursively
336
+ for part in payload.get("parts", []):
337
+ if part.get("mimeType") == "text/plain":
338
+ data = part.get("body", {}).get("data", "")
339
+ if data:
340
+ return base64.urlsafe_b64decode(data).decode("utf-8", errors="replace")
341
+
342
+ # Recurse into nested parts
343
+ nested = self._extract_body(part)
344
+ if nested:
345
+ return nested
346
+
347
+ # Fall back to HTML if no plain text
348
+ if payload.get("mimeType") == "text/html":
349
+ data = payload.get("body", {}).get("data", "")
350
+ if data:
351
+ html = base64.urlsafe_b64decode(data).decode("utf-8", errors="replace")
352
+ # Strip HTML tags (simple approach)
353
+ return re.sub(r"<[^>]+>", " ", html).strip()
354
+
355
+ return ""
356
+
357
async def fetch_task_context(
    self,
    task_id: str,
    ticket: JiraTicket | None = None,
) -> SourceContext:
    """Fetch context from Gmail.

    Search strategy:
        1. Search for the ticket ID as an exact phrase
        2. Add up to three keywords from the ticket title
        3. Group results by thread
        4. Fetch full thread content (at most ten threads)

    Args:
        task_id: The task identifier to search for.
        ticket: Optional Jira ticket for keyword extraction.

    Returns:
        SourceContext with GmailContext data. ``data`` is None when the
        adapter is disabled or has no credentials path, and an empty
        GmailContext when nothing was found.
    """
    if not self._config.enabled:
        logger.debug("Gmail adapter is disabled")
        return SourceContext(
            source_name=self.name,
            source_type=self.source_type,
            data=None,
            raw_text="",
        )

    if not self._config.credentials_path:
        logger.warning("Gmail adapter missing credentials path")
        return SourceContext(
            source_name=self.name,
            source_type=self.source_type,
            data=None,
            raw_text="",
        )

    # Build search queries
    search_terms = [task_id]
    if ticket:
        search_terms.extend(extract_keywords(ticket.title)[:3])

    # Each term becomes a quoted phrase, so an embedded double quote would
    # end the phrase early; replace it. Blank and duplicate terms are dropped.
    phrases = list(
        dict.fromkeys(
            cleaned
            for term in search_terms
            if (cleaned := str(term).replace('"', " ").strip())
        )
    )

    # Search with all terms combined
    message_refs: list[dict[str, Any]] = []
    if phrases:
        query = " OR ".join(f'"{phrase}"' for phrase in phrases)
        message_refs = await self._search_emails(query, self._config.max_results)

    if not message_refs:
        return SourceContext(
            source_name=self.name,
            source_type=self.source_type,
            data=GmailContext(),
            raw_text="",
            metadata={"task_id": task_id, "thread_count": 0},
        )

    # Group by thread (deduplicated, first-seen order) and fetch full threads
    thread_ids: list[str] = list(
        dict.fromkeys(tid for ref in message_refs if (tid := ref.get("threadId")) is not None)
    )
    threads: list[GmailThread] = []

    for thread_id in thread_ids[:10]:  # Limit threads to avoid too many API calls
        thread_data = await self._get_thread(thread_id)
        if not thread_data:
            continue

        messages = [self._parse_message(m) for m in thread_data.get("messages", [])]
        if not messages:
            continue

        # Participants in first-seen order. A set would reorder them from
        # run to run and change which ones the formatted text shows.
        participants: dict[str, None] = {}
        for message in messages:
            for address in (message.sender, *message.recipients):
                if address:
                    participants.setdefault(address, None)

        threads.append(
            GmailThread(
                thread_id=thread_id,
                subject=messages[0].subject,
                messages=messages,
                participants=list(participants),
                latest_date=max(m.date for m in messages),
            )
        )

    # Sort threads by latest date
    threads.sort(key=lambda t: t.latest_date, reverse=True)

    gmail_context = GmailContext(threads=threads)
    raw_text = self._format_gmail_context(gmail_context)

    thread_count = len(threads)
    message_count = sum(len(t.messages) for t in threads)

    logger.info(
        "Gmail context assembled",
        extra={
            "task_id": task_id,
            "thread_count": thread_count,
            "message_count": message_count,
        },
    )

    return SourceContext(
        source_name=self.name,
        source_type=self.source_type,
        data=gmail_context,
        raw_text=raw_text,
        metadata={
            "task_id": task_id,
            "thread_count": thread_count,
            "message_count": message_count,
        },
    )
473
+
474
+ def _format_gmail_context(self, context: GmailContext) -> str:
475
+ """Format Gmail context as raw text for synthesis.
476
+
477
+ Args:
478
+ context: GmailContext with threads.
479
+
480
+ Returns:
481
+ Formatted markdown string.
482
+ """
483
+ parts: list[str] = []
484
+
485
+ for thread in context.threads:
486
+ thread_parts = [
487
+ f"## Email Thread: {thread.subject}",
488
+ f"**Participants:** {', '.join(thread.participants[:5])}",
489
+ f"**Latest:** {thread.latest_date.strftime('%Y-%m-%d')}",
490
+ "",
491
+ ]
492
+
493
+ for msg in thread.messages[:5]: # Limit messages per thread
494
+ sender = msg.sender_name or msg.sender
495
+ date_str = msg.date.strftime("%Y-%m-%d %H:%M")
496
+ thread_parts.append(f"**{sender}** ({date_str}):")
497
+ thread_parts.append(msg.body_text or msg.snippet)
498
+ thread_parts.append("")
499
+
500
+ parts.append("\n".join(thread_parts))
501
+
502
+ return "\n\n---\n\n".join(parts)
503
+
504
async def search(
    self,
    query: str,
    max_results: int = 10,
) -> list[SearchResult]:
    """Look up emails matching the query and summarise each hit.

    Args:
        query: Search terms.
        max_results: Maximum number of results.

    Returns:
        List of SearchResult items; empty when the adapter is disabled.
        Messages that cannot be fetched are left out.
    """
    if not self._config.enabled:
        return []

    refs = await self._search_emails(query, max_results)
    found: list[SearchResult] = []

    for ref in refs[:max_results]:
        raw = await self._get_message(ref.get("id", ""))
        if raw:
            message = self._parse_message(raw)
            details = {
                "from": message.sender,
                "date": message.date.isoformat(),
                "thread_id": message.thread_id,
            }
            found.append(
                SearchResult(
                    source_name=self.name,
                    source_type=self.source_type,
                    title=message.subject,
                    excerpt=message.snippet,
                    metadata=details,
                )
            )

    return found
546
+
547
async def health_check(self) -> bool:
    """Report whether Gmail is configured and reachable.

    A disabled adapter counts as healthy. Otherwise the account profile
    is requested and the check passes when it carries an email address.

    Returns:
        True if healthy or disabled, False if there's an issue.
    """
    config = self._config
    if not config.enabled:
        return True

    if not config.credentials_path:
        logger.warning("Gmail adapter missing credentials path")
        return False

    try:
        service = self._get_service()

        def probe() -> Any:
            return service.users().getProfile(userId="me").execute()

        # Blocking client call: hand it to the default executor.
        profile = await asyncio.get_running_loop().run_in_executor(None, probe)

        healthy = bool(profile.get("emailAddress"))
        if healthy:
            logger.info("Gmail health check passed")
        return healthy

    except ImportError:
        logger.warning("Gmail dependencies not installed")
        return False
    except Exception as exc:
        logger.warning(f"Gmail health check failed: {exc}")
        return False