@realtimex/email-automator 2.4.0 → 2.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@ import { asyncHandler, NotFoundError } from '../middleware/errorHandler.js';
3
3
  import { authMiddleware } from '../middleware/auth.js';
4
4
  import { apiRateLimit } from '../middleware/rateLimit.js';
5
5
  import { createLogger } from '../utils/logger.js';
6
+ import { getStorageService } from '../services/storage.js';
6
7
 
7
8
  const router = Router();
8
9
  const logger = createLogger('EmailsRoutes');
@@ -19,8 +20,15 @@ router.get('/',
19
20
  account_id,
20
21
  action_taken,
21
22
  search,
23
+ sort_by = 'date',
24
+ sort_order = 'desc'
22
25
  } = req.query;
23
26
 
27
+ // Validate sort params
28
+ const validSortFields = ['date', 'created_at'];
29
+ const sortField = validSortFields.includes(sort_by as string) ? sort_by as string : 'date';
30
+ const isAscending = sort_order === 'asc';
31
+
24
32
  let query = req.supabase!
25
33
  .from('emails')
26
34
  .select(`
@@ -28,7 +36,7 @@ router.get('/',
28
36
  email_accounts!inner(id, user_id, email_address, provider)
29
37
  `, { count: 'exact' })
30
38
  .eq('email_accounts.user_id', req.user!.id)
31
- .order('date', { ascending: false })
39
+ .order(sortField, { ascending: isAscending })
32
40
  .range(
33
41
  parseInt(offset as string, 10),
34
42
  parseInt(offset as string, 10) + parseInt(limit as string, 10) - 1
@@ -88,6 +96,34 @@ router.get('/:emailId',
88
96
  })
89
97
  );
90
98
 
99
+ // Get raw email content (.eml)
100
+ router.get('/:emailId/raw',
101
+ authMiddleware,
102
+ asyncHandler(async (req, res) => {
103
+ const { emailId } = req.params;
104
+
105
+ const { data: email, error } = await req.supabase!
106
+ .from('emails')
107
+ .select('file_path, subject, email_accounts!inner(user_id)')
108
+ .eq('id', emailId)
109
+ .eq('email_accounts.user_id', req.user!.id)
110
+ .single();
111
+
112
+ if (error || !email || !email.file_path) {
113
+ throw new NotFoundError('Raw Email');
114
+ }
115
+
116
+ const storageService = getStorageService();
117
+ const content = await storageService.readEmail(email.file_path);
118
+
119
+ const filename = `${email.subject || 'email'}.eml`.replace(/[^a-z0-9._-]/gi, '_');
120
+
121
+ res.setHeader('Content-Type', 'message/rfc822');
122
+ res.setHeader('Content-Disposition', `attachment; filename="${filename}"`);
123
+ res.send(content);
124
+ })
125
+ );
126
+
91
127
  // Delete email record (not the actual email from provider)
92
128
  router.delete('/:emailId',
93
129
  apiRateLimit,
@@ -98,7 +134,7 @@ router.delete('/:emailId',
98
134
  // Verify ownership first
99
135
  const { data: email } = await req.supabase!
100
136
  .from('emails')
101
- .select('id, email_accounts!inner(user_id)')
137
+ .select('id, file_path, email_accounts!inner(user_id)')
102
138
  .eq('id', emailId)
103
139
  .eq('email_accounts.user_id', req.user!.id)
104
140
  .single();
@@ -107,6 +143,13 @@ router.delete('/:emailId',
107
143
  throw new NotFoundError('Email');
108
144
  }
109
145
 
146
+ // 1. Delete from disk
147
+ if (email.file_path) {
148
+ const storageService = getStorageService();
149
+ await storageService.deleteEmail(email.file_path);
150
+ }
151
+
152
+ // 2. Delete from DB
110
153
  const { error } = await req.supabase!
111
154
  .from('emails')
112
155
  .delete()
@@ -46,7 +46,15 @@ export class EventLogger {
46
46
  await this.log('action', state, { action, reason }, emailId);
47
47
  }
48
48
 
49
- async error(state: string, error: any, emailId?: string) {
50
- await this.log('error', state, { error: error.message || error }, emailId);
49
+ async error(state: string, errorOrDetails: any, emailId?: string) {
50
+ let details;
51
+ if (errorOrDetails instanceof Error) {
52
+ details = { error: errorOrDetails.message };
53
+ } else if (typeof errorOrDetails === 'object' && errorOrDetails !== null) {
54
+ details = errorOrDetails;
55
+ } else {
56
+ details = { error: String(errorOrDetails) };
57
+ }
58
+ await this.log('error', state, details, emailId);
51
59
  }
52
60
  }
@@ -17,19 +17,8 @@ export interface RuleAttachment {
17
17
  export interface GmailMessage {
18
18
  id: string;
19
19
  threadId: string;
20
- subject: string;
21
- sender: string;
22
- recipient: string;
23
- date: string;
24
- internalDate: string; // Gmail's internal timestamp (ms since epoch) - use this for checkpointing
25
- body: string;
26
- snippet: string;
27
- headers: {
28
- importance?: string;
29
- listUnsubscribe?: string;
30
- autoSubmitted?: string;
31
- mailer?: string;
32
- };
20
+ internalDate: string;
21
+ raw: string; // Base64url encoded raw RFC822 message
33
22
  }
34
23
 
35
24
  export interface OAuthCredentials {
@@ -184,102 +173,55 @@ export class GmailService {
184
173
  return data;
185
174
  }
186
175
 
187
- async fetchMessages(
188
- account: EmailAccount,
189
- options: { maxResults?: number; query?: string; pageToken?: string } = {}
190
- ): Promise<{ messages: GmailMessage[]; nextPageToken?: string }> {
191
- const gmail = await this.getAuthenticatedClient(account);
192
- const { maxResults = config.processing.batchSize, query, pageToken } = options;
193
-
194
- const response = await gmail.users.messages.list({
195
- userId: 'me',
196
- maxResults,
197
- q: query,
198
- pageToken,
199
- });
200
-
201
- const messages: GmailMessage[] = [];
202
-
203
- for (const msg of response.data.messages || []) {
204
- if (!msg.id) continue;
205
-
206
- try {
207
- const detail = await gmail.users.messages.get({
208
- userId: 'me',
209
- id: msg.id,
210
- format: 'full',
211
- });
212
-
213
- const parsed = this.parseMessage(detail.data);
214
- if (parsed) {
215
- messages.push(parsed);
216
- }
217
- } catch (error) {
218
- logger.warn('Failed to fetch message details', { messageId: msg.id, error });
219
- }
220
- }
221
-
222
- return {
223
- messages,
224
- nextPageToken: response.data.nextPageToken ?? undefined,
225
- };
226
- }
227
-
228
176
  /**
229
- * Fetch messages in OLDEST-FIRST order using "Fetch IDs → Sort → Hydrate" strategy.
177
+ * Fetch messages in OLDEST-FIRST order using "Fetch IDs → Reverse → Hydrate" strategy.
230
178
  *
231
- * Gmail API always returns newest first and doesn't support sorting.
232
- * To process oldest emails first (critical for checkpoint-based sync), we:
233
- * 1. Fetch ALL message IDs matching the query (lightweight, paginated)
234
- * 2. Sort by internalDate ascending (oldest first)
179
+ * Gmail API always returns newest first. To process absolute oldest emails first:
180
+ * 1. Fetch ALL message IDs matching the query (lightweight)
181
+ * 2. Reverse the list (turning Newest-First into Oldest-First)
235
182
  * 3. Take first N messages (limit)
236
- * 4. Hydrate only those N messages with full details
237
- *
238
- * This ensures we never skip emails when using max_emails pagination.
183
+ * 4. Hydrate ONLY those N messages
239
184
  */
240
185
  async fetchMessagesOldestFirst(
241
186
  account: EmailAccount,
242
- options: { limit: number; query?: string; maxIdsToFetch?: number }
187
+ options: { limit: number; query?: string }
243
188
  ): Promise<{ messages: GmailMessage[]; hasMore: boolean }> {
244
- const { limit, query, maxIdsToFetch = 1000 } = options;
189
+ const { limit, query } = options;
245
190
 
246
- // Step 1: Fetch all message IDs (lightweight)
247
- const allIds = await this.fetchAllMessageIds(account, query, maxIdsToFetch);
191
+ // Step 1: Fetch IDs (No hydration yet, so this is fast)
192
+ const allIds = await this.fetchAllMessageIds(account, query);
248
193
 
249
194
  if (allIds.length === 0) {
250
195
  return { messages: [], hasMore: false };
251
196
  }
252
197
 
253
- logger.debug('Fetched message IDs', { count: allIds.length, query });
254
-
255
- // Step 2: Sort by internalDate ascending (oldest first)
256
- allIds.sort((a, b) => parseInt(a.internalDate) - parseInt(b.internalDate));
198
+ // Step 2: Reverse to get oldest first
199
+ allIds.reverse();
257
200
 
258
- // Step 3: Take first N IDs
201
+ // Step 3: Take the window we need
259
202
  const idsToHydrate = allIds.slice(0, limit);
260
203
  const hasMore = allIds.length > limit;
261
204
 
262
- // Step 4: Hydrate those specific messages
263
- const messages = await this.hydrateMessages(account, idsToHydrate.map(m => m.id));
205
+ logger.debug('Hydrating oldest emails', { totalFound: allIds.length, hydrating: idsToHydrate.length });
264
206
 
265
- // Re-sort hydrated messages by internalDate (maintain order)
266
- messages.sort((a, b) => parseInt(a.internalDate) - parseInt(b.internalDate));
207
+ // Step 4: Hydrate only the target messages
208
+ const messages = await this.hydrateMessages(account, idsToHydrate);
267
209
 
268
210
  return { messages, hasMore };
269
211
  }
270
212
 
271
213
  /**
272
214
  * Fetch all message IDs matching a query (lightweight, paginated).
273
- * Uses minimal fields for speed: only id and internalDate.
215
+ * Collects IDs only to remain fast even for large result sets.
274
216
  */
275
217
  private async fetchAllMessageIds(
276
218
  account: EmailAccount,
277
- query: string | undefined,
278
- maxIds: number
279
- ): Promise<{ id: string; internalDate: string }[]> {
219
+ query: string | undefined
220
+ ): Promise<string[]> {
280
221
  const gmail = await this.getAuthenticatedClient(account);
281
- const results: { id: string; internalDate: string }[] = [];
222
+ const allIds: string[] = [];
282
223
  let pageToken: string | undefined;
224
+ const MAX_IDS = 5000; // Efficient chunk size for finding the "bottom" of recent emails
283
225
 
284
226
  do {
285
227
  const response = await gmail.users.messages.list({
@@ -287,42 +229,22 @@ export class GmailService {
287
229
  q: query,
288
230
  pageToken,
289
231
  maxResults: 500, // Max allowed per page
290
- // Note: messages.list only returns id and threadId, not internalDate
291
- // We need to fetch internalDate separately with minimal format
292
232
  });
293
233
 
294
234
  const messageRefs = response.data.messages || [];
295
-
296
- // Fetch internalDate for each message (using metadata format for speed)
297
235
  for (const ref of messageRefs) {
298
- if (!ref.id || results.length >= maxIds) break;
299
-
300
- try {
301
- const msg = await gmail.users.messages.get({
302
- userId: 'me',
303
- id: ref.id,
304
- format: 'minimal', // Only returns id, threadId, labelIds, snippet, internalDate
305
- });
306
-
307
- if (msg.data.id && msg.data.internalDate) {
308
- results.push({
309
- id: msg.data.id,
310
- internalDate: msg.data.internalDate,
311
- });
312
- }
313
- } catch (error) {
314
- logger.warn('Failed to fetch message metadata', { messageId: ref.id });
315
- }
236
+ if (ref.id) allIds.push(ref.id);
316
237
  }
317
238
 
318
239
  pageToken = response.data.nextPageToken ?? undefined;
319
- } while (pageToken && results.length < maxIds);
240
+ } while (pageToken && allIds.length < MAX_IDS);
320
241
 
321
- return results;
242
+ logger.info('Collected matching message IDs', { total: allIds.length, query });
243
+ return allIds;
322
244
  }
323
245
 
324
246
  /**
325
- * Hydrate specific messages by ID (fetch full details).
247
+ * Hydrate specific messages by ID (fetch raw RFC822 data).
326
248
  */
327
249
  private async hydrateMessages(
328
250
  account: EmailAccount,
@@ -331,73 +253,38 @@ export class GmailService {
331
253
  const gmail = await this.getAuthenticatedClient(account);
332
254
  const messages: GmailMessage[] = [];
333
255
 
334
- for (const id of messageIds) {
335
- try {
336
- const detail = await gmail.users.messages.get({
337
- userId: 'me',
338
- id,
339
- format: 'full',
340
- });
341
-
342
- const parsed = this.parseMessage(detail.data);
343
- if (parsed) {
344
- messages.push(parsed);
256
+ // Hydrate in small parallel batches to avoid rate limits
257
+ const BATCH_SIZE = 10;
258
+ for (let i = 0; i < messageIds.length; i += BATCH_SIZE) {
259
+ const batch = messageIds.slice(i, i + BATCH_SIZE);
260
+ const hydrated = await Promise.all(batch.map(async (id) => {
261
+ try {
262
+ const detail = await gmail.users.messages.get({
263
+ userId: 'me',
264
+ id,
265
+ format: 'raw',
266
+ });
267
+
268
+ if (detail.data.raw) {
269
+ return {
270
+ id: detail.data.id!,
271
+ threadId: detail.data.threadId!,
272
+ internalDate: detail.data.internalDate!,
273
+ raw: detail.data.raw
274
+ };
275
+ }
276
+ } catch (error) {
277
+ logger.warn('Failed to hydrate message', { messageId: id, error });
345
278
  }
346
- } catch (error) {
347
- logger.warn('Failed to hydrate message', { messageId: id, error });
348
- }
279
+ return null;
280
+ }));
281
+
282
+ messages.push(...hydrated.filter((m): m is GmailMessage => m !== null));
349
283
  }
350
284
 
351
285
  return messages;
352
286
  }
353
287
 
354
- private parseMessage(message: gmail_v1.Schema$Message): GmailMessage | null {
355
- if (!message.id || !message.threadId) return null;
356
-
357
- const headers = message.payload?.headers || [];
358
- const getHeader = (name: string) => headers.find(h => h.name?.toLowerCase() === name.toLowerCase())?.value || '';
359
-
360
- let body = '';
361
- const payload = message.payload;
362
-
363
- if (payload?.parts) {
364
- // Multipart message
365
- const textPart = payload.parts.find(p => p.mimeType === 'text/plain');
366
- const htmlPart = payload.parts.find(p => p.mimeType === 'text/html');
367
- const part = textPart || htmlPart || payload.parts[0];
368
- body = this.decodeBody(part?.body?.data);
369
- } else if (payload?.body?.data) {
370
- body = this.decodeBody(payload.body.data);
371
- }
372
-
373
- return {
374
- id: message.id,
375
- threadId: message.threadId,
376
- subject: getHeader('Subject') || 'No Subject',
377
- sender: getHeader('From'),
378
- recipient: getHeader('To'),
379
- date: getHeader('Date'),
380
- internalDate: message.internalDate || '', // Gmail's internal timestamp (ms since epoch)
381
- body,
382
- snippet: message.snippet || '',
383
- headers: {
384
- importance: getHeader('Importance') || getHeader('X-Priority'),
385
- listUnsubscribe: getHeader('List-Unsubscribe'),
386
- autoSubmitted: getHeader('Auto-Submitted'),
387
- mailer: getHeader('X-Mailer'),
388
- }
389
- };
390
- }
391
-
392
- private decodeBody(data?: string | null): string {
393
- if (!data) return '';
394
- try {
395
- return Buffer.from(data, 'base64').toString('utf-8');
396
- } catch {
397
- return '';
398
- }
399
- }
400
-
401
288
  async trashMessage(account: EmailAccount, messageId: string): Promise<void> {
402
289
  const gmail = await this.getAuthenticatedClient(account);
403
290
  await gmail.users.messages.trash({ userId: 'me', id: messageId });
@@ -151,7 +151,8 @@ REQUIRED JSON STRUCTURE:
151
151
  await eventLogger.info('Thinking', `Analyzing email: ${context.subject}`, {
152
152
  model: this.model,
153
153
  system_prompt: systemPrompt,
154
- content_preview: cleanedContent
154
+ content_preview: cleanedContent,
155
+ content_length: cleanedContent.length
155
156
  }, emailId);
156
157
  } catch (err) {
157
158
  console.error('[Intelligence] Failed to log thinking event:', err);
@@ -167,7 +168,7 @@ REQUIRED JSON STRUCTURE:
167
168
  { role: 'system', content: systemPrompt },
168
169
  { role: 'user', content: cleanedContent || '[Empty email body]' },
169
170
  ],
170
- response_format: { type: 'json_object' },
171
+ // response_format: { type: 'json_object' }, // Removed for compatibility
171
172
  temperature: 0.1,
172
173
  });
173
174
 
@@ -16,17 +16,7 @@ const GRAPH_SCOPES = [
16
16
  export interface OutlookMessage {
17
17
  id: string;
18
18
  conversationId: string;
19
- subject: string;
20
- sender: string;
21
- recipient: string;
22
- date: string;
23
- body: string;
24
- snippet: string;
25
- headers: {
26
- importance?: string;
27
- listUnsubscribe?: string;
28
- autoSubmitted?: string;
29
- };
19
+ raw: string; // MIME content
30
20
  }
31
21
 
32
22
  export interface DeviceCodeResponse {
@@ -160,9 +150,7 @@ export class MicrosoftService {
160
150
  const accessToken = account.access_token || '';
161
151
  const { top = 20, skip = 0, filter } = options;
162
152
 
163
- // IMPORTANT: Use ascending order to fetch OLDEST emails first
164
- // This ensures checkpoint-based pagination works correctly and doesn't skip emails
165
- let url = `https://graph.microsoft.com/v1.0/me/messages?$top=${top}&$skip=${skip}&$orderby=receivedDateTime asc&$select=id,conversationId,subject,from,toRecipients,receivedDateTime,body,bodyPreview,importance`;
153
+ let url = `https://graph.microsoft.com/v1.0/me/messages?$top=${top}&$skip=${skip}&$orderby=receivedDateTime asc&$select=id,conversationId`;
166
154
  if (filter) {
167
155
  url += `&$filter=${encodeURIComponent(filter)}`;
168
156
  }
@@ -184,19 +172,33 @@ export class MicrosoftService {
184
172
  }
185
173
 
186
174
  const data = await response.json();
187
- const messages: OutlookMessage[] = (data.value || []).map((msg: any) => ({
188
- id: msg.id,
189
- conversationId: msg.conversationId,
190
- subject: msg.subject || 'No Subject',
191
- sender: msg.from?.emailAddress?.address || 'Unknown',
192
- recipient: msg.toRecipients?.[0]?.emailAddress?.address || '',
193
- date: msg.receivedDateTime,
194
- body: msg.body?.content || '',
195
- snippet: msg.bodyPreview || '',
196
- headers: {
197
- importance: msg.importance,
175
+ const messageRefs = data.value || [];
176
+ const messages: OutlookMessage[] = [];
177
+
178
+ // For each message, fetch the raw MIME content
179
+ for (const ref of messageRefs) {
180
+ try {
181
+ const rawResponse = await fetch(
182
+ `https://graph.microsoft.com/v1.0/me/messages/${ref.id}/$value`,
183
+ {
184
+ headers: {
185
+ Authorization: `Bearer ${accessToken}`,
186
+ },
187
+ }
188
+ );
189
+
190
+ if (rawResponse.ok) {
191
+ const rawMime = await rawResponse.text();
192
+ messages.push({
193
+ id: ref.id,
194
+ conversationId: ref.conversationId,
195
+ raw: rawMime
196
+ });
197
+ }
198
+ } catch (error) {
199
+ logger.warn('Failed to fetch raw content for Outlook message', { messageId: ref.id, error });
198
200
  }
199
- }));
201
+ }
200
202
 
201
203
  return {
202
204
  messages,
@@ -287,16 +289,19 @@ export class MicrosoftService {
287
289
  ): Promise<string> {
288
290
  const accessToken = account.access_token || '';
289
291
 
290
- // Get original message
292
+ // Get original message (minimal metadata)
291
293
  const originalResponse = await fetch(
292
- `https://graph.microsoft.com/v1.0/me/messages/${originalMessageId}`,
294
+ `https://graph.microsoft.com/v1.0/me/messages/${originalMessageId}?$select=id,conversationId`,
293
295
  {
294
296
  headers: {
295
297
  Authorization: `Bearer ${accessToken}`,
296
298
  },
297
299
  }
298
300
  );
299
- const original = await originalResponse.json();
301
+
302
+ if (!originalResponse.ok) {
303
+ throw new Error('Failed to fetch original message metadata');
304
+ }
300
305
 
301
306
  // Create reply draft
302
307
  const response = await fetch(