heathrow 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +58 -0
  3. data/README.md +205 -0
  4. data/bin/heathrow +42 -0
  5. data/bin/heathrowd +283 -0
  6. data/docs/ARCHITECTURE.md +1172 -0
  7. data/docs/DATABASE_SCHEMA.md +685 -0
  8. data/docs/DEVELOPMENT_WORKFLOW.md +867 -0
  9. data/docs/DISCORD_SETUP.md +142 -0
  10. data/docs/GMAIL_OAUTH_SETUP.md +120 -0
  11. data/docs/PLUGIN_SYSTEM.md +1370 -0
  12. data/docs/PROJECT_PLAN.md +1022 -0
  13. data/docs/README.md +417 -0
  14. data/docs/REDDIT_SETUP.md +174 -0
  15. data/docs/REPLY_FORWARD.md +182 -0
  16. data/docs/WHATSAPP_TELEGRAM_SETUP.md +306 -0
  17. data/heathrow.gemspec +34 -0
  18. data/heathrowd.service +21 -0
  19. data/img/heathrow.svg +95 -0
  20. data/img/rss_threaded.png +0 -0
  21. data/img/sources.png +0 -0
  22. data/lib/heathrow/address_book.rb +42 -0
  23. data/lib/heathrow/config.rb +332 -0
  24. data/lib/heathrow/database.rb +731 -0
  25. data/lib/heathrow/database_new.rb +392 -0
  26. data/lib/heathrow/event_bus.rb +175 -0
  27. data/lib/heathrow/logger.rb +122 -0
  28. data/lib/heathrow/message.rb +176 -0
  29. data/lib/heathrow/message_composer.rb +399 -0
  30. data/lib/heathrow/message_organizer.rb +774 -0
  31. data/lib/heathrow/migrations/001_initial_schema.rb +248 -0
  32. data/lib/heathrow/notmuch.rb +45 -0
  33. data/lib/heathrow/oauth2_smtp.rb +254 -0
  34. data/lib/heathrow/plugin/base.rb +212 -0
  35. data/lib/heathrow/plugin_manager.rb +141 -0
  36. data/lib/heathrow/poller.rb +93 -0
  37. data/lib/heathrow/smtp_sender.rb +204 -0
  38. data/lib/heathrow/source.rb +39 -0
  39. data/lib/heathrow/sources/base.rb +74 -0
  40. data/lib/heathrow/sources/discord.rb +357 -0
  41. data/lib/heathrow/sources/gmail.rb +294 -0
  42. data/lib/heathrow/sources/imap.rb +198 -0
  43. data/lib/heathrow/sources/instagram.rb +307 -0
  44. data/lib/heathrow/sources/instagram_fetch.py +101 -0
  45. data/lib/heathrow/sources/instagram_send.py +55 -0
  46. data/lib/heathrow/sources/instagram_send_marionette.py +104 -0
  47. data/lib/heathrow/sources/maildir.rb +606 -0
  48. data/lib/heathrow/sources/messenger.rb +212 -0
  49. data/lib/heathrow/sources/messenger_fetch.js +297 -0
  50. data/lib/heathrow/sources/messenger_fetch_marionette.py +138 -0
  51. data/lib/heathrow/sources/messenger_send.js +32 -0
  52. data/lib/heathrow/sources/messenger_send.py +100 -0
  53. data/lib/heathrow/sources/reddit.rb +461 -0
  54. data/lib/heathrow/sources/rss.rb +299 -0
  55. data/lib/heathrow/sources/slack.rb +375 -0
  56. data/lib/heathrow/sources/source_manager.rb +328 -0
  57. data/lib/heathrow/sources/telegram.rb +498 -0
  58. data/lib/heathrow/sources/webpage.rb +207 -0
  59. data/lib/heathrow/sources/weechat.rb +479 -0
  60. data/lib/heathrow/sources/whatsapp.rb +474 -0
  61. data/lib/heathrow/ui/application.rb +8098 -0
  62. data/lib/heathrow/ui/navigation.rb +8 -0
  63. data/lib/heathrow/ui/panes.rb +8 -0
  64. data/lib/heathrow/ui/source_wizard.rb +567 -0
  65. data/lib/heathrow/ui/threaded_view.rb +780 -0
  66. data/lib/heathrow/ui/views.rb +8 -0
  67. data/lib/heathrow/version.rb +3 -0
  68. data/lib/heathrow/wizards/discord_wizard.rb +193 -0
  69. data/lib/heathrow/wizards/slack_wizard.rb +140 -0
  70. data/lib/heathrow.rb +55 -0
  71. metadata +147 -0
@@ -0,0 +1,774 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ module Heathrow
5
+ # Organizes messages into threads, groups, and channels
6
+ class MessageOrganizer
7
+ attr_reader :messages, :threads, :groups, :channels
8
+
9
# Builds an organizer and immediately sorts the supplied messages into
# threads, groups, channels and direct messages.
#
# @param messages [Array<Hash>] message hashes addressed with string keys
# @param db [Object, nil] optional store; when given, the result of its
#   get_source_type_map call is kept as the source_id => type lookup table
# @param group_by_folder [Boolean] when true every message is routed
#   through folder grouping instead of per-source organization
def initialize(messages = [], db = nil, group_by_folder: false)
  @messages = messages
  @db = db
  @group_by_folder = group_by_folder

  # Empty containers; organize_messages fills them in.
  @channels = {}
  @groups = {}
  @threads = {}
  @dms = []

  # Fetch the whole source-type map once so later lookups never hit the db.
  @source_types_cache = {}
  @source_types_cache = db.get_source_type_map if db

  organize_messages
end
21
+
22
+ # Get plugin type for a message (from source_type or by looking up source)
23
# Resolves the plugin/source type of a message.
#
# A 'source_type' already present on the message wins. Otherwise the type
# is looked up by 'source_id' in the source-type cache, and 'unknown' is
# returned when neither yields a value.
#
# @param msg [Hash] message hash
# @return [Object] the stamped or cached type, or the string 'unknown'
def get_plugin_type(msg)
  stamped = msg['source_type']
  return stamped if stamped

  cached = @source_types_cache[msg['source_id']]
  cached || 'unknown'
end
27
+
28
+ # Detect if message is an email based on metadata
29
# Heuristic email check: a message counts as email when its parsed
# metadata carries a 'message_id' entry (the Message-ID header).
#
# @param msg [Hash] message hash
# @return [Object, nil] the 'message_id' value (truthy) when present,
#   otherwise a falsy value
def is_email_message?(msg)
  meta = parse_metadata(msg['metadata'])
  meta ? meta['message_id'] : meta
end
33
+
34
+ # Organize messages into logical structures
35
# Walks every message (oldest first) and files it into the matching
# structure, then computes parent/child levels inside each thread.
#
# Synthetic header rows left over from an earlier organization pass are
# skipped. When folder grouping is enabled every message goes through
# organize_by_folder; otherwise the handler is chosen by plugin type, with
# organize_other_message as the catch-all. The resolved type is stamped on
# the message as 'source_type'. Diagnostic lines are appended to
# /tmp/heathrow_debug.log only while ENV['DEBUG'] is set.
def organize_messages
  original_count = @messages.size
  filtered_count = 0
  debug_log = ->(line) { File.write('/tmp/heathrow_debug.log', line, mode: 'a') }

  # Ascending timestamps so that roots are seen before their replies.
  sorted_messages = @messages.sort_by { |m| m['timestamp'] || 0 }

  if ENV['DEBUG']
    debug_log.call("ORGANIZER: Sorted #{sorted_messages.size} messages by timestamp\n")
    debug_log.call("ORGANIZER: First 10 IDs in order: #{sorted_messages.first(10).map { |m| m['id'] }.join(', ')}\n")
  end

  header_flags = %w[is_header is_channel_header is_thread_header is_dm_header]

  # Plugin type => private handler; anything not listed uses the fallback.
  handlers = {
    'discord' => :organize_discord_message,
    'slack' => :organize_slack_message,
    'reddit' => :organize_reddit_message,
    'telegram' => :organize_telegram_message,
    'gmail' => :organize_email_thread,
    'imap' => :organize_email_thread,
    'email' => :organize_email_thread,
    'maildir' => :organize_email_thread,
    'rss' => :organize_rss_message,
    'web' => :organize_webwatch_message,
    'messenger' => :organize_messenger_message,
    'instagram' => :organize_instagram_message,
    'weechat' => :organize_weechat_message,
    'workspace' => :organize_workspace_message
  }

  sorted_messages.each do |msg|
    # Header rows are display artifacts, not real messages.
    if header_flags.any? { |flag| msg[flag] }
      debug_log.call("ORGANIZER: Skipping header message: #{msg['id']}\n") if ENV['DEBUG']
      next
    end
    filtered_count += 1

    if @group_by_folder
      organize_by_folder(msg)
      next
    end

    plugin_type = get_plugin_type(msg)
    # Unknown sources that still carry a Message-ID are treated as email.
    plugin_type = 'email' if plugin_type == 'unknown' && is_email_message?(msg)

    # Stamp the resolved type so formatting code can rely on it.
    msg['source_type'] = plugin_type

    send(handlers.fetch(plugin_type, :organize_other_message), msg)
  end

  build_thread_hierarchy

  debug_log.call("ORGANIZER: Processed #{filtered_count}/#{original_count} messages (#{original_count - filtered_count} headers skipped)\n") if ENV['DEBUG']
end
106
+
107
+ # Get organized view of messages
108
# Builds the list of display sections (channels, threads, DM sections).
#
# Each section is a Hash with symbol keys (:type, :name or :subject,
# :messages, :collapsed, :unread_count, and for channels :source and
# :display_name). The :messages arrays are copies of the organizer's
# internal arrays, so the in-place sorting and reversing done here never
# leaks back into @channels/@threads/@dms and repeated calls give the same
# result. The message hashes themselves are shared, not copied.
#
# @param sort_order [String, nil] one of 'alphabetical', 'unread',
#   'latest', 'source', 'conversation'; any other value keeps insertion
#   order. 'conversation' splits the DM section into one section per
#   conversation; 'latest' and 'conversation' also order the messages of
#   every section newest-first.
# @param sort_inverted [Boolean] when true the DM section(s) are placed in
#   front instead of at the back, and then the whole list and every
#   section's messages are reversed
# @return [Array<Hash>] the ordered sections
def get_organized_view(sort_order = nil, sort_inverted = false)
  organized = []

  # Channels/groups with their messages.
  @channels.each do |_channel_id, channel_data|
    organized << {
      type: 'channel',
      name: channel_data[:name],
      source: channel_data[:source],
      messages: channel_data[:messages].dup, # copy: sorted/reversed below
      collapsed: channel_data[:collapsed] || false,
      unread_count: count_unread(channel_data[:messages]),
      display_name: channel_data[:display_name]
    }
  end

  # DMs are collected separately and attached after sorting.
  dm_section = nil
  unless @dms.empty?
    dm_section = {
      type: 'dm_section',
      name: 'Direct Messages',
      messages: @dms.dup, # copy: sorted/reversed below
      collapsed: false,
      unread_count: count_unread(@dms)
    }
  end

  # Threads that are not already represented by a channel.
  @threads.each do |_thread_id, thread_data|
    next if thread_data[:in_channel]

    organized << {
      type: 'thread',
      subject: thread_data[:subject],
      messages: thread_data[:messages].dup, # copy: sorted/reversed below
      collapsed: thread_data[:collapsed] || false,
      unread_count: count_unread(thread_data[:messages])
    }
  end

  case sort_order
  when 'alphabetical'
    organized.sort! do |a, b|
      # Ignore leading decoration (#, [, ], @, whitespace) when comparing.
      clean_a = section_display_name(a).gsub(/^[#\[\]@\s]+/, '')
      clean_b = section_display_name(b).gsub(/^[#\[\]@\s]+/, '')
      clean_a == clean_b ? section_display_name(a) <=> section_display_name(b) : clean_a <=> clean_b
    end
  when 'unread'
    organized.sort! do |a, b|
      cmp = b[:unread_count].to_i <=> a[:unread_count].to_i
      cmp != 0 ? cmp : section_display_name(a) <=> section_display_name(b)
    end
  when 'latest'
    organized.sort! do |a, b|
      newest_a = (a[:messages] || []).map { |m| m['timestamp'].to_i }.max || 0
      newest_b = (b[:messages] || []).map { |m| m['timestamp'].to_i }.max || 0
      newest_b <=> newest_a
    end
  when 'source'
    organized.sort! do |a, b|
      sa = a[:source] || (a[:type] == 'thread' ? 'email' : 'unknown')
      sb = b[:source] || (b[:type] == 'thread' ? 'email' : 'unknown')
      cmp = sa.to_s <=> sb.to_s
      cmp != 0 ? cmp : section_display_name(a) <=> section_display_name(b)
    end
  end

  # Attach the DM section(s): back of the list, or front when inverted
  # (the final reversal below then applies to them as well).
  if dm_section
    if sort_order == 'conversation'
      # One section per conversation, keyed by thread id or sender.
      convos = {}
      dm_section[:messages].each do |msg|
        metadata = parse_metadata(msg['metadata'])
        key = metadata['thread_id'] || msg['sender'] || 'Unknown'
        convos[key] ||= { name: msg['sender'] || msg['subject'] || 'Unknown', messages: [] }
        convos[key][:messages] << msg
      end
      conv_sections = convos.map do |_key, data|
        {
          type: 'dm_section',
          name: data[:name],
          messages: data[:messages],
          collapsed: false,
          unread_count: count_unread(data[:messages])
        }
      end
      conv_sections.sort_by! { |s| s[:name].to_s.downcase }
      if sort_inverted
        organized.unshift(*conv_sections.reverse)
      else
        organized.push(*conv_sections)
      end
    elsif sort_inverted
      organized.unshift(dm_section)
    else
      organized.push(dm_section)
    end
  end

  # Newest-first inside each section for the time-oriented orders.
  if sort_order == 'latest' || sort_order == 'conversation'
    organized.each do |section|
      next unless section[:messages]

      section[:messages].sort_by! { |m| -(m['timestamp'].to_i) }
    end
  end

  # Inversion flips both the section list and each section's messages.
  if sort_inverted
    organized.reverse!
    organized.each do |section|
      next unless section[:messages]

      section[:messages].reverse!
    end
  end

  organized
end
232
+
233
+ private
234
+
235
+ # Get display name for a section (used in sorting)
236
# Lower-cased label used as a sort key for a section: its :name, else its
# :subject, else the empty string.
#
# @param section [Hash] section hash with symbol keys
# @return [String]
def section_display_name(section)
  label = section[:name]
  label ||= section[:subject]
  label ||= ''
  label.to_s.downcase
end
239
+
240
+ # Organize Discord messages by channel and DMs
241
# Files a Discord message as a DM or under its channel.
#
# * DM: metadata 'is_dm' is exactly true, or the recipient is 'DM'.
# * Recipient shaped like "Guild#channel": channel keyed by metadata
#   'channel_id' (falling back to the recipient string).
# * Anything else: the recipient is taken as a bare channel id (old
#   format). The label comes from Config's channel_name_map when it has an
#   entry; otherwise purely numeric ids are shown as "Discord-" plus
#   their last four digits (the whole id when it is shorter than four
#   characters, so the label is never left empty).
#
# @param msg [Hash] message hash; 'is_dm' is set to true for DMs
def organize_discord_message(msg)
  metadata = parse_metadata(msg['metadata'])
  recipient = msg['recipient']

  if metadata['is_dm'] == true || recipient == 'DM'
    msg['is_dm'] = true
    @dms << msg
  elsif recipient =~ /^(.+)#(.+)$/
    guild_name = Regexp.last_match(1)
    channel_name = Regexp.last_match(2)
    channel_id = metadata['channel_id'] || recipient
    channel_key = "discord_#{channel_id}"

    @channels[channel_key] ||= {
      name: "#{guild_name} > #{channel_name}",
      source: 'discord',
      messages: [],
      guild: guild_name,
      channel: channel_name,
      display_name: "#{guild_name}##{channel_name}"
    }

    @channels[channel_key][:messages] << msg
  else
    channel_id = recipient
    channel_key = "discord_#{channel_id}"

    # Channel id => readable name mapping supplied by the configuration.
    names = Config.instance&.channel_name_map || {}

    channel_display = names[channel_id]
    if channel_display.nil?
      channel_display =
        if channel_id =~ /^\d+$/
          # A slice starting before the beginning of the string is nil, so
          # fall back to the full id for ids shorter than four digits.
          "Discord-#{channel_id[-4..-1] || channel_id}"
        else
          channel_id
        end
    end

    @channels[channel_key] ||= {
      name: channel_display,
      source: 'discord',
      messages: [],
      display_name: channel_display
    }

    @channels[channel_key][:messages] << msg
  end
end
299
+
300
+ # Organize Slack messages by channel and threads
301
# Files a Slack message as a DM or under its channel, and additionally
# registers it in a thread when it carries 'thread_ts'.
#
# The channel id is msg['channel_id'] or, failing that, a value derived
# from the recipient. Ids starting with "D" are DMs; "C" is a public and
# "G" a private channel. Any other id (including a missing one) is filed
# under a catch-all channel so the message is not silently lost.
#
# @param msg [Hash] message hash; 'is_dm' is set to true for DMs
def organize_slack_message(msg)
  channel_id = msg['channel_id'] || extract_channel_from_recipient(msg['recipient'])
  id_text = channel_id.to_s

  if id_text.start_with?('D')
    msg['is_dm'] = true
    @dms << msg
  elsif id_text.start_with?('C', 'G')
    channel_name = msg['channel_name'] || msg['recipient']

    @channels[channel_id] ||= {
      name: channel_name,
      source: 'slack',
      messages: [],
      type: id_text.start_with?('C') ? 'public' : 'private'
    }

    @channels[channel_id][:messages] << msg
  else
    # Unrecognized or missing channel id: keep the message visible.
    fallback_key = "slack_#{id_text.empty? ? 'unknown' : id_text}"

    @channels[fallback_key] ||= {
      name: msg['channel_name'] || msg['recipient'] || 'Slack',
      source: 'slack',
      messages: [],
      type: 'unknown'
    }

    @channels[fallback_key][:messages] << msg
  end

  # Only real threaded messages get a thread; plain channel posts do not.
  add_to_thread(msg, thread_id: msg['thread_ts']) if msg['thread_ts']
end
327
+
328
+ # Organize Reddit messages by subreddit and thread
329
# Files a Reddit message as a private message, or under its subreddit
# group and a thread.
#
# Private messages ('external_id' containing "reddit_msg_") go to the DM
# list. Everything else is added to the subreddit group when the recipient
# names one, and then threaded: posts open their own thread, comments join
# the thread of their parent post.
#
# extract_reddit_parent returns the parent's external id, while a post's
# thread is keyed by whatever add_to_thread stamped on it as 'thread_id'.
# The comment is therefore attached to the parent's stamped thread id when
# the parent has one, and only falls back to the raw parent id otherwise.
#
# @param msg [Hash] message hash; 'is_dm' is set to true for private messages
def organize_reddit_message(msg)
  if msg['external_id'] =~ /reddit_msg_/
    msg['is_dm'] = true
    @dms << msg
  else
    subreddit = extract_subreddit(msg['recipient'])

    if subreddit
      @groups[subreddit] ||= {
        name: subreddit,
        source: 'reddit',
        messages: [],
        type: 'subreddit'
      }

      @groups[subreddit][:messages] << msg
    end

    if msg['external_id'] =~ /reddit_comment_/
      parent_id = extract_reddit_parent(msg)
      parent_post = @messages.find { |m| m['external_id'] == parent_id }
      # Join the post's existing thread rather than opening a second one.
      thread_id = (parent_post && parent_post['thread_id']) || parent_id
      add_to_thread(msg, thread_id: thread_id)
    else
      add_to_thread(msg)
    end
  end
end
360
+
361
+ # Organize Telegram messages
362
# Files a Telegram message as a DM or under its group, then threads it.
#
# A recipient starting with "@" or a 'chat_type' of 'private' marks a DM.
# Other messages are grouped by 'chat_id' (falling back to the recipient).
# Every message is also handed to add_to_thread, using
# 'reply_to_message_id' as the thread id when it is set.
#
# @param msg [Hash] message hash; 'is_dm' is set to true for DMs
def organize_telegram_message(msg)
  private_chat = msg['recipient'] =~ /^@/ || msg['chat_type'] == 'private'

  if private_chat
    msg['is_dm'] = true
    @dms << msg
  else
    group_name = msg['recipient']
    group_id = msg['chat_id'] || group_name

    unless @groups[group_id]
      @groups[group_id] = {
        name: group_name,
        source: 'telegram',
        messages: [],
        type: msg['chat_type'] || 'group'
      }
    end

    @groups[group_id][:messages] << msg
  end

  reply_target = msg['reply_to_message_id']
  if reply_target
    add_to_thread(msg, thread_id: reply_target)
  else
    add_to_thread(msg)
  end
end
389
+
390
+ # Organize messages by folder (from labels)
391
# Files a message under a folder channel named after its first label
# ('Uncategorized' when it has none). The channel key is "folder_" plus the
# lower-cased name with every character outside a-z/0-9 turned into "_".
#
# @param msg [Hash] message hash whose 'labels' is an Array or JSON string
def organize_by_folder(msg)
  folder_name = parse_labels(msg['labels']).first || 'Uncategorized'
  folder_key = "folder_#{folder_name.downcase.gsub(/[^a-z0-9]/, '_')}"

  unless @channels[folder_key]
    @channels[folder_key] = {
      name: folder_name,
      source: 'folder',
      messages: [],
      display_name: folder_name
    }
  end

  @channels[folder_key][:messages] << msg
end
407
+
408
+ # Parse labels (handle both JSON string and array)
409
# Normalizes a labels value to an Array.
#
# nil gives [], an Array is returned unchanged, and anything else is
# parsed as JSON. The parsed value is only accepted when it is itself an
# Array; a JSON string, number or object yields [] so callers can always
# treat the result as a list. Parsing is best effort: any parse failure
# also yields [].
#
# @param labels [Array, String, nil]
# @return [Array]
def parse_labels(labels)
  return [] unless labels
  return labels if labels.is_a?(Array)

  parsed = JSON.parse(labels)
  parsed.is_a?(Array) ? parsed : []
rescue StandardError
  []
end
414
+
415
+ # Organize other/unknown source types
416
# Catch-all: files a message under one channel per source type
# ('other' when the message has no 'source_type'). The channel key is
# "other_<type>" and the label is the capitalized type.
#
# @param msg [Hash] message hash
def organize_other_message(msg)
  source_type = msg['source_type'] || 'other'
  channel_key = "other_#{source_type}"

  unless @channels[channel_key]
    @channels[channel_key] = {
      name: source_type.capitalize,
      source: source_type,
      messages: [],
      display_name: source_type.capitalize
    }
  end

  @channels[channel_key][:messages] << msg
end
430
+
431
+ # Organize RSS messages as flat feed
432
# Files an RSS item under its feed channel.
#
# The feed name is metadata 'feed_title', else the sender, else
# 'RSS Feed'; the channel key is "rss_" plus the lower-cased name with
# every character outside a-z/0-9 replaced by "_". Items with a nil or
# empty sender get the feed name as sender.
#
# @param msg [Hash] message hash; 'sender' may be filled in
def organize_rss_message(msg)
  sender = msg['sender']
  feed_name = parse_metadata(msg['metadata'])['feed_title'] || sender || 'RSS Feed'
  feed_key = "rss_#{feed_name.downcase.gsub(/[^a-z0-9]/, '_')}"

  unless @channels[feed_key]
    @channels[feed_key] = {
      name: feed_name,
      source: 'rss',
      messages: [],
      display_name: feed_name
    }
  end

  # Guarantee a usable sender on every item.
  msg['sender'] = feed_name if sender.nil? || sender.empty?

  @channels[feed_key][:messages] << msg
end
451
+
452
+ # Organize web watch messages by page
453
# Files a web-watch message under a channel per watched page.
#
# The page name is metadata 'page_title', else the sender, else
# 'Web Watch'; the channel key is "web_" plus the lower-cased name with
# every character outside a-z/0-9 replaced by "_".
#
# @param msg [Hash] message hash
def organize_webwatch_message(msg)
  page_name = parse_metadata(msg['metadata'])['page_title'] || msg['sender'] || 'Web Watch'
  page_key = "web_#{page_name.downcase.gsub(/[^a-z0-9]/, '_')}"

  unless @channels[page_key]
    @channels[page_key] = {
      name: page_name,
      source: 'web',
      messages: [],
      display_name: page_name
    }
  end

  @channels[page_key][:messages] << msg
end
467
+
468
+ # Organize Messenger messages as DMs/conversations
469
# Files a Messenger message as a direct message: it is flagged with
# 'is_dm' and appended to the DM list. (The previously computed thread id
# and display name were never used and have been dropped.)
#
# @param msg [Hash] message hash; 'is_dm' is set to true
def organize_messenger_message(msg)
  msg['is_dm'] = true
  @dms << msg
end
478
+
479
+ # Organize WeeChat relay messages by buffer (IRC channels, Slack channels, DMs)
480
# Files a WeeChat relay message as a DM or under its buffer's channel.
#
# Metadata 'is_dm' being exactly true marks a DM. Otherwise the channel is
# keyed by the buffer name ('WeeChat' when absent), lower-cased with every
# character outside a-z/0-9/"." replaced by "_". The label is metadata
# 'channel_name', else 'buffer_short', else the last dot-separated part of
# the buffer name; the display name prefixes it with the upper-cased
# platform ('IRC' by default). Channel fields are only computed for
# non-DM messages.
#
# @param msg [Hash] message hash; 'is_dm' is set to true for DMs
def organize_weechat_message(msg)
  metadata = parse_metadata(msg['metadata'])

  if metadata['is_dm'] == true
    msg['is_dm'] = true
    @dms << msg
  else
    buffer = metadata['buffer'] || 'WeeChat'
    platform = (metadata['platform'] || 'irc').upcase
    channel_name = metadata['channel_name'] || metadata['buffer_short'] || buffer.split('.').last
    channel_key = "weechat_#{buffer.downcase.gsub(/[^a-z0-9.]/, '_')}"

    @channels[channel_key] ||= {
      name: channel_name,
      source: 'weechat',
      messages: [],
      display_name: "#{platform}: #{channel_name}"
    }

    @channels[channel_key][:messages] << msg
  end
end
504
+
505
+ # Organize Workspace messages by channel or as DMs
506
# Files a Workspace message as a DM or under its conversation channel.
#
# A metadata 'conv_type' of 'private' or a truthy metadata 'is_dm' marks a
# DM. Otherwise the channel is named after metadata 'channel_name', else
# the subject, else 'Workspace', and keyed by "workspace_" plus the
# lower-cased name with every character outside a-z/0-9 replaced by "_".
#
# @param msg [Hash] message hash; 'is_dm' is set to true for DMs
def organize_workspace_message(msg)
  metadata = parse_metadata(msg['metadata'])
  conv_type = metadata['conv_type'] || 'channel'
  conv_name = metadata['channel_name'] || msg['subject'] || 'Workspace'

  if conv_type == 'private' || metadata['is_dm']
    msg['is_dm'] = true
    return @dms << msg
  end

  channel_key = "workspace_#{conv_name.downcase.gsub(/[^a-z0-9]/, '_')}"
  unless @channels[channel_key]
    @channels[channel_key] = {
      name: conv_name,
      source: 'workspace',
      messages: [],
      display_name: "WS: #{conv_name}"
    }
  end
  @channels[channel_key][:messages] << msg
end
525
+
526
+ # Organize Instagram messages as DMs/conversations
527
# Files an Instagram message as a direct message: it is flagged with
# 'is_dm' and appended to the DM list. (The previously computed thread id
# and display name were never used and have been dropped.)
#
# @param msg [Hash] message hash; 'is_dm' is set to true
def organize_instagram_message(msg)
  msg['is_dm'] = true
  @dms << msg
end
535
+
536
+ # Organize email messages into threads
537
# Threads an email using its Message-ID / In-Reply-To / References
# metadata: an existing thread is looked up via find_email_thread and the
# message is added to it, or to a new thread when none is found (a nil
# thread id lets add_to_thread generate one). Diagnostic lines are appended
# to /tmp/heathrow_debug.log only while ENV['DEBUG'] is set.
#
# @param msg [Hash] message hash
def organize_email_thread(msg)
  headers = parse_metadata(msg['metadata']) || {}
  in_reply_to = headers['in_reply_to']

  if ENV['DEBUG']
    File.write('/tmp/heathrow_debug.log', "ORGANIZER: Email msg #{msg['id']} - subject: '#{msg['subject']}', in_reply_to: #{in_reply_to.inspect}\n", mode: 'a')
  end

  thread_id = find_email_thread(headers['message_id'], in_reply_to, headers['references'])

  if ENV['DEBUG']
    File.write('/tmp/heathrow_debug.log', "ORGANIZER: -> thread_id: #{thread_id.inspect} (#{thread_id ? 'found' : 'will create new'})\n", mode: 'a')
  end

  add_to_thread(msg, thread_id: thread_id, subject: msg['subject'])
end
553
+
554
+ # Add message to a thread
555
# Appends a message to a thread, creating the thread record on first use,
# and stamps the thread id on the message as 'thread_id'.
#
# @param msg [Hash] message hash
# @param thread_id [Object, nil] thread key; generated from the message
#   when not given
# @param subject [String, nil] subject for a newly created thread
#   (defaults to the message subject); ignored for existing threads
# @param channel_id [Object, nil] when non-nil a newly created thread is
#   marked as belonging to a channel (:in_channel)
# @return [Object] the thread id
def add_to_thread(msg, thread_id: nil, subject: nil, channel_id: nil)
  thread_id = generate_thread_id(msg) unless thread_id

  unless @threads[thread_id]
    @threads[thread_id] = {
      messages: [],
      subject: subject || msg['subject'],
      first_message_id: msg['id'],
      in_channel: !channel_id.nil?
    }
  end

  @threads[thread_id][:messages] << msg
  msg['thread_id'] = thread_id
end
568
+
569
+ # Build parent-child relationships in threads
570
# Orders the messages of every non-channel thread by timestamp and assigns
# each one a 'thread_level' (0 for roots, parent's level + 1 for replies)
# plus a 'parent_id' when find_parent_message locates a parent.
#
# Messages without a timestamp sort first. The sort key is type-agnostic:
# a missing timestamp is never compared against a present one, so a thread
# mixing nil and numeric timestamps no longer raises during sorting.
def build_thread_hierarchy
  @threads.each_value do |thread_data|
    # Channel threads are displayed through their channel; leave them alone.
    next if thread_data[:in_channel]

    messages = thread_data[:messages]

    # [0, 0] for "no timestamp", [1, ts] otherwise: the first element
    # decides between the two kinds, the second orders real timestamps.
    messages.sort_by! do |m|
      ts = m['timestamp']
      ts ? [1, ts] : [0, 0]
    end

    # Parents sort before replies, so their level is already known here.
    messages.each do |msg|
      parent = find_parent_message(msg, messages)
      if parent
        msg['parent_id'] = parent['id']
        msg['thread_level'] = (parent['thread_level'] || 0) + 1
      else
        msg['thread_level'] = 0
      end
    end
  end
end
593
+
594
+ # Find parent message for threading
595
# Finds the message within a thread that the given message replies to.
#
# * Threads with a single message have no parent.
# * Discord/Slack/Telegram messages only thread when they are DMs.
# * Email-like sources: the parent is the message whose metadata
#   'message_id' equals this message's 'in_reply_to'; failing that, the
#   References header is walked from most recent to oldest. References may
#   be an Array or a whitespace-separated String, and is consulted even
#   when In-Reply-To is absent (matching how find_email_thread reads it).
#   A message is never reported as its own parent.
# * DMs: the message whose 'external_id' equals 'reply_to_message_id'.
#
# @param msg [Hash] the message to find a parent for
# @param messages [Array<Hash>] all messages of the thread
# @return [Hash, nil] the parent message, or nil when none is found
def find_parent_message(msg, messages)
  return nil if messages.size <= 1

  plugin_type = get_plugin_type(msg)

  # Channel chatter on chat platforms is shown flat.
  return nil if plugin_type =~ /discord|slack|telegram/ && !msg['is_dm']

  if plugin_type =~ /mail|imap|gmail|email/
    metadata = parse_metadata(msg['metadata']) || {}

    # Looks up another message of the thread by its Message-ID.
    by_message_id = lambda do |wanted|
      messages.find do |m|
        next false if m.equal?(msg)

        m_meta = parse_metadata(m['metadata'])
        m_meta && m_meta['message_id'] == wanted
      end
    end

    in_reply_to = metadata['in_reply_to']
    if in_reply_to
      parent = by_message_id.call(in_reply_to)
      return parent if parent
    end

    references = metadata['references']
    ref_list =
      case references
      when Array then references
      when String then references.split(/\s+/)
      else []
      end

    # Most recent reference first: it is the closest ancestor.
    ref_list.reverse_each do |ref_id|
      next if ref_id.nil? || ref_id.to_s.empty?

      parent = by_message_id.call(ref_id)
      return parent if parent
    end
  end

  # Chat platforms: reply links are honored for DMs only.
  if msg['reply_to_message_id'] && msg['is_dm']
    return messages.find { |m| m['external_id'] == msg['reply_to_message_id'] }
  end

  nil
end
636
+
637
+ # Extract channel ID from Discord message
638
# Derives a Discord channel id for a message.
#
# Tried in order: the digits captured from an 'external_id' shaped like
# "discord_<digits>_..."; the 'channel_id' entry of the JSON in
# 'raw_data'; and finally "discord_" plus the recipient with every
# non-alphanumeric character replaced by "_".
#
# Malformed or non-object 'raw_data' and a missing recipient are
# tolerated instead of raising.
#
# @param msg [Hash] message hash
# @return [String, Object] the channel id
def extract_channel_id(msg)
  id_match = msg['external_id'].to_s.match(/discord_(\d+)_/)
  return id_match[1] if id_match

  if msg['raw_data']
    data =
      begin
        JSON.parse(msg['raw_data'])
      rescue StandardError
        {} # best effort: unparsable raw data simply contributes nothing
      end
    # Only a JSON object can carry a channel_id entry.
    return data['channel_id'] if data.is_a?(Hash) && data['channel_id']
  end

  "discord_#{msg['recipient'].to_s.gsub(/[^a-z0-9]/i, '_')}"
end
653
+
654
+ # Extract subreddit from recipient
655
# Pulls the subreddit ("r/<name>") out of a recipient that starts with
# "r/" followed by word characters.
#
# @param recipient [String, nil]
# @return [String, nil] "r/<name>", or nil when the recipient does not match
def extract_subreddit(recipient)
  return nil unless recipient =~ /^r\/(\w+)/

  "r/#{Regexp.last_match(1)}"
end
661
+
662
+ # Extract parent ID for Reddit comments
663
# Guesses the parent post of a Reddit comment from its subject line.
#
# For a subject of the form "Re: <title>", the first 51 characters of the
# title are matched as a prefix against the subjects of all known posts
# ('external_id' containing "reddit_post_"). The matching post's
# external id is returned; without a match the placeholder
# "reddit_post_unknown" is returned.
#
# @param msg [Hash] comment message hash
# @return [String] external id of the parent post, or the placeholder
def extract_reddit_parent(msg)
  fallback = "reddit_post_unknown"
  return fallback unless msg['subject'] =~ /^Re: (.+)/

  title_prefix = Regexp.last_match(1)[0..50]
  post = @messages.find do |candidate|
    next false unless candidate['external_id'] =~ /reddit_post_/

    title = candidate['subject']
    title && title.start_with?(title_prefix)
  end

  post ? post['external_id'] : fallback
end
678
+
679
+ # Parse metadata JSON
680
# Normalizes a metadata value to a Hash.
#
# nil gives {}, a Hash is returned unchanged (same object), and anything
# else is parsed as JSON. Only a JSON object is accepted; valid JSON of
# another shape (array, number, string, null) yields {} so callers can
# always index the result with string keys. Parsing is best effort: any
# parse failure also yields {}.
#
# @param metadata_str [String, Hash, nil]
# @return [Hash]
def parse_metadata(metadata_str)
  return {} unless metadata_str
  return metadata_str if metadata_str.is_a?(Hash) # already parsed

  parsed = JSON.parse(metadata_str)
  parsed.is_a?(Hash) ? parsed : {}
rescue StandardError
  {}
end
685
+
686
+ # Find email thread based on headers
687
# Looks for an existing thread an email belongs to.
#
# First every thread is scanned for a message whose metadata 'message_id'
# equals in_reply_to. If that finds nothing, the References header (Array
# or whitespace-separated String) is checked the same way. Diagnostic
# lines are appended to /tmp/heathrow_debug.log only while ENV['DEBUG']
# is set.
#
# @param message_id [String, nil] the email's own Message-ID (not used
#   for the lookup)
# @param in_reply_to [String, nil] In-Reply-To header value
# @param references [Array, String, nil] References header value
# @return [Object, nil] the id of the matching thread, or nil so that the
#   caller generates a fresh thread id
def find_email_thread(message_id, in_reply_to, references)
  log_path = '/tmp/heathrow_debug.log'
  File.write(log_path, "  find_email_thread: in_reply_to=#{in_reply_to.inspect}, references=#{references.inspect}, @threads.size=#{@threads.size}\n", mode: 'a') if ENV['DEBUG']

  # Pass 1: the thread holding the message being replied to.
  if in_reply_to
    @threads.each_pair do |thread_id, thread_data|
      hit = thread_data[:messages].any? do |m|
        msg_id = parse_metadata(m['metadata'])['message_id']
        matches = (msg_id == in_reply_to)
        File.write(log_path, "    Checking in_reply_to: msg #{m['id']} message_id=#{msg_id.inspect} == #{in_reply_to.inspect}? #{matches}\n", mode: 'a') if ENV['DEBUG']
        matches
      end
      next unless hit

      File.write(log_path, "    Found thread via in_reply_to: #{thread_id}\n", mode: 'a') if ENV['DEBUG']
      return thread_id
    end
  end

  # Pass 2: any thread holding a message named in References.
  if references && !references.empty?
    ref_list = references.is_a?(Array) ? references : references.split(/\s+/)
    File.write(log_path, "    Checking references: #{ref_list.inspect}\n", mode: 'a') if ENV['DEBUG']

    @threads.each_pair do |thread_id, thread_data|
      thread_data[:messages].each do |m|
        meta = parse_metadata(m['metadata'])
        next unless ref_list.include?(meta['message_id'])

        File.write(log_path, "    Found thread via references: #{thread_id} (matched #{meta['message_id']})\n", mode: 'a') if ENV['DEBUG']
        return thread_id
      end
    end
  end

  File.write(log_path, "    No thread found, will create new\n", mode: 'a') if ENV['DEBUG']
  # nil tells the caller to generate a unique thread id.
  nil
end
730
+
731
+ # Generate a thread ID for a message
732
# Produces a thread id for a message that is not joining an existing
# thread. Tried in order:
#
# 1. 'external_id' containing "post_", "msg_" or "message_": the id is
#    "thread_" plus the word characters that follow.
# 2. Email-like sources (any source type containing "mail" or "imap", so
#    gmail, imap, email and maildir alike) with a Message-ID: one thread
#    per Message-ID, so unrelated emails never merge just because sender
#    and subject coincide. Letters of either case and digits are kept when
#    sanitizing, so Message-IDs that differ only in their upper-case parts
#    stay distinct.
# 3. A non-empty subject: sender prefix plus the subject with any leading
#    "Re:", "Fwd:" or "Fw:" removed.
# 4. The message 'id', or the current epoch second when there is none.
#
# @param msg [Hash] message hash
# @return [String] the thread id
def generate_thread_id(msg)
  if msg['external_id'] =~ /(post|msg|message)_(\w+)/
    return "thread_#{Regexp.last_match(2)}"
  end

  if msg['source_type'] =~ /mail|imap/
    message_id = parse_metadata(msg['metadata'])['message_id']
    return "thread_email_#{message_id.to_s.gsub(/[^a-zA-Z0-9]+/, '_')}" if message_id
  end

  subject = msg['subject']
  if subject && !subject.empty?
    subject_base = subject.gsub(/^(Re:|Fwd:|Fw:)\s*/i, '').strip
    # The sender prefix keeps equal subjects from different people apart.
    sender_part = (msg['sender'] || '').downcase.gsub(/[^a-z0-9]+/, '_')[0..10]
    return "thread_#{sender_part}_#{subject_base.downcase.gsub(/[^a-z0-9]+/, '_')}"
  end

  "thread_#{msg['id'] || Time.now.to_i}"
end
759
+
760
+ # Extract channel from Slack recipient
761
# Derives a Slack channel id from a recipient string.
#
# A recipient containing "#" followed by a word character is turned into
# "C" plus the whole recipient stripped of non-alphanumeric characters;
# any other recipient is returned unchanged.
#
# @param recipient [String, nil]
# @return [String, nil]
def extract_channel_from_recipient(recipient)
  return recipient unless recipient =~ /#(\w+)/

  "C#{recipient.gsub(/[^a-z0-9]/i, '')}"
end
768
+
769
+ # Count unread messages
770
# Counts the messages whose 'is_read' converts to the integer 0
# (a missing 'is_read' counts as unread).
#
# @param messages [Array<Hash>]
# @return [Integer]
def count_unread(messages)
  messages.count { |message| message['is_read'].to_i.zero? }
end
773
+ end
774
+ end