heathrow 0.7.9 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8023c45996b5eba44dfbb1e9718fe11d149ec7f14f35dd5569fe5172b53a018e
4
- data.tar.gz: d1f94f8964b220fb7f831ca60298a3ca68f1dc4bdc4b9949282bb7add4c8d65e
3
+ metadata.gz: e278aa83b425434144c53f99442726b3f67e2d0bc06cd34817256b7b862da924
4
+ data.tar.gz: 5b33d7429d0d8022393c994106d959236ea42f137401588819b49512e713755a
5
5
  SHA512:
6
- metadata.gz: 5b438204942e8ad23d166b36be5acc3dc9a0fa1c2456f9946f258c02d1597176bf0dafde75081e21c469839487938bc8181bf3151cff4fca30d7bd617ae53d18
7
- data.tar.gz: 1944ede7efac79dac8ace2640dee9c2260754898f8dc24ed1b7793898ed2b3eb57b47cef9fc7102ab8b1da09b20be898aa23312872704301b5aa3fb96a501caf
6
+ metadata.gz: '0596e613d29bcfc00ebe98afb84bb9f5fc9db44edbd33f1ef78c2604e451d639d1d526e7152808c7002cd47c2e12699e861dc4d5e972cbb51dc7457a9f38036d'
7
+ data.tar.gz: a29bf091fd86b3f5c2a731f42e799276260e45bee61b2bff4dee1f6f9dbb69b831f01b2dc4321635d4907b63370970f9f5181227b02b494412a7fd26c066ec46
data/heathrow.gemspec CHANGED
@@ -29,6 +29,6 @@ Gem::Specification.new do |spec|
29
29
  spec.require_paths = ['lib']
30
30
 
31
31
  # Runtime dependencies - keep it simple!
32
- spec.add_runtime_dependency 'rcurses', '>= 5.0'
32
+ spec.add_runtime_dependency 'rcurses', '~> 7.0'
33
33
  spec.add_runtime_dependency 'sqlite3', '>= 1.4'
34
34
  end
@@ -163,7 +163,7 @@ module Heathrow
163
163
  template << "Date: #{format_date(original_date)}"
164
164
  template << "Subject: #{original_subject}"
165
165
  template << ""
166
- template << original_content
166
+ template << original_content.gsub("\r", "")
167
167
 
168
168
  # Signature at the bottom
169
169
  sig = get_signature
@@ -10,6 +10,7 @@ module Heathrow
10
10
  COOKIE_DIR = File.join(Dir.home, '.heathrow', 'cookies')
11
11
  COOKIE_FILE = File.join(COOKIE_DIR, 'messenger.json')
12
12
  FETCH_SCRIPT = File.join(__dir__, 'messenger_fetch_marionette.py')
13
+ THREAD_SCRIPT = File.join(__dir__, 'messenger_fetch_thread.py')
13
14
 
14
15
  # Required cookies for authentication
15
16
  REQUIRED_COOKIES = %w[c_user xs]
@@ -21,25 +22,106 @@ module Heathrow
21
22
  end
22
23
 
23
24
  def sync(source_id)
24
- return 0 unless valid_cookies?
25
+ unless valid_cookies?
26
+ @sync_error = "Messenger: invalid cookies"
27
+ return 0
28
+ end
25
29
 
26
30
  begin
27
31
  data = fetch_via_playwright
28
- return 0 unless data
32
+ unless data
33
+ @sync_error = "Messenger: no data (is the tab open?)"
34
+ return 0
35
+ end
36
+
37
+ if data['error']
38
+ @sync_error = "Messenger: #{data['error']}"
39
+ return 0
40
+ end
41
+
29
42
  threads = data['threads'] || []
30
- return 0 if threads.empty?
43
+ if threads.empty?
44
+ @sync_error = "Messenger: no threads found"
45
+ return 0
46
+ end
31
47
 
32
48
  count = 0
33
49
  threads.each do |thread|
34
50
  count += process_thread(source_id, thread)
35
51
  end
52
+ @sync_error = nil
36
53
  count
37
54
  rescue => e
38
- STDERR.puts "Messenger error: #{e.message}" if ENV['DEBUG']
55
+ @sync_error = "Messenger: #{e.message}"
39
56
  0
40
57
  end
41
58
  end
42
59
 
60
+ attr_reader :sync_error
61
+
62
# Deep-fetch a single thread: run THREAD_SCRIPT for +thread_id+ and store the
# messages it reports.
#
# The script is run through `timeout 15 python3` with stderr discarded, and
# its stdout is parsed as JSON. Each usable message is passed to
# @db.insert_message; rows rejected with SQLite3::ConstraintException are
# treated as already stored and are not counted.
#
# source_id   - value stored as :source_id on every inserted row.
# thread_id   - conversation id; passed to the script and used in external ids.
# thread_name - used as subject/recipient, and as sender when the script
#               reports none.
#
# Returns the number of newly inserted messages. On failure returns 0 and
# sets @sync_error; @sync_error is nil after a call that did not fail.
def sync_thread(source_id, thread_id, thread_name)
  # Reset up front so every early return below reflects this call only,
  # instead of leaking the error of a previous sync.
  @sync_error = nil

  result = `timeout 15 python3 #{Shellwords.escape(THREAD_SCRIPT)} #{Shellwords.escape(thread_id)} 2>/dev/null`
  if result.nil? || result.strip.empty?
    # Empty stdout is also what a killed (timed-out) script leaves behind.
    @sync_error = "Messenger thread: no data"
    return 0
  end

  data = JSON.parse(result)
  if data['error']
    @sync_error = "Messenger thread: #{data['error']}"
    return 0
  end

  # The script reports no per-message time, so rows are stamped with the
  # fetch time; computed once so timestamp and received_at always agree.
  fetched_at = Time.now.to_i

  count = 0
  (data['messages'] || []).each do |msg|
    text = (msg['text'] || '').strip
    next if text.length < 2 || messenger_ui_noise?(text)

    sender = msg['sender'] || ''
    sender = thread_name if sender.empty?

    # The id is derived from thread + text only, so re-fetching the same
    # message maps to the same id (and identical texts in one thread collapse).
    ext_id = "msng_#{thread_id}_d#{Digest::MD5.hexdigest(text)[0..11]}"

    msg_data = {
      source_id: source_id,
      external_id: ext_id,
      thread_id: thread_id.to_s,
      sender: sender,
      sender_name: sender,
      recipients: [thread_name],
      subject: thread_name,
      content: text,
      html_content: nil,
      timestamp: fetched_at,
      received_at: fetched_at,
      read: true,
      starred: false,
      archived: false,
      labels: ['Messenger'],
      attachments: nil,
      metadata: { thread_id: thread_id, message_id: ext_id, platform: 'messenger' },
      raw_data: { thread_id: thread_id, name: thread_name }
    }

    begin
      @db.insert_message(msg_data)
      count += 1
    rescue SQLite3::ConstraintException
      # Already exists
    end
  end
  count
rescue => e
  @sync_error = "Messenger thread: #{e.message}"
  0
end

# True when +text+ is scraped UI chrome rather than a real message.
# Anchored with \A (not ^) so only the start of the whole text is tested;
# a multi-line message whose later line begins with "You sent" is kept.
def messenger_ui_noise?(text)
  text.match?(/\A(Today|Yesterday) at \d/i) ||
    text.match?(/\AEnter, Message sent/i) ||
    text.match?(/\AYou (sent|replied|reacted)/i)
end
124
+
43
125
  def fetch
44
126
  return [] unless enabled?
45
127
  source = @db.get_source_by_name(@name)
@@ -124,7 +206,7 @@ module Heathrow
124
206
 
125
207
  def fetch_via_playwright
126
208
  # Use Marionette (real Firefox tab) since Meta blocks headless browsers
127
- result = `python3 #{Shellwords.escape(FETCH_SCRIPT)} 2>/dev/null`
209
+ result = `timeout 30 python3 #{Shellwords.escape(FETCH_SCRIPT)} 2>/dev/null`
128
210
  return nil if result.nil? || result.strip.empty?
129
211
 
130
212
  data = JSON.parse(result)
@@ -78,8 +78,6 @@ for (const link of links) {
78
78
  return results;
79
79
  """
80
80
 
81
-
82
-
83
81
  def main():
84
82
  try:
85
83
  from marionette_driver.marionette import Marionette
@@ -109,8 +107,10 @@ def main():
109
107
 
110
108
  threads = []
111
109
  for thread in thread_list[:MAX_THREADS]:
110
+ tid = thread['id']
112
111
  snippet = thread.get('snippet', '')
113
112
  debug(f" {thread['name']}: snippet={snippet[:50] if snippet else '(none)'}, unread={thread.get('unread')}")
113
+
114
114
  threads.append({
115
115
  "id": thread['id'],
116
116
  "name": thread['name'],
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env python3
2
+ """Fetch messages from a single Messenger conversation via Firefox Marionette.
3
+
4
+ Usage: messenger_fetch_thread.py <thread_id>
5
+
6
+ Connects to Firefox Marionette, navigates to the conversation,
7
+ scrapes visible messages, returns JSON to stdout.
8
+ """
9
+
10
+ import json
11
+ import sys
12
+ import time
13
+
14
# Diagnostics are enabled by passing --debug anywhere on the command line.
DEBUG = '--debug' in sys.argv


def parse_thread_id(args):
    """Return the last purely numeric argument in ``args``, or None.

    Only ASCII digits 0-9 are accepted. ``str.isdigit()`` would also accept
    characters such as superscripts or Arabic-Indic digits, and the result is
    later interpolated into a URL, so the check is done explicitly.
    Non-numeric arguments (including ``--debug``) are ignored.
    """
    thread_id = None
    for arg in args:
        if arg and all('0' <= ch <= '9' for ch in arg):
            thread_id = arg
    return thread_id


# Conversation to fetch; None when no numeric argument was supplied.
THREAD_ID = parse_thread_id(sys.argv[1:])
20
+
21
+
22
def debug(msg):
    """Print ``msg`` to stderr with a ``[thread-fetch]`` prefix when DEBUG is set."""
    if not DEBUG:
        return
    print(f"[thread-fetch] {msg}", file=sys.stderr)
25
+
26
+
27
# JavaScript run inside the Messenger tab. It walks every [role="row"] under
# [role="main"], joins the texts of its [dir="auto"] elements (minus known UI
# chrome) and takes the sender from the row's first img[alt], if plausible.
SCRAPE_MESSAGES_JS = """
    const msgs = [];
    const mainArea = document.querySelector('[role="main"]');
    if (!mainArea) return msgs;

    // Find all message groups - each group has a sender
    const groups = mainArea.querySelectorAll('[role="row"]');

    for (const group of groups) {
        // Get text content from dir="auto" spans (actual message text)
        const textEls = Array.from(group.querySelectorAll('[dir="auto"]'));
        if (textEls.length === 0) continue;

        // Filter out UI chrome
        const texts = textEls
            .map(e => e.textContent.trim())
            .filter(t => {
                if (t.length < 1 || t.length > 5000) return false;
                if (/^(Active now|Active \\d|Seen by|You sent|\\d+ (hour|minute|day|week)|Loading|Replying to|End-to-end encrypted|Messenger|Media & files|Privacy & support)/i.test(t)) return false;
                if (/^(Today|Yesterday)$/i.test(t)) return false;
                if (/^\\d{1,2}:\\d{2}\\s*(AM|PM)?$/i.test(t)) return false;
                if (/^\\w{3} \\d{1,2}, \\d{4}$/i.test(t)) return false;
                return true;
            });
        if (texts.length === 0) continue;

        const text = texts.join(' ');
        if (text.length < 1) continue;

        // Try sender from img alt
        let sender = '';
        const img = group.querySelector('img[alt]');
        if (img && img.alt && img.alt.length < 60 && !/^\\d/.test(img.alt)) {
            sender = img.alt;
        }

        msgs.push({sender: sender, text: text});
    }
    return msgs;
"""


def _switch_to_messenger_tab(client):
    """Switch to the first window whose URL contains 'messenger.com'.

    Returns True when such a window was found (and is now selected).
    """
    for handle in client.window_handles:
        client.switch_to_window(handle)
        if 'messenger.com' in client.get_url():
            return True
    return False


def _is_on_thread(url, thread_id):
    """Return True when the path segment following '/t/' in ``url`` equals ``thread_id``.

    A plain substring test would treat '/t/12345' as thread '123'; the
    segment is therefore cut at the next '/', '?' or '#' and compared whole.
    """
    _, marker, rest = url.partition('/t/')
    if not marker:
        return False
    for stop in '/?#':
        rest = rest.split(stop, 1)[0]
    return rest == thread_id


def _drop_consecutive_duplicates(messages):
    """Return ``messages`` without entries whose text repeats the previous kept text."""
    deduped = []
    prev_text = ''
    for m in messages:
        if m['text'] != prev_text:
            deduped.append(m)
            prev_text = m['text']
    return deduped


def main():
    """Scrape the conversation THREAD_ID and print the result as JSON on stdout.

    On success prints ``{"messages": [...]}``. Every failure prints
    ``{"error": <reason>, "messages": []}`` instead; nothing is raised.
    The Marionette session is deleted on the way out whenever a client
    object was created.
    """
    if not THREAD_ID:
        print(json.dumps({"error": "No thread ID provided", "messages": []}))
        return

    try:
        from marionette_driver.marionette import Marionette
    except ImportError:
        print(json.dumps({"error": "marionette_driver not installed", "messages": []}))
        return

    client = None
    try:
        client = Marionette(host='localhost', port=2828)
        client.start_session()
        debug("Connected")

        # Find Messenger tab
        if not _switch_to_messenger_tab(client):
            print(json.dumps({"error": "No Messenger tab found", "messages": []}))
            return

        # Navigate to the conversation unless the tab is already showing it
        target_url = f"https://www.messenger.com/t/{THREAD_ID}"
        if not _is_on_thread(client.get_url(), THREAD_ID):
            debug(f"Navigating to {target_url}")
            client.navigate(target_url)
            # Fixed pause to give the page time to render before scraping.
            time.sleep(2)
        else:
            debug("Already on target conversation")

        # Scrape messages from the main content area
        messages = client.execute_script(SCRAPE_MESSAGES_JS) or []
        debug(f"Found {len(messages)} raw messages")

        # Deduplicate consecutive identical texts
        deduped = _drop_consecutive_duplicates(messages)
        debug(f"After dedup: {len(deduped)} messages")
        print(json.dumps({"messages": deduped}))

    except ConnectionRefusedError:
        print(json.dumps({"error": "Cannot connect to Marionette on port 2828", "messages": []}))
    except Exception as e:
        print(json.dumps({"error": str(e), "messages": []}))
    finally:
        if client:
            try:
                client.delete_session()
            except Exception:
                # Best-effort cleanup; the JSON result has already been printed.
                pass


if __name__ == '__main__':
    main()