heathrow 0.7.9 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/heathrow.gemspec +1 -1
- data/lib/heathrow/message_composer.rb +1 -1
- data/lib/heathrow/sources/messenger.rb +87 -5
- data/lib/heathrow/sources/messenger_fetch_marionette.py +2 -2
- data/lib/heathrow/sources/messenger_fetch_thread.py +131 -0
- data/lib/heathrow/ui/application.rb +231 -86
- data/lib/heathrow/ui/source_wizard.rb +18 -18
- data/lib/heathrow/ui/threaded_view.rb +8 -8
- data/lib/heathrow/version.rb +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e278aa83b425434144c53f99442726b3f67e2d0bc06cd34817256b7b862da924
|
|
4
|
+
data.tar.gz: 5b33d7429d0d8022393c994106d959236ea42f137401588819b49512e713755a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '0596e613d29bcfc00ebe98afb84bb9f5fc9db44edbd33f1ef78c2604e451d639d1d526e7152808c7002cd47c2e12699e861dc4d5e972cbb51dc7457a9f38036d'
|
|
7
|
+
data.tar.gz: a29bf091fd86b3f5c2a731f42e799276260e45bee61b2bff4dee1f6f9dbb69b831f01b2dc4321635d4907b63370970f9f5181227b02b494412a7fd26c066ec46
|
data/heathrow.gemspec
CHANGED
|
@@ -29,6 +29,6 @@ Gem::Specification.new do |spec|
|
|
|
29
29
|
spec.require_paths = ['lib']
|
|
30
30
|
|
|
31
31
|
# Runtime dependencies - keep it simple!
|
|
32
|
-
spec.add_runtime_dependency 'rcurses', '
|
|
32
|
+
spec.add_runtime_dependency 'rcurses', '~> 7.0'
|
|
33
33
|
spec.add_runtime_dependency 'sqlite3', '>= 1.4'
|
|
34
34
|
end
|
|
@@ -163,7 +163,7 @@ module Heathrow
|
|
|
163
163
|
template << "Date: #{format_date(original_date)}"
|
|
164
164
|
template << "Subject: #{original_subject}"
|
|
165
165
|
template << ""
|
|
166
|
-
template << original_content
|
|
166
|
+
template << original_content.gsub("\r", "")
|
|
167
167
|
|
|
168
168
|
# Signature at the bottom
|
|
169
169
|
sig = get_signature
|
|
@@ -10,6 +10,7 @@ module Heathrow
|
|
|
10
10
|
COOKIE_DIR = File.join(Dir.home, '.heathrow', 'cookies')
|
|
11
11
|
COOKIE_FILE = File.join(COOKIE_DIR, 'messenger.json')
|
|
12
12
|
FETCH_SCRIPT = File.join(__dir__, 'messenger_fetch_marionette.py')
|
|
13
|
+
THREAD_SCRIPT = File.join(__dir__, 'messenger_fetch_thread.py')
|
|
13
14
|
|
|
14
15
|
# Required cookies for authentication
|
|
15
16
|
REQUIRED_COOKIES = %w[c_user xs]
|
|
@@ -21,25 +22,106 @@ module Heathrow
|
|
|
21
22
|
end
|
|
22
23
|
|
|
23
24
|
# Pulls the Messenger thread list and hands every thread to process_thread.
#
# Returns the sum of the process_thread results. On any failure it returns 0
# and leaves a human-readable reason in @sync_error; on success @sync_error
# is reset to nil.
def sync(source_id)
  # The cookie check sits outside the rescue below, so an exception raised by
  # valid_cookies? itself propagates to the caller.
  unless valid_cookies?
    @sync_error = "Messenger: invalid cookies"
    return 0
  end

  begin
    payload = fetch_via_playwright

    # Checks run in order; the first one that fails supplies the message.
    failure =
      if !payload
        "Messenger: no data (is the tab open?)"
      elsif payload['error']
        "Messenger: #{payload['error']}"
      elsif (conversations = payload['threads'] || []).empty?
        "Messenger: no threads found"
      end

    if failure
      @sync_error = failure
      return 0
    end

    imported = conversations.inject(0) do |total, conversation|
      total + process_thread(source_id, conversation)
    end
    @sync_error = nil
    imported
  rescue => e
    @sync_error = "Messenger: #{e.message}"
    0
  end
end
|
|
42
59
|
|
|
60
|
+
attr_reader :sync_error
|
|
61
|
+
|
|
62
|
+
# Deep-fetch a single thread: navigate into it and scrape visible messages
|
|
63
|
+
# Runs THREAD_SCRIPT for one conversation (15 s timeout, stderr discarded),
# parses its JSON output and stores each scraped message via @db.insert_message.
#
# @param source_id   id stored on every inserted message
# @param thread_id   conversation id passed to the script and used in external ids
# @param thread_name used as subject/recipient and as sender when none was scraped
# @return [Integer] number of newly inserted messages; 0 on any failure
#
# Every exit path assigns @sync_error (nil on success, a message otherwise),
# so a value left over from an earlier call is never reported for this one.
def sync_thread(source_id, thread_id, thread_name)
  result = `timeout 15 python3 #{Shellwords.escape(THREAD_SCRIPT)} #{Shellwords.escape(thread_id)} 2>/dev/null`
  # No output at all: the script was killed by the timeout or failed before printing.
  if result.nil? || result.strip.empty?
    @sync_error = "Messenger thread: no data (is the tab open?)"
    return 0
  end

  data = JSON.parse(result)
  if data['error']
    @sync_error = "Messenger thread: #{data['error']}"
    return 0
  end

  messages = data['messages'] || []
  if messages.empty?
    @sync_error = "Messenger thread: no messages found"
    return 0
  end

  # The scrape carries no per-message time; stamp the whole batch with one value.
  fetched_at = Time.now.to_i
  count = 0
  messages.each do |msg|
    text = (msg['text'] || '').strip
    # Drops empty and single-character texts.
    next if text.length < 2
    # Skip UI garbage. \A anchors to the start of the message; ^ would also
    # match at the start of any later line and discard real multi-line messages.
    next if text.match?(/\A(Today|Yesterday) at \d/i)
    next if text.match?(/\AEnter, Message sent/i)
    next if text.match?(/\AYou (sent|replied|reacted)/i)

    sender = msg['sender'] || ''
    sender = thread_name if sender.empty?

    # The id depends only on thread and text, so re-syncing the same message
    # yields the same id (presumably rejected by a uniqueness constraint below).
    ext_id = "msng_#{thread_id}_d#{Digest::MD5.hexdigest(text)[0..11]}"

    msg_data = {
      source_id: source_id,
      external_id: ext_id,
      thread_id: thread_id.to_s,
      sender: sender,
      sender_name: sender,
      recipients: [thread_name],
      subject: thread_name,
      content: text,
      html_content: nil,
      timestamp: fetched_at,
      received_at: fetched_at,
      read: true,
      starred: false,
      archived: false,
      labels: ['Messenger'],
      attachments: nil,
      metadata: { thread_id: thread_id, message_id: ext_id, platform: 'messenger' },
      raw_data: { thread_id: thread_id, name: thread_name }
    }

    begin
      @db.insert_message(msg_data)
      count += 1
    rescue SQLite3::ConstraintException
      # Already exists
    end
  end
  @sync_error = nil
  count
rescue => e
  @sync_error = "Messenger thread: #{e.message}"
  0
end
|
|
124
|
+
|
|
43
125
|
def fetch
|
|
44
126
|
return [] unless enabled?
|
|
45
127
|
source = @db.get_source_by_name(@name)
|
|
@@ -124,7 +206,7 @@ module Heathrow
|
|
|
124
206
|
|
|
125
207
|
def fetch_via_playwright
|
|
126
208
|
# Use Marionette (real Firefox tab) since Meta blocks headless browsers
|
|
127
|
-
result = `python3 #{Shellwords.escape(FETCH_SCRIPT)} 2>/dev/null`
|
|
209
|
+
result = `timeout 30 python3 #{Shellwords.escape(FETCH_SCRIPT)} 2>/dev/null`
|
|
128
210
|
return nil if result.nil? || result.strip.empty?
|
|
129
211
|
|
|
130
212
|
data = JSON.parse(result)
|
|
@@ -78,8 +78,6 @@ for (const link of links) {
|
|
|
78
78
|
return results;
|
|
79
79
|
"""
|
|
80
80
|
|
|
81
|
-
|
|
82
|
-
|
|
83
81
|
def main():
|
|
84
82
|
try:
|
|
85
83
|
from marionette_driver.marionette import Marionette
|
|
@@ -109,8 +107,10 @@ def main():
|
|
|
109
107
|
|
|
110
108
|
threads = []
|
|
111
109
|
for thread in thread_list[:MAX_THREADS]:
|
|
110
|
+
tid = thread['id']
|
|
112
111
|
snippet = thread.get('snippet', '')
|
|
113
112
|
debug(f" {thread['name']}: snippet={snippet[:50] if snippet else '(none)'}, unread={thread.get('unread')}")
|
|
113
|
+
|
|
114
114
|
threads.append({
|
|
115
115
|
"id": thread['id'],
|
|
116
116
|
"name": thread['name'],
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Fetch messages from a single Messenger conversation via Firefox Marionette.
|
|
3
|
+
|
|
4
|
+
Usage: messenger_fetch_thread.py <thread_id>
|
|
5
|
+
|
|
6
|
+
Connects to Firefox Marionette, navigates to the conversation,
|
|
7
|
+
scrapes visible messages, returns JSON to stdout.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import sys
|
|
12
|
+
import time
|
|
13
|
+
|
|
14
|
+
# Command-line handling: `--debug` anywhere in argv turns on stderr tracing.
DEBUG = '--debug' in sys.argv
# The thread ID is the last all-digit argument; None when there is none.
THREAD_ID = next(
    (token for token in reversed(sys.argv[1:]) if token.isdigit()),
    None,
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def debug(msg):
    """Write `msg` to stderr with a "[thread-fetch]" prefix; a no-op unless DEBUG is set."""
    if not DEBUG:
        return
    print(f"[thread-fetch] {msg}", file=sys.stderr)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def main():
    """Scrape the visible messages of conversation THREAD_ID and print them as JSON.

    Connects to Marionette on localhost:2828, switches to the first window whose
    URL contains 'messenger.com', navigates to the conversation if that tab is
    not already showing it, and runs a DOM-scraping script in the page.

    Always prints exactly one JSON object to stdout: {"messages": [...]} on
    success, or {"error": "...", "messages": []} on failure. Returns None.
    """
    import re

    if not THREAD_ID:
        print(json.dumps({"error": "No thread ID provided", "messages": []}))
        return

    try:
        from marionette_driver.marionette import Marionette
    except ImportError:
        print(json.dumps({"error": "marionette_driver not installed", "messages": []}))
        return

    client = None
    try:
        client = Marionette(host='localhost', port=2828)
        client.start_session()
        debug("Connected")

        # Find Messenger tab; the for/else reports an error when no window matched.
        for handle in client.window_handles:
            client.switch_to_window(handle)
            if 'messenger.com' in client.get_url():
                break
        else:
            print(json.dumps({"error": "No Messenger tab found", "messages": []}))
            return

        # Navigate to the conversation
        target_url = f"https://www.messenger.com/t/{THREAD_ID}"
        current_url = client.get_url()
        # Match the whole ID: a plain substring test would treat thread 123 as
        # already open when the tab shows /t/1234 and scrape the wrong thread.
        on_target = re.search(rf"/t/{re.escape(THREAD_ID)}(?![0-9])", current_url) is not None
        if not on_target:
            debug(f"Navigating to {target_url}")
            client.navigate(target_url)
            # Fixed pause before scraping, presumably to let the page render.
            time.sleep(2)
        else:
            debug("Already on target conversation")

        # Scrape messages from the main content area
        messages = client.execute_script("""
            const msgs = [];
            const mainArea = document.querySelector('[role="main"]');
            if (!mainArea) return msgs;

            // Find all message groups - each group has a sender
            const groups = mainArea.querySelectorAll('[role="row"]');

            for (const group of groups) {
                // Get text content from dir="auto" spans (actual message text)
                const textEls = Array.from(group.querySelectorAll('[dir="auto"]'));
                if (textEls.length === 0) continue;

                // Filter out UI chrome
                const texts = textEls
                    .map(e => e.textContent.trim())
                    .filter(t => {
                        if (t.length < 1 || t.length > 5000) return false;
                        if (/^(Active now|Active \\d|Seen by|You sent|\\d+ (hour|minute|day|week)|Loading|Replying to|End-to-end encrypted|Messenger|Media & files|Privacy & support)/i.test(t)) return false;
                        if (/^(Today|Yesterday)$/i.test(t)) return false;
                        if (/^\\d{1,2}:\\d{2}\\s*(AM|PM)?$/i.test(t)) return false;
                        if (/^\\w{3} \\d{1,2}, \\d{4}$/i.test(t)) return false;
                        return true;
                    });
                if (texts.length === 0) continue;

                const text = texts.join(' ');
                if (text.length < 1) continue;

                // Try sender from img alt
                let sender = '';
                const img = group.querySelector('img[alt]');
                if (img && img.alt && img.alt.length < 60 && !/^\\d/.test(img.alt)) {
                    sender = img.alt;
                }

                msgs.push({sender: sender, text: text});
            }
            return msgs;
        """) or []

        debug(f"Found {len(messages)} raw messages")

        # Deduplicate consecutive identical texts
        deduped = []
        prev_text = ''
        for m in messages:
            if m['text'] != prev_text:
                deduped.append(m)
            prev_text = m['text']

        debug(f"After dedup: {len(deduped)} messages")
        print(json.dumps({"messages": deduped}))

    except ConnectionRefusedError:
        print(json.dumps({"error": "Cannot connect to Marionette on port 2828", "messages": []}))
    except Exception as e:
        print(json.dumps({"error": str(e), "messages": []}))
    finally:
        # Best-effort session teardown; a failure here must not mask the JSON already printed.
        if client:
            try:
                client.delete_session()
            except Exception:
                pass
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
if __name__ == '__main__':
|
|
131
|
+
main()
|