heathrow 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +58 -0
- data/README.md +205 -0
- data/bin/heathrow +42 -0
- data/bin/heathrowd +283 -0
- data/docs/ARCHITECTURE.md +1172 -0
- data/docs/DATABASE_SCHEMA.md +685 -0
- data/docs/DEVELOPMENT_WORKFLOW.md +867 -0
- data/docs/DISCORD_SETUP.md +142 -0
- data/docs/GMAIL_OAUTH_SETUP.md +120 -0
- data/docs/PLUGIN_SYSTEM.md +1370 -0
- data/docs/PROJECT_PLAN.md +1022 -0
- data/docs/README.md +417 -0
- data/docs/REDDIT_SETUP.md +174 -0
- data/docs/REPLY_FORWARD.md +182 -0
- data/docs/WHATSAPP_TELEGRAM_SETUP.md +306 -0
- data/heathrow.gemspec +34 -0
- data/heathrowd.service +21 -0
- data/img/heathrow.svg +95 -0
- data/img/rss_threaded.png +0 -0
- data/img/sources.png +0 -0
- data/lib/heathrow/address_book.rb +42 -0
- data/lib/heathrow/config.rb +332 -0
- data/lib/heathrow/database.rb +731 -0
- data/lib/heathrow/database_new.rb +392 -0
- data/lib/heathrow/event_bus.rb +175 -0
- data/lib/heathrow/logger.rb +122 -0
- data/lib/heathrow/message.rb +176 -0
- data/lib/heathrow/message_composer.rb +399 -0
- data/lib/heathrow/message_organizer.rb +774 -0
- data/lib/heathrow/migrations/001_initial_schema.rb +248 -0
- data/lib/heathrow/notmuch.rb +45 -0
- data/lib/heathrow/oauth2_smtp.rb +254 -0
- data/lib/heathrow/plugin/base.rb +212 -0
- data/lib/heathrow/plugin_manager.rb +141 -0
- data/lib/heathrow/poller.rb +93 -0
- data/lib/heathrow/smtp_sender.rb +204 -0
- data/lib/heathrow/source.rb +39 -0
- data/lib/heathrow/sources/base.rb +74 -0
- data/lib/heathrow/sources/discord.rb +357 -0
- data/lib/heathrow/sources/gmail.rb +294 -0
- data/lib/heathrow/sources/imap.rb +198 -0
- data/lib/heathrow/sources/instagram.rb +307 -0
- data/lib/heathrow/sources/instagram_fetch.py +101 -0
- data/lib/heathrow/sources/instagram_send.py +55 -0
- data/lib/heathrow/sources/instagram_send_marionette.py +104 -0
- data/lib/heathrow/sources/maildir.rb +606 -0
- data/lib/heathrow/sources/messenger.rb +212 -0
- data/lib/heathrow/sources/messenger_fetch.js +297 -0
- data/lib/heathrow/sources/messenger_fetch_marionette.py +138 -0
- data/lib/heathrow/sources/messenger_send.js +32 -0
- data/lib/heathrow/sources/messenger_send.py +100 -0
- data/lib/heathrow/sources/reddit.rb +461 -0
- data/lib/heathrow/sources/rss.rb +299 -0
- data/lib/heathrow/sources/slack.rb +375 -0
- data/lib/heathrow/sources/source_manager.rb +328 -0
- data/lib/heathrow/sources/telegram.rb +498 -0
- data/lib/heathrow/sources/webpage.rb +207 -0
- data/lib/heathrow/sources/weechat.rb +479 -0
- data/lib/heathrow/sources/whatsapp.rb +474 -0
- data/lib/heathrow/ui/application.rb +8098 -0
- data/lib/heathrow/ui/navigation.rb +8 -0
- data/lib/heathrow/ui/panes.rb +8 -0
- data/lib/heathrow/ui/source_wizard.rb +567 -0
- data/lib/heathrow/ui/threaded_view.rb +780 -0
- data/lib/heathrow/ui/views.rb +8 -0
- data/lib/heathrow/version.rb +3 -0
- data/lib/heathrow/wizards/discord_wizard.rb +193 -0
- data/lib/heathrow/wizards/slack_wizard.rb +140 -0
- data/lib/heathrow.rb +55 -0
- metadata +147 -0
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'net/http'
|
|
5
|
+
require 'json'
|
|
6
|
+
require 'uri'
|
|
7
|
+
require 'time'
|
|
8
|
+
|
|
9
|
+
module Heathrow
|
|
10
|
+
module Sources
|
|
11
|
+
class Telegram
|
|
12
|
+
attr_reader :source, :last_fetch_time
|
|
13
|
+
|
|
14
|
+
# Builds a Telegram adapter around a stored source record.
#
# @param source [#config] source record; +config+ may be a JSON string or an
#   already-parsed Hash
def initialize(source)
  @source = source
  raw_config = source.config
  parsed = raw_config.is_a?(String) ? JSON.parse(raw_config) : raw_config
  # A source without settings has a nil config. Fall back to an empty Hash so
  # lookups such as @config['bot_token'] return nil instead of raising
  # NoMethodError in every public method.
  @config = parsed || {}
  @last_fetch_time = Time.now
  @last_message_id = nil
end
|
|
20
|
+
|
|
21
|
+
# Retrieves messages over whichever transport is configured: the Bot API when
# a bot token is present, otherwise the MTProto proxy when both api_id and
# api_hash are present.
#
# Errors are never raised to the caller; they are printed to stdout only when
# the DEBUG environment variable is set.
#
# @return [Array] fetched messages, or an empty array when nothing is
#   configured or the fetch raised
def fetch_messages
  if @config['bot_token']
    fetch_bot_messages
  elsif @config['api_id'] && @config['api_hash']
    fetch_mtproto_messages
  else
    puts "Telegram: No valid credentials configured" if ENV['DEBUG']
    []
  end
rescue => e
  puts "Telegram fetch error: #{e.message}" if ENV['DEBUG']
  puts e.backtrace.join("\n") if ENV['DEBUG']
  []
end
|
|
41
|
+
|
|
42
|
+
# Checks that the configured credentials work, using the bot check when a bot
# token is present and the MTProto check when api_id and api_hash are present.
#
# @return [Hash] { success: Boolean, message: String }; any exception is
#   reported as a failed result instead of being raised
def test_connection
  return test_bot_connection if @config['bot_token']
  return test_mtproto_connection if @config['api_id'] && @config['api_hash']

  { success: false, message: "No Telegram credentials configured" }
rescue => e
  { success: false, message: "Connection test failed: #{e.message}" }
end
|
|
55
|
+
|
|
56
|
+
# Starts the interactive user-account login when api_id, api_hash and
# phone_number are all configured; otherwise prints setup hints.
#
# @return [Boolean] result of the MTProto login, or false when the required
#   settings are missing
def authenticate
  required_keys = %w[api_id api_hash phone_number]
  return authenticate_mtproto if required_keys.all? { |key| @config[key] }

  puts "For user account access, configure api_id, api_hash, and phone_number"
  puts "For bot access, configure bot_token"
  false
end
|
|
65
|
+
|
|
66
|
+
# Capability flag: this source always reports that it can send replies.
#
# @return [Boolean] always true
def can_reply?
  true
end
|
|
69
|
+
|
|
70
|
+
# Sends a message through the bot when a bot token is configured, otherwise
# through the MTProto proxy when a session string is configured.
#
# @param to [String] recipient chat identifier
# @param subject [String] not used by this method (presumably kept so every
#   source shares one signature; confirm against callers)
# @param body [String] message text
# @param in_reply_to [Object, nil] message being replied to, if any
# @return [Hash] { success: Boolean, message: String }
def send_message(to, subject, body, in_reply_to = nil)
  return send_bot_message(to, body, in_reply_to) if @config['bot_token']
  return send_mtproto_message(to, body, in_reply_to) if @config['session_string']

  { success: false, message: "Telegram not configured for sending" }
end
|
|
79
|
+
|
|
80
|
+
private
|
|
81
|
+
|
|
82
|
+
# Sends a text message through the Telegram Bot API (sendMessage).
#
# @param to [String, Integer] numeric chat ID (may be negative); usernames are
#   rejected because resolving them to chat IDs is not implemented
# @param body [String] message text
# @param in_reply_to [Object, nil] ID of the message to reply to, if any
# @return [Hash] { success: Boolean, message: String }
def send_bot_message(to, body, in_reply_to = nil)
  token = @config['bot_token']

  # Accept both Integer and String chat IDs by normalising to a String, and
  # anchor on the whole value: ^/$ match per line, so "name\n123" used to be
  # accepted as a chat ID.
  chat_id = to.to_s.strip
  unless chat_id.match?(/\A-?\d+\z/)
    return { success: false, message: "Please use chat ID for Telegram messages" }
  end

  uri = URI("https://api.telegram.org/bot#{token}/sendMessage")

  params = { chat_id: chat_id, text: body }
  params[:reply_to_message_id] = in_reply_to if in_reply_to

  request = Net::HTTP::Post.new(uri)
  request['Content-Type'] = 'application/json'
  request.body = params.to_json

  begin
    response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
      http.request(request)
    end

    unless response.is_a?(Net::HTTPSuccess)
      return { success: false, message: "HTTP error: #{response.code}" }
    end

    data = JSON.parse(response.body)
    if data['ok']
      { success: true, message: "Message sent via Telegram bot" }
    else
      { success: false, message: "Failed: #{data['description']}" }
    end
  rescue => e
    # Network and parse failures are reported as a failed result, not raised.
    { success: false, message: "Send failed: #{e.message}" }
  end
end
|
|
127
|
+
|
|
128
|
+
# Sends a text message through the MTProto proxy server's /send endpoint.
#
# @param to [String] chat identifier, passed to the proxy unchanged
# @param body [String] message text
# @param in_reply_to [Object, nil] message being replied to, if any
# @return [Hash] { success: Boolean, message: String }
def send_mtproto_message(to, body, in_reply_to = nil)
  api_url = @config['mtproto_api_url'] || 'http://localhost:8081'
  uri = URI("#{api_url}/send")

  request = Net::HTTP::Post.new(uri)
  request['Content-Type'] = 'application/json'
  request.body = {
    session_string: @config['session_string'],
    chat_id: to,
    text: body,
    reply_to: in_reply_to
  }.to_json

  begin
    # Enable TLS when the configured proxy URL is https; without use_ssl the
    # request is sent in plain text to the TLS port and cannot succeed.
    response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
      http.request(request)
    end

    if response.is_a?(Net::HTTPSuccess)
      { success: true, message: "Message sent via Telegram" }
    else
      { success: false, message: "Failed to send via MTProto" }
    end
  rescue => e
    { success: false, message: "MTProto server not available: #{e.message}" }
  end
end
|
|
156
|
+
|
|
157
|
+
# Bot API Methods (simpler but limited to bot interactions)
|
|
158
|
+
|
|
159
|
+
# Polls the Bot API getUpdates endpoint and converts incoming messages.
#
# @return [Array<Hash>] converted messages (empty on an HTTP or API error)
def fetch_bot_messages
  messages = []
  token = @config['bot_token']

  uri = URI("https://api.telegram.org/bot#{token}/getUpdates")
  params = { timeout: 0, limit: @config['fetch_limit'] || 100 }
  # Ask only for updates newer than the last one already seen.
  params[:offset] = @last_message_id + 1 if @last_message_id
  uri.query = URI.encode_www_form(params)

  response = Net::HTTP.get_response(uri)
  unless response.is_a?(Net::HTTPSuccess)
    puts "Telegram Bot API error: #{response.code}" if ENV['DEBUG']
    return messages
  end

  data = JSON.parse(response.body)
  return messages unless data['ok'] && data['result']

  data['result'].each do |update|
    # Advance the offset for every update, not only those with a 'message'.
    # Otherwise other update types are never acknowledged, come back on every
    # poll, and can fill the fetch window.
    @last_message_id = [@last_message_id, update['update_id']].compact.max

    next unless update['message']

    begin
      msg = convert_bot_message(update['message'])
      messages << msg if msg
    rescue => e
      # One malformed update must not discard the rest of the batch.
      puts "Telegram: skipped update #{update['update_id']}: #{e.message}" if ENV['DEBUG']
    end
  end

  messages
end
|
|
192
|
+
|
|
193
|
+
# Verifies the bot token by calling the Bot API getMe endpoint.
#
# @return [Hash] { success: Boolean, message: String }
def test_bot_connection
  endpoint = URI("https://api.telegram.org/bot#{@config['bot_token']}/getMe")
  reply = Net::HTTP.get_response(endpoint)

  unless reply.is_a?(Net::HTTPSuccess)
    return { success: false, message: "Failed to connect to Telegram Bot API" }
  end

  payload = JSON.parse(reply.body)
  return { success: false, message: "Bot token invalid" } unless payload['ok']

  { success: true, message: "Connected as bot @#{payload['result']['username']}" }
end
|
|
210
|
+
|
|
211
|
+
# Converts a Bot API message object into a Heathrow message hash.
#
# @param msg [Hash] message object from a getUpdates result
# @return [Hash] message attributes (source_id, external_id, sender,
#   recipient, subject, content, raw_data, attachments, timestamp, is_read)
def convert_bot_message(msg)
  # 'from' is optional in the Bot API (e.g. messages sent to channels), so
  # guard against nil instead of crashing on from['username'].
  from = msg['from'] || {}
  chat = msg['chat'] || {}

  full_name = "#{from['first_name']} #{from['last_name']}".strip
  sender = from['username'] || (full_name.empty? ? nil : full_name) || chat['title'] || 'Unknown'

  recipient = case chat['type']
              when 'private' then 'Me (Bot)'
              when 'group', 'supergroup', 'channel' then chat['title']
              else 'Unknown'
              end

  content = msg['text'] || msg['caption'] || ''
  # Keep exactly 50 characters before the ellipsis. The previous [0..50]
  # slice kept 51, so a 51-character text was shown in full plus "...".
  subject = content[0, 50]
  subject += "..." if content.length > 50

  attachments = extract_bot_attachments(msg)

  {
    source_id: @source.id,
    source_type: 'telegram',
    external_id: "telegram_#{msg['message_id']}_#{chat['id']}",
    sender: sender,
    recipient: recipient,
    subject: subject,
    content: content,
    raw_data: msg.to_json,
    attachments: attachments,
    timestamp: Time.at(msg['date']).iso8601,
    is_read: 0
  }
end
|
|
251
|
+
|
|
252
|
+
# Collects attachment descriptors (photo, video, document, voice, location,
# sticker) from a Bot API message object.
#
# @param msg [Hash] message object from the Bot API
# @return [String, nil] JSON array of attachment hashes, or nil when the
#   message carries none
def extract_bot_attachments(msg)
  attachments = []

  # A photo arrives as several size variants; keep the largest. file_size is
  # optional, so rank by it first and by pixel area second rather than
  # comparing nil with Integer. An empty list yields no attachment.
  largest = Array(msg['photo']).max_by do |size|
    [size['file_size'].to_i, size['width'].to_i * size['height'].to_i]
  end
  attachments << { type: 'photo', file_id: largest['file_id'] } if largest

  # File-backed media share one shape: type, file_id, plus a few extra fields.
  add_file = lambda do |kind, *fields|
    media = msg[kind]
    next unless media

    entry = { type: kind, file_id: media['file_id'] }
    fields.each { |field| entry[field.to_sym] = media[field] }
    attachments << entry
  end

  add_file.call('video', 'duration')
  add_file.call('document', 'file_name', 'mime_type')
  add_file.call('voice', 'duration')

  if msg['location']
    attachments << {
      type: 'location',
      latitude: msg['location']['latitude'],
      longitude: msg['location']['longitude']
    }
  end

  add_file.call('sticker', 'emoji')

  attachments.empty? ? nil : attachments.to_json
end
|
|
309
|
+
|
|
310
|
+
# MTProto Methods (full user account access via proxy server)
|
|
311
|
+
|
|
312
|
+
# Fetches messages from the MTProto proxy server's /messages endpoint.
#
# When incremental_sync is enabled the request asks only for messages newer
# than five minutes before the previous fetch.
#
# @return [Array<Hash>] converted messages (empty on an HTTP error)
def fetch_mtproto_messages
  base_url = @config['mtproto_api_url'] || 'http://localhost:8081'
  endpoint = URI("#{base_url}/messages")

  query = {
    session_string: @config['session_string'],
    limit: @config['fetch_limit'] || 100
  }
  query[:since] = (@last_fetch_time - 300).iso8601 if @last_fetch_time && @config['incremental_sync']
  endpoint.query = URI.encode_www_form(query)

  reply = Net::HTTP.get_response(endpoint)
  unless reply.is_a?(Net::HTTPSuccess)
    puts "Telegram MTProto API error: #{reply.code}" if ENV['DEBUG']
    return []
  end

  payload = JSON.parse(reply.body)
  collected = (payload['messages'] || []).each_with_object([]) do |raw, acc|
    converted = convert_mtproto_message(raw)
    acc << converted if converted
  end

  # Only a successful fetch moves the incremental-sync marker forward.
  @last_fetch_time = Time.now
  collected
end
|
|
350
|
+
|
|
351
|
+
# Checks that the MTProto proxy is reachable (/health) and, when a session
# string is configured, that the session is still authenticated
# (/session/status).
#
# @return [Hash] { success: Boolean, message: String }
def test_mtproto_connection
  base_url = @config['mtproto_api_url'] || 'http://localhost:8081'

  health = Net::HTTP.get_response(URI("#{base_url}/health"))
  unless health.is_a?(Net::HTTPSuccess)
    return { success: false, message: "Telegram MTProto server not running at #{base_url}" }
  end

  unless @config['session_string']
    return { success: false, message: "No session configured. Run authentication first." }
  end

  status_uri = URI("#{base_url}/session/status")
  status_uri.query = URI.encode_www_form(session_string: @config['session_string'])
  status = Net::HTTP.get_response(status_uri)
  return { success: false, message: "Failed to check session status" } unless status.is_a?(Net::HTTPSuccess)

  info = JSON.parse(status.body)
  return { success: false, message: "Session expired. Re-authentication required." } unless info['authenticated']

  { success: true, message: "Connected as #{info['username'] || info['phone']}" }
end
|
|
382
|
+
|
|
383
|
+
# Runs the interactive login against the MTProto proxy: requests a login
# code, submits the code typed by the user and, when the proxy asks for it,
# the 2FA password.
#
# On success the returned session string is stored in the in-memory @config.
# NOTE(review): nothing in this method writes the config back to storage;
# confirm the caller persists it, since the message says "Session saved".
#
# @return [Boolean] true when a session string was obtained, false otherwise
def authenticate_mtproto
  api_url = @config['mtproto_api_url'] || 'http://localhost:8081'

  puts "\n=== Telegram Authentication ==="
  puts "Phone: #{@config['phone_number']}"

  started = mtproto_post_json("#{api_url}/auth/start", {
    api_id: @config['api_id'],
    api_hash: @config['api_hash'],
    phone_number: @config['phone_number']
  })

  auth_data = nil
  if started && started['code_sent']
    code = read_auth_input("\nEnter the code sent to your Telegram app: ")
    if code
      auth_data = mtproto_post_json("#{api_url}/auth/code", {
        session_id: started['session_id'],
        code: code
      })
    end

    if auth_data && auth_data['requires_2fa']
      password = read_auth_input("Enter your 2FA password: ")
      if password
        second_step = mtproto_post_json("#{api_url}/auth/2fa", {
          session_id: started['session_id'],
          password: password
        })
        # Keep the earlier reply when the 2FA request is rejected.
        auth_data = second_step if second_step
      end
    end
  end

  if auth_data && auth_data['session_string']
    @config['session_string'] = auth_data['session_string']
    puts "\n✓ Authentication successful!"
    puts "Session saved. You won't need to authenticate again."
    return true
  end

  puts "\n✗ Authentication failed"
  false
end

# POSTs +payload+ as JSON to +url+ and returns the parsed reply, or nil when
# the server answers with a non-2xx status. TLS is enabled for https URLs.
def mtproto_post_json(url, payload)
  uri = URI(url)
  request = Net::HTTP::Post.new(uri)
  request['Content-Type'] = 'application/json'
  request.body = payload.to_json

  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(request)
  end

  response.is_a?(Net::HTTPSuccess) ? JSON.parse(response.body) : nil
end

# Prompts on stdout and reads one line from the terminal. Reads from $stdin
# explicitly because Kernel#gets would read from files named in ARGV.
# Returns nil at end of input.
def read_auth_input(prompt)
  print prompt
  $stdout.flush
  line = $stdin.gets
  line && line.chomp
end
|
|
462
|
+
|
|
463
|
+
# Converts a message returned by the MTProto proxy into a Heathrow message
# hash.
#
# NOTE(review): external_id is built from msg['id'] alone; confirm the proxy
# returns IDs that are unique across chats.
#
# @param msg [Hash] message object from the proxy's /messages reply
# @return [Hash] message attributes (source_id, external_id, sender,
#   recipient, subject, content, raw_data, attachments, timestamp, is_read)
def convert_mtproto_message(msg)
  sender = msg['sender_name'] || msg['sender_username'] || msg['sender_id'].to_s
  recipient = msg['chat_name'] || msg['chat_id'].to_s

  content = msg['text'] || ''
  # Keep exactly 50 characters before the ellipsis. The previous [0..50]
  # slice kept 51, so a 51-character text was shown in full plus "...".
  subject = content[0, 50]
  subject += "..." if content.length > 50

  media = msg['media']
  attachments =
    if media
      [{ type: media['type'], file_id: media['file_id'], caption: media['caption'] }].to_json
    end

  {
    source_id: @source.id,
    source_type: 'telegram',
    external_id: "telegram_#{msg['id']}",
    sender: sender,
    recipient: recipient,
    subject: subject,
    content: content,
    raw_data: msg.to_json,
    attachments: attachments,
    timestamp: msg['date'] || Time.now.iso8601,
    is_read: msg['is_read'] ? 1 : 0
  }
end
|
|
496
|
+
end
|
|
497
|
+
end
|
|
498
|
+
end
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
require 'digest'
|
|
2
|
+
require 'shellwords'
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'time'
|
|
5
|
+
require_relative 'base'
|
|
6
|
+
|
|
7
|
+
module Heathrow
|
|
8
|
+
module Sources
|
|
9
|
+
class Webpage < Base
|
|
10
|
+
# Sets up the page watcher and makes sure the snapshot directory exists.
#
# @param name [String] source name (forwarded to the superclass)
# @param config [Hash] source configuration; 'pages' lists the watched pages
# @param db [Object] database handle (forwarded to the superclass)
def initialize(name, config, db)
  super
  @pages = config['pages'] || []
  @snapshots_dir = File.join(Dir.home, '.heathrow', 'webwatch')
  # Dir.mkdir creates a single level only and raises ENOENT when the parent
  # is missing, so create ~/.heathrow first if needed.
  [File.dirname(@snapshots_dir), @snapshots_dir].each do |dir|
    Dir.mkdir(dir) unless Dir.exist?(dir)
  end
end
|
|
16
|
+
|
|
17
|
+
# Checks every watched page and counts how many change messages were created.
# A failure on one page is reported on stderr (only when DEBUG is set) and
# does not stop the remaining pages.
#
# @param source_id [Integer] database ID of this source
# @return [Integer] number of change messages created
def sync(source_id)
  @pages.inject(0) do |total, page|
    total + check_page(source_id, page)
  rescue => e
    STDERR.puts "Webwatch error #{page['url']}: #{e.message}" if ENV['DEBUG']
    total
  end
end
|
|
28
|
+
|
|
29
|
+
# Runs a sync for this source when it is enabled and registered in the
# database, then records the fetch time. Changes are written to the database
# by sync, so nothing is handed back to the caller.
#
# @return [Array] always an empty array
def fetch
  if enabled?
    record = @db.get_source_by_name(@name)
    if record
      sync(record['id'])
      update_last_fetch
    end
  end
  []
end
|
|
37
|
+
|
|
38
|
+
# Adds a page to the watch list unless its URL is already present, then
# stores the list in the config and saves it.
#
# @param url [String] page address
# @param title [String, nil] display title
# @param selector [String, nil] optional selector limiting the watched part
# @param tags [Array<String>] labels attached to change messages
def add_page(url, title: nil, selector: nil, tags: [])
  already_watched = @pages.any? { |existing| existing['url'] == url }
  unless already_watched
    @pages.push({ 'url' => url, 'title' => title, 'selector' => selector, 'tags' => tags })
  end

  @config['pages'] = @pages
  save_config
end
|
|
44
|
+
|
|
45
|
+
# Removes every watched page with the given URL, then stores the list in the
# config and saves it.
#
# @param url [String] address of the page to stop watching
def remove_page(url)
  @pages.delete_if { |watched| watched['url'] == url }
  @config['pages'] = @pages
  save_config
end
|
|
50
|
+
|
|
51
|
+
# Lists the watched pages as symbol-keyed hashes.
#
# @return [Array<Hash>] one { url:, title:, selector:, tags: } hash per page;
#   tags defaults to an empty array
def list_pages
  @pages.map do |page|
    summary = %w[url title selector].to_h { |key| [key.to_sym, page[key]] }
    summary.merge(tags: page['tags'] || [])
  end
end
|
|
54
|
+
|
|
55
|
+
private
|
|
56
|
+
|
|
57
|
+
# Fetches one watched page, compares its whitespace-normalised text with the
# stored snapshot, and inserts a "Changed" message when they differ.
#
# The first successful fetch only stores the snapshot. The snapshot is
# rewritten before the message is inserted.
#
# @param source_id [Integer] database ID of this source
# @param page [Hash] page entry with 'url' and optional 'title', 'selector',
#   'tags'
# @return [Integer] 1 when a change message was inserted, otherwise 0
def check_page(source_id, page)
  url = page['url']
  title = page['title'] || url
  selector = page['selector']
  tags = page['tags'] || []

  html = http_get(url)
  return 0 if !html || html.empty?

  text =
    if selector && !selector.empty?
      extract_by_selector(html, selector)
    else
      extract_body(html)
    end
  return 0 if text.nil? || text.empty?

  # Collapse all whitespace so layout-only changes do not count.
  current = text.gsub(/\s+/, ' ').strip

  # One snapshot file per URL, named by the URL's MD5 digest.
  snapshot_path = File.join(@snapshots_dir, Digest::MD5.hexdigest(url))
  unless File.exist?(snapshot_path)
    File.write(snapshot_path, current)
    return 0
  end

  previous = File.read(snapshot_path)
  return 0 if Digest::MD5.hexdigest(current) == Digest::MD5.hexdigest(previous)

  summary = generate_diff(previous, current)
  File.write(snapshot_path, current)

  link_info = { link: url, page_title: title }
  message = {
    source_id: source_id,
    # The timestamp makes each detected change a distinct message.
    external_id: "webwatch_#{Digest::MD5.hexdigest(url + Time.now.to_i.to_s)}",
    sender: title,
    sender_name: title,
    recipients: ['Web Watch'],
    subject: "Changed: #{title}",
    content: summary,
    html_content: summary,
    timestamp: Time.now.to_i,
    received_at: Time.now.to_i,
    read: false,
    starred: false,
    archived: false,
    labels: ['Web Watch'] + tags,
    metadata: link_info.merge(selector: selector, tags: tags, changed_at: Time.now.iso8601),
    raw_data: { link: url, page_title: title }
  }

  begin
    @db.insert_message(message)
    1
  rescue SQLite3::ConstraintException
    0
  end
end
|
|
128
|
+
|
|
129
|
+
# Downloads a page with curl, following redirects.
#
# @param url [String] page address (shell-escaped before use)
# @return [String, nil] response body, or nil when curl fails, the server
#   answers with an HTTP error status, or the body is empty
def http_get(url)
  # -f (--fail) makes curl exit non-zero on HTTP 4xx/5xx. Without it an error
  # page is returned as content and later reported as a page change.
  result = `curl -sfL --max-time 20 --max-redirs 8 -A 'Mozilla/5.0 (X11; Linux x86_64) Heathrow/1.0' #{Shellwords.escape(url)} 2>/dev/null`
  return nil unless $?.success? && !result.empty?

  # Replace bytes that are invalid in the string's encoding; the regex-based
  # extraction that follows raises ArgumentError on invalid byte sequences.
  result.scrub
rescue => e
  STDERR.puts "Webwatch fetch error #{url}: #{e.message}" if ENV['DEBUG']
  nil
end
|
|
136
|
+
|
|
137
|
+
# Reduces a whole HTML document to plain text: drops script, style, head and
# nav elements together with their contents, then strips the remaining tags.
#
# @param html [String] raw HTML
# @return [String] plain text
def extract_body(html)
  cleaned = %w[script style head nav].inject(html) do |markup, tag|
    markup.gsub(/<#{tag}[^>]*>.*?<\/#{tag}>/mi, '')
  end
  strip_html(cleaned)
end
|
|
145
|
+
|
|
146
|
+
# Extracts the text of elements matching a simple selector, using regular
# expressions rather than an HTML parser.
#
# Supported forms: #id, .class, tag, tag.class. Any other selector falls back
# to whole-document extraction. For #id and .class the match ends at the
# first closing tag of any kind, so nested markup is cut short.
#
# @param html [String] raw HTML
# @param selector [String] selector in one of the supported forms
# @return [String] text of all matches joined by newlines (empty when nothing
#   matches)
def extract_by_selector(html, selector)
  # \A/\z anchor the whole selector; ^/$ would accept a multi-line string
  # whose first or last line merely looks like a selector.
  pattern =
    case selector.to_s.strip
    when /\A#([\w-]+)\z/
      id = Regexp.escape(Regexp.last_match(1))
      # Whitespace before the attribute name keeps data-id="..." and similar
      # attributes from matching.
      /<[^>]+\sid\s*=\s*["']#{id}["'][^>]*>(.*?)<\/[^>]+>/mi
    when /\A\.([\w-]+)\z/
      css_class = Regexp.escape(Regexp.last_match(1))
      /<[^>]+\sclass\s*=\s*["'][^"']*\b#{css_class}\b[^"']*["'][^>]*>(.*?)<\/[^>]+>/mi
    when /\A(\w+)\z/
      tag = Regexp.escape(Regexp.last_match(1))
      # The lookahead ends the tag name so that "p" no longer matches <pre>.
      /<#{tag}(?=[\s\/>])[^>]*>(.*?)<\/#{tag}>/mi
    when /\A(\w+)\.([\w-]+)\z/
      tag = Regexp.escape(Regexp.last_match(1))
      css_class = Regexp.escape(Regexp.last_match(2))
      /<#{tag}\s(?:[^>]*\s)?class\s*=\s*["'][^"']*\b#{css_class}\b[^"']*["'][^>]*>(.*?)<\/#{tag}>/mi
    end

  return extract_body(html) unless pattern

  strip_html(html.scan(pattern).flatten.join("\n"))
end
|
|
169
|
+
|
|
170
|
+
# Builds a short text summary of what changed between two snapshots.
#
# Both texts are split on ". " into sentence-like fragments; fragments found
# only in the old text are listed as removed and fragments found only in the
# new text as added, at most 15 of each.
#
# @param old_text [String] previous snapshot
# @param new_text [String] current snapshot
# @return [String] the summary, or a generic notice when the fragment sets
#   are equal
def generate_diff(old_text, new_text)
  fragments = ->(text) { text.split('. ').map(&:strip).reject(&:empty?) }
  before = fragments.call(old_text)
  after = fragments.call(new_text)

  sections = [
    ['REMOVED:', '-', before - after],
    ['ADDED:', '+', after - before]
  ]

  report = []
  sections.each do |heading, marker, entries|
    next if entries.empty?

    # A blank line separates the two sections when both are present.
    report << "" unless report.empty?
    report << heading
    entries.first(15).each { |entry| report << " #{marker} #{entry}" }
    report << " ... (#{entries.size - 15} more)" if entries.size > 15
  end

  return "Content changed (details differ in whitespace/structure)" if report.empty?

  report.join("\n")
end
|
|
192
|
+
|
|
193
|
+
# Converts an HTML fragment to plain text: removes tags, decodes a handful of
# common character entities, and collapses runs of blank lines.
#
# @param text [String, nil] HTML fragment
# @return [String] plain text ('' for nil or empty input)
def strip_html(text)
  return '' if text.nil? || text.empty?

  entities = {
    '&nbsp;' => ' ',
    '&amp;' => '&',
    '&lt;' => '<',
    '&gt;' => '>',
    '&quot;' => '"',
    '&#39;' => "'",
    '&apos;' => "'"
  }

  text.gsub(/<[^>]*>/, '')
      # The previous replacements mapped each character to itself, so no
      # entity was decoded. One pass over all entities also avoids decoding
      # twice ("&amp;lt;" becomes "&lt;", not "<").
      .gsub(/&(?:nbsp|amp|lt|gt|quot|apos|#39);/, entities)
      .gsub(/\n\s*\n\s*\n/, "\n\n")
      .strip
end
|
|
205
|
+
end
|
|
206
|
+
end
|
|
207
|
+
end
|