chgksuite 0.24.0b2__py3-none-any.whl → 0.24.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,19 @@
1
+ import json
1
2
  import os
2
3
  import random
3
4
  import re
4
- import shutil
5
5
  import sqlite3
6
+ import tempfile
6
7
  import time
8
+ import uuid
7
9
 
10
+ import requests
8
11
  import toml
9
12
  from PIL import Image, ImageOps
10
- from telethon import errors
11
- from telethon.sync import TelegramClient
12
- from telethon.tl.functions.messages import (
13
- GetDiscussionMessageRequest,
14
- )
15
- from telethon.tl.types import InputChannel
16
13
 
17
14
  from chgksuite.common import get_chgksuite_dir, init_logger, load_settings, tryint
18
15
  from chgksuite.composer.composer_common import BaseExporter, parseimg
19
- from chgksuite.composer.telegram_parser import CustomHtmlParser
16
+ from chgksuite.composer.telegram_bot import run_bot_in_thread
20
17
 
21
18
 
22
19
  class TelegramExporter(BaseExporter):
@@ -24,29 +21,118 @@ class TelegramExporter(BaseExporter):
24
21
  super().__init__(*args, **kwargs)
25
22
  self.chgksuite_dir = get_chgksuite_dir()
26
23
  self.logger = kwargs.get("logger") or init_logger("composer")
27
- try:
28
- self.init_tg()
29
- except (errors.AuthKeyUnregisteredError, sqlite3.OperationalError) as e:
30
- filepath = os.path.join(
31
- self.chgksuite_dir, self.args.tgaccount + ".session"
32
- )
33
- new_filepath = filepath + ".bak"
34
- self.logger.warning(f"Session error: {str(e)}. Moving session: {filepath} -> {new_filepath}")
35
- if os.path.isfile(filepath):
36
- shutil.move(filepath, new_filepath)
37
- self.init_tg()
38
24
  self.qcount = 1
39
25
  self.number = 1
40
26
  self.tg_heading = None
27
+ self.forwarded_message = None
28
+ self.target_channel = None
29
+ self.created_at = None
30
+ self.telegram_toml_path = os.path.join(self.chgksuite_dir, "telegram.toml")
31
+ self.resolve_db_path = os.path.join(self.chgksuite_dir, "resolve.db")
32
+ self.temp_db_path = os.path.join(
33
+ tempfile.gettempdir(), f"telegram_sidecar_{uuid.uuid4().hex}.db"
34
+ )
35
+ self.bot_token = None
36
+ self.control_chat_id = None # Chat ID where the user talks to the bot
37
+ self.channel_id = None # Target channel ID
38
+ self.chat_id = None # Discussion group ID linked to the channel
39
+ self.auth_uuid = uuid.uuid4().hex[:8]
40
+ self.init_telegram()
41
+
42
+ def check_connectivity(self):
43
+ req_me = requests.get(f"https://api.telegram.org/bot{self.bot_token}/getMe")
44
+ if req_me.status_code != 200:
45
+ raise Exception(
46
+ f"getMe request wasn't successful: {req_me.status_code} {req_me.text}"
47
+ )
48
+ obj = req_me.json()
49
+ assert obj["ok"]
50
+ if self.args.debug:
51
+ print(f"connection successful! {obj}")
52
+ self.bot_id = obj["result"]["id"]
53
+
54
+ def init_temp_db(self):
55
+ self.db_conn = sqlite3.connect(self.temp_db_path)
56
+ self.db_conn.row_factory = sqlite3.Row
41
57
 
42
- def init_tg(self):
43
- api_id, api_hash = self.get_api_credentials()
44
- self.client = TelegramClient(
45
- os.path.join(self.chgksuite_dir, self.args.tgaccount), api_id, api_hash
58
+ cursor = self.db_conn.cursor()
59
+
60
+ cursor.execute("""
61
+ CREATE TABLE IF NOT EXISTS messages (
62
+ raw_data TEXT,
63
+ chat_id TEXT,
64
+ created_at TEXT
65
+ )
66
+ """)
67
+
68
+ cursor.execute("""
69
+ CREATE TABLE IF NOT EXISTS bot_status (
70
+ raw_data TEXT,
71
+ created_at TEXT
46
72
  )
47
- self.client.start()
48
- me = self.client.get_me()
49
- self.logger.debug(f"Logged in as {me.username or me.first_name}")
73
+ """)
74
+
75
+ self.db_conn.commit()
76
+
77
+ def init_telegram(self):
78
+ """Initialize Telegram API connection and start sidecar bot."""
79
+ self.bot_token = self.get_api_credentials()
80
+ assert self.bot_token is not None
81
+
82
+ self.init_temp_db()
83
+ self.init_resolve_db()
84
+ self.check_connectivity()
85
+
86
+ # Start the sidecar bot as a daemon thread
87
+ if self.args.debug:
88
+ print(f"Starting sidecar bot with DB at {self.temp_db_path}")
89
+ self.bot_thread = run_bot_in_thread(self.bot_token, self.temp_db_path)
90
+ cur = self.db_conn.cursor()
91
+ while True:
92
+ time.sleep(2)
93
+ messages = cur.execute(
94
+ "select raw_data, created_at from bot_status"
95
+ ).fetchall()
96
+ if messages and json.loads(messages[0][0])["status"] == "ok":
97
+ break
98
+ # Request user authentication
99
+ self.authenticate_user()
100
+
101
+ def authenticate_user(self):
102
+ print("\n" + "=" * 50)
103
+ print(f"Please send the following code to the bot: {self.auth_uuid}")
104
+ print("This is for security validation.")
105
+ print("=" * 50 + "\n")
106
+
107
+ # Wait for authentication
108
+ retry_count = 0
109
+ SLEEP = 2
110
+ max_retries = 300 / SLEEP # 5 minutes
111
+
112
+ while not self.control_chat_id and retry_count < max_retries:
113
+ time.sleep(2)
114
+ cursor = self.db_conn.cursor()
115
+ cursor.execute(
116
+ f"SELECT * FROM messages m WHERE m.raw_data like '%{self.auth_uuid}%' ORDER BY m.created_at DESC LIMIT 1",
117
+ )
118
+ result = cursor.fetchone()
119
+
120
+ if result:
121
+ msg_data = json.loads(result["raw_data"])
122
+ self.control_chat_id = msg_data["message"]["chat"]["id"]
123
+ self.send_api_request(
124
+ "sendMessage",
125
+ {
126
+ "chat_id": self.control_chat_id,
127
+ "text": "✅ Authentication successful! This chat will be used for control messages.",
128
+ },
129
+ )
130
+
131
+ retry_count += 1
132
+
133
+ if not self.control_chat_id:
134
+ self.logger.error("Authentication timeout. Please try again.")
135
+ raise Exception("Authentication failed")
50
136
 
51
137
  def structure_has_stats(self):
52
138
  for element in self.structure:
@@ -54,45 +140,136 @@ class TelegramExporter(BaseExporter):
54
140
  return True
55
141
  return False
56
142
 
57
- def get_message_link(self, message, channel=None):
58
- if not channel:
59
- channel = self.client.get_entity(message.peer_id)
143
+ def get_bot_token(self, tg):
144
+ if self.args.tgaccount == "my_account":
60
145
 
61
- # Determine if the channel is public (has a username)
62
- if hasattr(channel, "username") and channel.username:
63
- # Public channel with username
64
- return f"https://t.me/{channel.username}/{message.id}"
146
+ def _getter(x):
147
+ return x["bot_token"]
148
+ else:
149
+
150
+ def _getter(x):
151
+ return x["bot_tokens"][self.args.tgaccount]
152
+
153
+ try:
154
+ return _getter(tg)
155
+ except KeyError:
156
+ bot_token = input("Please paste your bot token:").strip()
157
+
158
+ if self.args.tgaccount == "my_account":
159
+
160
+ def _setter(x, y):
161
+ x["bot_token"] = y
65
162
  else:
66
- # Private channel, use channel ID
67
- channel_id_str = str(channel.id)
68
- # Remove -100 prefix if present (common in Telethon)
69
- if channel_id_str.startswith("-100"):
70
- channel_id_str = channel_id_str[4:]
71
- return f"https://t.me/c/{channel_id_str}/{message.id}"
163
+
164
+ def _setter(x, y):
165
+ if "bot_tokens" not in y:
166
+ x["bot_tokens"] = {}
167
+ x["bot_tokens"][self.args.tgaccount] = y
168
+
169
+ _setter(tg, bot_token)
170
+ self.save_tg(tg)
171
+ return bot_token
72
172
 
73
173
  def get_api_credentials(self):
174
+ """Get or create bot token and channel/discussion IDs from telegram.toml"""
74
175
  settings = load_settings()
75
- telegram_toml_file_path = os.path.join(self.chgksuite_dir, "telegram.toml")
76
- if os.path.exists(telegram_toml_file_path) and not self.args.reset_api:
77
- with open(telegram_toml_file_path, "r", encoding="utf8") as f:
176
+
177
+ if (
178
+ settings.get("stop_if_no_stats")
179
+ and not self.structure_has_stats()
180
+ and not os.environ.get("CHGKSUITE_BYPASS_STATS_CHECK")
181
+ ):
182
+ raise Exception("don't publish questions without stats")
183
+
184
+ if os.path.exists(self.telegram_toml_path):
185
+ with open(self.telegram_toml_path, "r", encoding="utf8") as f:
78
186
  tg = toml.load(f)
79
- if (
80
- settings.get("stop_if_no_stats")
81
- and not self.structure_has_stats()
82
- and not os.environ.get("CHGKSUITE_BYPASS_STATS_CHECK")
83
- ):
84
- raise Exception("don't publish questions without stats")
85
- return tg["api_id"], tg["api_hash"]
86
187
  else:
87
- print("Please enter you api_id and api_hash.")
88
- print(
89
- "Go to https://my.telegram.org/apps, register an app and paste the credentials here."
90
- )
91
- api_id = input("Enter your api_id: ").strip()
92
- api_hash = input("Enter your api_hash: ").strip()
93
- with open(telegram_toml_file_path, "w", encoding="utf8") as f:
94
- toml.dump({"api_id": api_id, "api_hash": api_hash}, f)
95
- return api_id, api_hash
188
+ tg = {}
189
+ return self.get_bot_token(tg)
190
+
191
+ def save_tg(self, tg):
192
+ self.logger.info(f"saving {tg}")
193
+ with open(self.telegram_toml_path, "w", encoding="utf8") as f:
194
+ toml.dump(tg, f)
195
+
196
+ def send_api_request(self, method, data=None, files=None):
197
+ """Send a request to the Telegram Bot API."""
198
+ url = f"https://api.telegram.org/bot{self.bot_token}/{method}"
199
+
200
+ try:
201
+ if files:
202
+ response = requests.post(url, data=data, files=files, timeout=60)
203
+ else:
204
+ response = requests.post(url, json=data, timeout=30)
205
+
206
+ response_data = response.json()
207
+
208
+ if not response_data.get("ok"):
209
+ error_message = response_data.get("description", "Unknown error")
210
+ self.logger.error(f"Telegram API error: {error_message}")
211
+
212
+ # Handle rate limiting
213
+ if "retry_after" in response_data:
214
+ retry_after = response_data["retry_after"]
215
+ self.logger.info(f"Rate limited. Waiting for {retry_after} seconds")
216
+ time.sleep(retry_after + 1)
217
+ return self.send_api_request(method, data, files)
218
+
219
+ raise Exception(f"Telegram API error: {error_message}")
220
+
221
+ return response_data["result"]
222
+ except requests.exceptions.RequestException as e:
223
+ self.logger.error(f"Request error: {e}")
224
+ raise
225
+
226
+ def get_message_link(self, chat_id, message_id, username=None):
227
+ """Generate a link to a Telegram message."""
228
+ if username:
229
+ # Public channel with username
230
+ return f"https://t.me/{username}/{message_id}"
231
+ else:
232
+ # Private channel, use channel ID
233
+ channel_id_str = str(chat_id)
234
+ # Remove -100 prefix if present
235
+ if channel_id_str.startswith("-100"):
236
+ channel_id_str = channel_id_str[4:]
237
+ return f"https://t.me/c/{channel_id_str}/{message_id}"
238
+
239
+ def extract_id_from_link(self, link) -> int | str | None:
240
+ """
241
+ Extract channel or chat ID from a Telegram link.
242
+ Examples:
243
+ - https://t.me/c/1234567890/123 -> 1234567890
244
+ - https://t.me/joinchat/CkzknkZnxkZkZWM0 -> None (not supported)
245
+ - -1001234567890 -> 1234567890
246
+ - @username -> (username, None) # Returns username for resolution later
247
+ """
248
+ if link is None:
249
+ return None
250
+
251
+ if tryint(link) and link.startswith("-100"):
252
+ return int(link[4:])
253
+ elif tryint(link):
254
+ return int(link)
255
+
256
+ # Handle username format
257
+ if link.startswith("@"):
258
+ return link[1:]
259
+
260
+ # Handle URL format for private channels (with numeric ID)
261
+ link_pattern = r"https?://t\.me/c/(\d+)"
262
+ match = re.search(link_pattern, link)
263
+ if match:
264
+ return int(match.group(1))
265
+
266
+ # Handle URL format for public channels (with username)
267
+ public_pattern = r"https?://t\.me/([^/]+)"
268
+ match = re.search(public_pattern, link)
269
+ if match:
270
+ return match.group(1)
271
+
272
+ return link
96
273
 
97
274
  def tgyapper(self, e):
98
275
  if isinstance(e, str):
@@ -166,6 +343,7 @@ class TelegramExporter(BaseExporter):
166
343
 
167
344
  @classmethod
168
345
  def prepare_image_for_telegram(cls, imgfile):
346
+ """Prepare an image for uploading to Telegram (resize if needed)."""
169
347
  img = Image.open(imgfile)
170
348
  width, height = img.size
171
349
  file_size = os.path.getsize(imgfile)
@@ -250,111 +428,146 @@ class TelegramExporter(BaseExporter):
250
428
  return res, images
251
429
 
252
430
  def _post(self, chat_id, text, photo, reply_to_message_id=None):
253
- self.logger.info(f"Posting message `{text}`")
254
- if photo:
255
- if not text:
256
- caption = ""
257
- elif text == "---":
258
- caption = "--"
431
+ """Send a message to Telegram using API requests."""
432
+ self.logger.info(f"Posting message: {text[:50]}...")
433
+
434
+ try:
435
+ if photo:
436
+ # Step 1: Upload the photo first
437
+ with open(photo, "rb") as photo_file:
438
+ files = {"photo": photo_file}
439
+ caption = "" if not text else ("---" if text != "---" else "--")
440
+
441
+ data = {
442
+ "chat_id": chat_id,
443
+ "caption": caption,
444
+ "parse_mode": "HTML",
445
+ "disable_notification": True,
446
+ }
447
+
448
+ if reply_to_message_id:
449
+ data["reply_to_message_id"] = reply_to_message_id
450
+
451
+ result = self.send_api_request("sendPhoto", data, files)
452
+ msg_id = result["message_id"]
453
+
454
+ # Step 2: Edit the message if needed to add full text
455
+ if text and text != "---":
456
+ time.sleep(2) # Slight delay before editing
457
+ edit_data = {
458
+ "chat_id": chat_id,
459
+ "message_id": msg_id,
460
+ "caption": text,
461
+ "parse_mode": "HTML",
462
+ "disable_web_page_preview": True,
463
+ }
464
+ result = self.send_api_request("editMessageCaption", edit_data)
465
+
466
+ return {"message_id": msg_id, "chat": {"id": chat_id}}
259
467
  else:
260
- caption = "---"
261
- msg = self.client.send_file(
262
- chat_id,
263
- photo,
264
- caption=caption,
265
- parse_mode=CustomHtmlParser,
266
- reply_to=reply_to_message_id,
267
- silent=True,
268
- )
269
- if text:
270
- time.sleep(2)
271
- msg = self.client.edit_message(
272
- chat_id,
273
- msg.id,
274
- text=text,
275
- parse_mode=CustomHtmlParser,
276
- link_preview=False,
277
- )
278
- else:
279
- msg = self.client.send_message(
280
- chat_id,
281
- text,
282
- parse_mode=CustomHtmlParser,
283
- link_preview=False,
284
- reply_to=reply_to_message_id,
285
- silent=True,
286
- )
287
- return msg
288
-
289
- def __post(self, *args, **kwargs):
290
- retries = 0
291
- while retries <= 2:
292
- try:
293
- return self._post(*args, **kwargs)
294
- except errors.FloodWaitError as e:
295
- secs_to_wait = e.seconds + 30
296
- self.logger.error(
297
- f"Telegram thinks we are spammers, waiting for {secs_to_wait} seconds"
298
- )
299
- time.sleep(secs_to_wait)
300
- retries += 1
468
+ # Simple text message
469
+ data = {
470
+ "chat_id": chat_id,
471
+ "text": text,
472
+ "parse_mode": "HTML",
473
+ "disable_web_page_preview": True,
474
+ "disable_notification": True,
475
+ }
476
+
477
+ if reply_to_message_id:
478
+ data["reply_to_message_id"] = reply_to_message_id
479
+
480
+ result = self.send_api_request("sendMessage", data)
481
+ return {"message_id": result["message_id"], "chat": {"id": chat_id}}
482
+
483
+ except Exception as e:
484
+ self.logger.error(f"Error posting message: {str(e)}")
485
+ raise
301
486
 
302
487
  def post(self, posts):
488
+ """Post a series of messages, handling the channel and discussion group."""
303
489
  if self.args.dry_run:
304
- self.logger.info("skipping posting due to dry run")
490
+ self.logger.info("Skipping posting due to dry run")
305
491
  for post in posts:
306
492
  self.logger.info(post)
307
493
  return
494
+
308
495
  messages = []
309
496
  text, im = posts[0]
310
- root_msg = self.__post(
497
+
498
+ # Step 1: Post the root message to the channel
499
+ root_msg = self._post(
311
500
  self.channel_id,
312
501
  self.labels["general"]["handout_for_question"].format(text[3:])
313
502
  if text.startswith("QQQ")
314
503
  else text,
315
504
  im,
316
505
  )
317
- if (
318
- len(posts) >= 2 and text.startswith("QQQ") and im and posts[1][0]
319
- ): # crutch for case when the question doesn't fit without image
506
+
507
+ # Handle special case for questions with images
508
+ if len(posts) >= 2 and text.startswith("QQQ") and im and posts[1][0]:
320
509
  prev_root_msg = root_msg
321
- root_msg = self.__post(self.channel_id, posts[1][0], posts[1][1])
510
+ root_msg = self._post(self.channel_id, posts[1][0], posts[1][1])
322
511
  posts = posts[1:]
323
512
  messages.append(root_msg)
324
513
  messages.append(prev_root_msg)
514
+
325
515
  time.sleep(2.1)
326
516
 
327
- result = self.client(
328
- GetDiscussionMessageRequest(peer=self.channel_entity, msg_id=root_msg.id)
517
+ # Step 2: Wait for the message to appear in the discussion group
518
+ root_msg_in_discussion_id = self.get_discussion_message(
519
+ self.channel_id, root_msg["message_id"]
329
520
  )
330
- root_msg_in_chat = result.messages[0]
331
521
 
332
- root_msg_link = self.get_message_link(root_msg, self.channel_entity)
333
- root_msg_in_chat_link = self.get_message_link(root_msg_in_chat, self.chat_entity)
522
+ if not root_msg_in_discussion_id:
523
+ self.logger.error("Failed to find discussion message")
524
+ return
525
+
526
+ root_msg_in_discussion = {
527
+ "message_id": root_msg_in_discussion_id,
528
+ "chat": {"id": self.chat_id},
529
+ }
530
+
531
+ # Create message links
532
+ root_msg_link = self.get_message_link(self.channel_id, root_msg["message_id"])
533
+ root_msg_in_discussion_link = self.get_message_link(
534
+ self.chat_id, root_msg_in_discussion_id
535
+ )
334
536
 
335
537
  self.logger.info(
336
- f"Posted message {root_msg_link} ({root_msg_in_chat_link} in chat)"
538
+ f"Posted message {root_msg_link} ({root_msg_in_discussion_link} in discussion group)"
337
539
  )
540
+
338
541
  time.sleep(random.randint(5, 7))
542
+
339
543
  if root_msg not in messages:
340
544
  messages.append(root_msg)
341
- messages.append(root_msg_in_chat)
545
+ messages.append(root_msg_in_discussion)
546
+
547
+ # Step 3: Post replies in the discussion group
342
548
  for post in posts[1:]:
343
549
  text, im = post
344
- reply_msg = self.__post(
345
- self.chat_id, text, im, reply_to_message_id=root_msg_in_chat.id
550
+ reply_msg = self._post(
551
+ self.chat_id,
552
+ text,
553
+ im,
554
+ reply_to_message_id=root_msg_in_discussion_id,
346
555
  )
347
556
  self.logger.info(
348
- f"Replied to message {root_msg_in_chat_link} with reply message"
557
+ f"Replied to message {root_msg_in_discussion_link} with reply message"
349
558
  )
350
559
  time.sleep(random.randint(5, 7))
351
560
  messages.append(reply_msg)
561
+
352
562
  return messages
353
563
 
354
564
  def post_wrapper(self, posts):
565
+ """Wrapper for post() that handles section links."""
355
566
  messages = self.post(posts)
356
- if self.section and not self.args.dry_run:
357
- self.section_links.append(self.get_message_link(messages[0]))
567
+ if messages and self.section and not self.args.dry_run:
568
+ self.section_links.append(
569
+ self.get_message_link(self.channel_id, messages[0]["message_id"])
570
+ )
358
571
  self.section = False
359
572
 
360
573
  def tg_process_element(self, pair):
@@ -410,20 +623,20 @@ class TelegramExporter(BaseExporter):
410
623
  list_ = [
411
624
  x.strip()
412
625
  for x in list_
413
- if not x.startswith(("\n</spoiler>", "\n<spoiler>"))
626
+ if not x.startswith(("\n</tg-spoiler>", "\n<tg-spoiler>"))
414
627
  ]
415
628
  if lb_after_first:
416
629
  list_[0] = list_[0] + "\n"
417
630
  res = "\n".join(list_)
418
- res = res.replace("\n</spoiler>\n", "\n</spoiler>")
419
- res = res.replace("\n<spoiler>\n", "\n<spoiler>")
631
+ res = res.replace("\n</tg-spoiler>\n", "\n</tg-spoiler>")
632
+ res = res.replace("\n<tg-spoiler>\n", "\n<tg-spoiler>")
420
633
  while res.endswith("\n"):
421
634
  res = res[:-1]
422
- if res.endswith("\n</spoiler>"):
423
- res = res[:-3] + "</spoiler>"
635
+ if res.endswith("\n</tg-spoiler>"):
636
+ res = res[:-3] + "</tg-spoiler>"
424
637
  if self.args.nospoilers:
425
- res = res.replace("<spoiler>", "")
426
- res = res.replace("</spoiler>", "")
638
+ res = res.replace("<tg-spoiler>", "")
639
+ res = res.replace("</tg-spoiler>", "")
427
640
  res = res.replace("`", "'") # hack so spoilers don't break
428
641
  return res
429
642
 
@@ -454,9 +667,9 @@ class TelegramExporter(BaseExporter):
454
667
  threshold_ = threshold - 3
455
668
  chunk = texts[0][:threshold_]
456
669
  rest = texts[0][threshold_:]
457
- if texts[0].endswith("</spoiler>"):
458
- chunk += "</spoiler>"
459
- rest = "<spoiler>" + rest
670
+ if texts[0].endswith("</tg-spoiler>"):
671
+ chunk += "</tg-spoiler>"
672
+ rest = "<tg-spoiler>" + rest
460
673
  texts[0] = rest
461
674
  return chunk, im, texts, images
462
675
 
@@ -474,11 +687,11 @@ class TelegramExporter(BaseExporter):
474
687
  if self.args.nospoilers:
475
688
  res = s_
476
689
  elif t == "both":
477
- res = "<spoiler>" + s_ + "</spoiler>"
690
+ res = "<tg-spoiler>" + s_ + "</tg-spoiler>"
478
691
  elif t == "left":
479
- res = "<spoiler>" + s_
692
+ res = "<tg-spoiler>" + s_
480
693
  elif t == "right":
481
- res = s_ + "</spoiler>"
694
+ res = s_ + "</tg-spoiler>"
482
695
  return res
483
696
 
484
697
  @staticmethod
@@ -613,50 +826,112 @@ class TelegramExporter(BaseExporter):
613
826
  return tryint(str_)
614
827
 
615
828
  def export(self):
829
+ """Main export function to send the structure to Telegram."""
616
830
  self.section_links = []
617
831
  self.buffer_texts = []
618
832
  self.buffer_images = []
619
833
  self.section = False
620
834
 
621
- # Find channel and chat
622
- self.channel_entity = None
623
- self.chat_entity = None
624
-
625
- if self.is_valid_tg_identifier(
626
- self.args.tgchannel
627
- ) and self.is_valid_tg_identifier(self.args.tgchat):
628
- self.channel_id = self.is_valid_tg_identifier(self.args.tgchannel)
629
- self.chat_id = self.is_valid_tg_identifier(self.args.tgchat)
630
- self.channel_entity = InputChannel(self.channel_id, 0)
631
- self.chat_entity = InputChannel(self.chat_id, 0)
835
+ if not self.args.tgchannel or not self.args.tgchat:
836
+ raise Exception("Please provide channel and chat links or IDs.")
837
+
838
+ # Try to extract IDs from links or direct ID inputs
839
+ channel_result = self.extract_id_from_link(self.args.tgchannel)
840
+ chat_result = self.extract_id_from_link(self.args.tgchat)
841
+
842
+ # Handle channel resolution
843
+ if isinstance(channel_result, int):
844
+ channel_id = channel_result
845
+ elif isinstance(channel_result, str):
846
+ channel_id = self.resolve_username_to_id(channel_result)
847
+ if not channel_id:
848
+ print("\n" + "=" * 50)
849
+ print("Please forward any message from the target channel to the bot.")
850
+ print("This will allow me to extract the channel ID automatically.")
851
+ print("=" * 50 + "\n")
852
+
853
+ # Wait for a forwarded message with channel information
854
+ channel_id = self.wait_for_forwarded_message(
855
+ entity_type="channel", check_type=True, string_id=channel_result
856
+ )
857
+ if channel_id:
858
+ self.save_username(channel_result, channel_id)
859
+ else:
860
+ raise Exception("Failed to get channel ID from forwarded message")
632
861
  else:
633
- # Get dialogs and find the channel and chat by title
634
- dialogs = self.client.get_dialogs()
635
- for dialog in dialogs:
636
- if (dialog.title or "").strip() == self.args.tgchannel.strip():
637
- self.channel_entity = dialog.entity
638
- self.channel_id = dialog.id
639
- if (dialog.title or "").strip() == self.args.tgchat.strip():
640
- self.chat_entity = dialog.entity
641
- self.chat_id = dialog.id
642
- if self.channel_entity is not None and self.chat_entity is not None:
643
- break
862
+ raise Exception("Channel ID is undefined")
863
+ # Handle chat resolution
864
+ if isinstance(chat_result, int):
865
+ chat_id = chat_result
866
+ elif isinstance(chat_result, str):
867
+ chat_id = self.resolve_username_to_id(chat_result)
868
+ if not chat_id:
869
+ print("\n" + "=" * 50)
870
+ print(
871
+ "Please forward any message from the discussion group to the bot."
872
+ )
873
+ print("This will allow me to extract the group ID automatically.")
874
+ print("=" * 50 + "\n")
644
875
 
645
- if not self.channel_entity:
646
- raise Exception("Channel not found, please check provided name")
647
- if not self.chat_entity:
648
- raise Exception("Linked chat not found, please check provided name")
876
+ # Wait for a forwarded message with chat information
877
+ chat_id = self.wait_for_forwarded_message(
878
+ entity_type="chat", check_type=False, string_id=chat_result
879
+ )
880
+ if not chat_id:
881
+ self.logger.error("Failed to get chat ID from forwarded message")
882
+ return False
883
+ while chat_id == channel_id:
884
+ error_msg = (
885
+ "Chat ID and channel ID are the same. The problem may be that "
886
+ "you forwarded a message from discussion group that itself was automatically forwarded "
887
+ "from the channel by Telegram. Please forward a message that was sent directly in the discussion group."
888
+ )
889
+ self.logger.error(error_msg)
890
+ chat_id = self.wait_for_forwarded_message(
891
+ entity_type="chat",
892
+ check_type=False,
893
+ add_msg=error_msg,
894
+ string_id=chat_result,
895
+ )
896
+ if chat_id:
897
+ self.save_username(chat_result, chat_id)
898
+ else:
899
+ raise Exception("Chat ID is undefined")
900
+
901
+ if not channel_id:
902
+ raise Exception("Channel ID is undefined")
903
+ if not chat_id:
904
+ raise Exception("Chat ID is undefined")
905
+
906
+ self.channel_id = f"-100{channel_id}"
907
+ self.chat_id = f"-100{chat_id}"
908
+
909
+ self.logger.info(
910
+ f"Using channel ID {self.channel_id} and discussion group ID {self.chat_id}"
911
+ )
912
+
913
+ channel_access = self.verify_access(self.channel_id, hr_type="channel")
914
+ chat_access = self.verify_access(self.chat_id, hr_type="chat")
915
+ if not (channel_access and chat_access):
916
+ bad = []
917
+ if not channel_access:
918
+ bad.append("channel")
919
+ if not chat_access:
920
+ bad.append("discussion group")
921
+ raise Exception(f"The bot doesn't have access to {' and '.join(bad)}")
649
922
 
650
923
  # Process all elements
651
924
  for pair in self.structure:
652
925
  self.tg_process_element(pair)
653
926
 
927
+ # Handle any remaining buffer
654
928
  if self.buffer_texts or self.buffer_images:
655
929
  posts = self.split_to_messages(self.buffer_texts, self.buffer_images)
656
930
  self.post_wrapper(posts)
657
931
  self.buffer_texts = []
658
932
  self.buffer_images = []
659
933
 
934
+ # Create and pin navigation message with links to sections
660
935
  if not self.args.skip_until:
661
936
  navigation_text = [self.labels["general"]["general_impressions_text"]]
662
937
  if self.tg_heading:
@@ -669,10 +944,267 @@ class TelegramExporter(BaseExporter):
669
944
  f"{self.labels['general']['section']} {i + 1}: {link}"
670
945
  )
671
946
  navigation_text = "\n".join(navigation_text)
672
- messages = self.post([(navigation_text.strip(), None)])
947
+
948
+ # Post the navigation message
673
949
  if not self.args.dry_run:
674
- self.client.pin_message(
675
- self.channel_entity,
676
- messages[0].id,
677
- notify=False,
678
- )
950
+ message = self._post(self.channel_id, navigation_text.strip(), None)
951
+
952
+ # Pin the message
953
+ try:
954
+ self.send_api_request(
955
+ "pinChatMessage",
956
+ {
957
+ "chat_id": self.channel_id,
958
+ "message_id": message["message_id"],
959
+ "disable_notification": True,
960
+ },
961
+ )
962
+ except Exception as e:
963
+ self.logger.error(f"Failed to pin message: {str(e)}")
964
+ return True
965
+
966
def init_resolve_db(self):
    """
    Open the username -> ID cache database and make sure its table exists.

    Connects to the SQLite file at ``self.resolve_db_path`` and stores the
    connection in ``self.resolve_db_conn``.
    """
    self.resolve_db_conn = sqlite3.connect(self.resolve_db_path)
    # CREATE TABLE IF NOT EXISTS is idempotent, so run it unconditionally:
    # a file that already exists but has no "resolve" table (for example a
    # zero-byte file) would otherwise make every later query fail.
    self.resolve_db_conn.execute(
        "CREATE TABLE IF NOT EXISTS resolve (username TEXT PRIMARY KEY, id INTEGER)"
    )
    self.resolve_db_conn.commit()
975
+
976
def resolve_username_to_id(self, username):
    """
    Look up a previously saved numeric ID for ``username``.

    Reads from the ``resolve`` table through ``self.resolve_db_conn``.
    Returns the stored ID, or None when the username has no entry.
    """
    assert username is not None
    row = self.resolve_db_conn.execute(
        "SELECT id FROM resolve WHERE username = ?", (username,)
    ).fetchone()
    return row[0] if row else None
984
+
985
def save_username(self, username, id_):
    """
    Persist the numeric ID resolved for ``username`` in the ``resolve`` table.

    ``username`` is the table's primary key, so an existing entry is
    overwritten: re-resolving a name must replace the stale ID rather than
    fail with ``sqlite3.IntegrityError``.
    """
    assert username is not None
    assert id_ is not None
    self.logger.info(f"Saving username {username} as ID {id_}")
    cur = self.resolve_db_conn.cursor()
    cur.execute(
        "INSERT OR REPLACE INTO resolve (username, id) VALUES (?, ?)",
        (username, id_),
    )
    self.resolve_db_conn.commit()
992
+
993
def get_discussion_message(self, channel_id, message_id):
    """
    Locate the discussion-group copy of a channel post.

    Polls the ``messages`` table through ``self.db_conn`` for recent updates
    stored for ``self.chat_id`` and looks for one whose forward origin is
    the given channel and channel message. Returns that update's
    ``message_id`` in the discussion group, or None when no match was found
    after all retries.
    """
    # Compare against the "-100"-prefixed integer form of the channel ID
    channel_str = str(channel_id)
    if not channel_str.startswith("-100"):
        channel_str = f"-100{channel_id}"
    search_channel_id = int(channel_str)

    self.logger.info(
        f"Looking for discussion message for channel post {message_id}"
    )

    # The forwarded copy may take a while to show up, so poll a bounded
    # number of times with a pause after every unsuccessful attempt
    max_retries = 30
    for _attempt in range(max_retries):
        cur = self.db_conn.cursor()
        cur.execute(
            """
            SELECT raw_data
            FROM messages
            WHERE chat_id = ? AND created_at > datetime('now', '-5 minutes')
            ORDER BY created_at DESC
            LIMIT 20
            """,
            (self.chat_id,),
        )

        for record in cur.fetchall():
            try:
                update = json.loads(record["raw_data"])

                # Only updates carrying a forwarded message are candidates
                if "message" not in update:
                    continue
                if "forward_from_chat" not in update["message"]:
                    continue

                origin_chat = update["message"]["forward_from_chat"]
                origin_msg_id = update["message"].get("forward_from_message_id")
                self.logger.info(
                    f"forward_msg_id: {origin_msg_id}, forward_id: {origin_chat.get('id')}, search_channel_id: {search_channel_id}, message_id: {message_id}"
                )

                # Both the origin channel and the origin post must match
                if origin_chat.get("id") != search_channel_id:
                    continue
                if origin_msg_id != message_id:
                    continue

                found_id = update["message"]["message_id"]
                self.logger.info(
                    f"Found discussion message {found_id} for channel post {message_id}"
                )
                return found_id
            except Exception as e:
                # A malformed row must not abort the scan of the other rows
                self.logger.error(f"Error parsing message: {e}")
                continue

        time.sleep(3)

    self.logger.error(
        f"Could not find discussion message for channel message {message_id}"
    )
    return None
1067
+
1068
def wait_for_forwarded_message(
    self, entity_type="channel", check_type=True, add_msg=None, string_id=None
):
    """
    Wait for the user to forward a message from a channel or chat to extract its ID.

    Args:
        entity_type (str): "channel" or "chat" - used for proper prompting
        check_type (bool): When True, only accept forwards whose origin chat
            type is "channel"; when False, accept a forward from any origin
        add_msg (str): Optional text prepended to the instruction message
        string_id: Optional identifier that is being resolved. Callers in
            this class pass it as a keyword; it is only used for logging.

    Returns the numeric ID without the -100 prefix, or None if no suitable
    forwarded message arrived before the retries ran out.
    """

    # Customize messages based on entity type
    if entity_type == "channel":
        entity_name = "channel"
        instruction_message = (
            "🔄 Please forward any message from the target channel"
        )
        success_message = "✅ Successfully extracted channel ID: {}"
        failure_message = "❌ Failed to extract channel ID."
    else:
        entity_name = "discussion group"
        instruction_message = "🔄 Please forward any message from the discussion group\n\n⚠️ IMPORTANT: Do NOT forward messages that were automatically posted from the channel. Forward messages that were sent directly in the discussion group."
        success_message = "✅ Successfully extracted discussion group ID: {}"
        failure_message = "❌ Failed to extract discussion group ID."

    if add_msg:
        instruction_message = add_msg + "\n\n" + instruction_message

    if string_id is not None:
        self.logger.info(
            f"Waiting for a forwarded message to resolve {entity_name} {string_id}"
        )

    # Send instructions to the user
    self.send_api_request(
        "sendMessage",
        {"chat_id": self.control_chat_id, "text": instruction_message},
    )

    retry_count = 0
    max_retries = 30  # 5 minutes (10 seconds per retry)

    # Extract channel ID for comparison if we're looking for a discussion group
    channel_numeric_id = None
    if entity_type == "chat" and self.channel_id:
        if str(self.channel_id).startswith("-100"):
            channel_numeric_id = int(str(self.channel_id)[4:])

    # Raw payloads already answered with a warning. The same row stays inside
    # the 2-minute query window across several polls, and the user should be
    # warned about it only once.
    already_rejected = set()

    while retry_count < max_retries:
        time.sleep(10)  # Check every 10 seconds

        # Look for a forwarded message in recent messages
        cursor = self.db_conn.cursor()
        cursor.execute(
            """
            SELECT raw_data, created_at
            FROM messages
            WHERE created_at > datetime('now', '-2 minutes')
            ORDER BY created_at DESC
            """
        )

        for row in cursor.fetchall():
            # Rows are newest-first, so everything from here on is older
            # than the last message that was already consumed
            if self.created_at and row["created_at"] < self.created_at:
                break
            msg_data = json.loads(row["raw_data"])

            # Updates without a "message" payload cannot be a forward sent
            # to the control chat, so skip them instead of indexing blindly
            message = msg_data.get("message")
            if not message:
                continue
            if message.get("chat", {}).get("id") != self.control_chat_id:
                continue
            if "forward_from_chat" not in message:
                continue

            forward_info = message["forward_from_chat"]

            # Extract chat ID from the message
            chat_id = forward_info.get("id")
            # Remove -100 prefix if present
            if str(chat_id).startswith("-100"):
                extracted_id = int(str(chat_id)[4:])
            else:
                extracted_id = chat_id

            # If we're looking for a discussion group, verify it's not the same as the channel ID
            if (
                entity_type == "chat"
                and channel_numeric_id
                and extracted_id == channel_numeric_id
            ):
                if row["raw_data"] not in already_rejected:
                    already_rejected.add(row["raw_data"])
                    self.logger.warning(
                        "User forwarded a message from the channel, not the discussion group"
                    )
                    self.send_api_request(
                        "sendMessage",
                        {
                            "chat_id": self.control_chat_id,
                            "text": "⚠️ You forwarded a message from the channel, not from the discussion group.\n\nPlease forward a message that was originally sent IN the discussion group, not an automatic repost from the channel.",
                        },
                    )
                # Skip this message and continue waiting
                continue

            # With check_type the origin must be a channel; without it any
            # forward origin is accepted
            if check_type and forward_info.get("type") != "channel":
                continue

            # Remember where we stopped so a later call ignores older rows
            self.created_at = row["created_at"]
            self.logger.info(
                f"Extracted {entity_name} ID: {extracted_id} from forwarded message"
            )

            # Send confirmation message
            self.send_api_request(
                "sendMessage",
                {
                    "chat_id": self.control_chat_id,
                    "text": success_message.format(extracted_id),
                },
            )

            return extracted_id

        retry_count += 1

        print(f"Waiting for forwarded message... ({retry_count}/{max_retries})")

    self.logger.error(
        f"Failed to extract {entity_name} ID from forwarded message"
    )
    self.send_api_request(
        "sendMessage",
        {"chat_id": self.control_chat_id, "text": failure_message},
    )
    return None
1198
+
1199
def verify_access(self, telegram_id, hr_type=None):
    """
    Check whether the bot is among the administrators of a chat.

    Calls the Bot API method ``getChatAdministrators`` for ``telegram_id``
    (the "-100" prefix is added when missing) and looks for ``self.bot_id``
    in the returned administrator list.

    Args:
        telegram_id: Chat ID, with or without the "-100" prefix
        hr_type (str): Human-readable entity name used in the error message

    Returns True if the bot is an administrator of the chat, False otherwise.
    Raises Exception when the API answers with a non-200 status.
    """
    url = f"https://api.telegram.org/bot{self.bot_token}/getChatAdministrators"
    if not str(telegram_id).startswith("-100"):
        telegram_id = f"-100{telegram_id}"
    # Without a timeout an unresponsive endpoint would block the export forever
    req = requests.post(url, data={"chat_id": telegram_id}, timeout=30)
    if self.args.debug:
        print(req.status_code, req.text)
    if req.status_code != 200:
        raise Exception(f"Bot isn't added to {hr_type}")
    obj = req.json()
    admin_ids = {x["user"]["id"] for x in obj["result"]}
    return self.bot_id in admin_ids