koztv-blog-tools 1.2.9 → 1.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
package/dist/index.d.mts CHANGED
@@ -178,6 +178,18 @@ interface TelegramExportOptions {
178
178
  /** Callback when session string is generated (save this for future use) */
179
179
  onSession?: (session: string) => void;
180
180
  }
181
+ interface ForwardInfo {
182
+ /** Original channel/user ID */
183
+ fromId?: number;
184
+ /** Original channel username (if channel) */
185
+ fromUsername?: string;
186
+ /** Original channel/user name */
187
+ fromName?: string;
188
+ /** Original post ID in source channel */
189
+ postId?: number;
190
+ /** Original post date */
191
+ date?: Date;
192
+ }
181
193
  interface ExportedPost {
182
194
  msgId: number;
183
195
  date: Date;
@@ -190,6 +202,10 @@ interface ExportedPost {
190
202
  channelUsername: string;
191
203
  channelTitle: string;
192
204
  replyToMsgId?: number;
205
+ /** Message IDs merged into this post (for grouped posts) */
206
+ mergedMsgIds?: number[];
207
+ /** Forwarding info if this post was forwarded from another source */
208
+ fwdFrom?: ForwardInfo;
193
209
  }
194
210
  interface ExportResult {
195
211
  channelMeta: {
@@ -280,9 +296,34 @@ interface ExportAndTranslateResult {
280
296
  /** Session string for future use */
281
297
  session: string;
282
298
  }
299
+ /**
300
+ * Options for processing posts from intermediate files
301
+ */
302
+ interface ProcessFromFilesOptions {
303
+ /** Directory with intermediate export files (.telegram-export) */
304
+ exportDir: string;
305
+ /** Output directory for posts */
306
+ outputDir: string;
307
+ /** Translation config (optional) */
308
+ translate?: TranslationConfig;
309
+ /** Progress callback */
310
+ onProgress?: (message: string) => void;
311
+ /** Only process specific message IDs (if empty, process all) */
312
+ msgIds?: number[];
313
+ /** Force reprocess even if file exists */
314
+ force?: boolean;
315
+ }
316
+ /**
317
+ * Process posts from intermediate files (Step 2 only, no Telegram API)
318
+ * Use this after running exportTelegramChannel separately
319
+ */
320
+ declare function processFromFiles(options: ProcessFromFilesOptions): Promise<{
321
+ processed: number;
322
+ skipped: number;
323
+ }>;
283
324
  /**
284
325
  * Export posts from Telegram channel with optional translation
285
326
  */
286
327
  declare function exportAndTranslate(options: ExportAndTranslateOptions): Promise<ExportAndTranslateResult>;
287
328
 
288
- export { type AnalyticsConfig, type ExportAndTranslateOptions, type ExportAndTranslateResult, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type TelegramExportOptions, type TranslateOptions, type TranslationConfig, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportAndTranslate, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
329
+ export { type AnalyticsConfig, type ExportAndTranslateOptions, type ExportAndTranslateResult, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type ProcessFromFilesOptions, type TelegramExportOptions, type TranslateOptions, type TranslationConfig, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportAndTranslate, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, processFromFiles, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
package/dist/index.d.ts CHANGED
@@ -178,6 +178,18 @@ interface TelegramExportOptions {
178
178
  /** Callback when session string is generated (save this for future use) */
179
179
  onSession?: (session: string) => void;
180
180
  }
181
+ interface ForwardInfo {
182
+ /** Original channel/user ID */
183
+ fromId?: number;
184
+ /** Original channel username (if channel) */
185
+ fromUsername?: string;
186
+ /** Original channel/user name */
187
+ fromName?: string;
188
+ /** Original post ID in source channel */
189
+ postId?: number;
190
+ /** Original post date */
191
+ date?: Date;
192
+ }
181
193
  interface ExportedPost {
182
194
  msgId: number;
183
195
  date: Date;
@@ -190,6 +202,10 @@ interface ExportedPost {
190
202
  channelUsername: string;
191
203
  channelTitle: string;
192
204
  replyToMsgId?: number;
205
+ /** Message IDs merged into this post (for grouped posts) */
206
+ mergedMsgIds?: number[];
207
+ /** Forwarding info if this post was forwarded from another source */
208
+ fwdFrom?: ForwardInfo;
193
209
  }
194
210
  interface ExportResult {
195
211
  channelMeta: {
@@ -280,9 +296,34 @@ interface ExportAndTranslateResult {
280
296
  /** Session string for future use */
281
297
  session: string;
282
298
  }
299
+ /**
300
+ * Options for processing posts from intermediate files
301
+ */
302
+ interface ProcessFromFilesOptions {
303
+ /** Directory with intermediate export files (.telegram-export) */
304
+ exportDir: string;
305
+ /** Output directory for posts */
306
+ outputDir: string;
307
+ /** Translation config (optional) */
308
+ translate?: TranslationConfig;
309
+ /** Progress callback */
310
+ onProgress?: (message: string) => void;
311
+ /** Only process specific message IDs (if empty, process all) */
312
+ msgIds?: number[];
313
+ /** Force reprocess even if file exists */
314
+ force?: boolean;
315
+ }
316
+ /**
317
+ * Process posts from intermediate files (Step 2 only, no Telegram API)
318
+ * Use this after running exportTelegramChannel separately
319
+ */
320
+ declare function processFromFiles(options: ProcessFromFilesOptions): Promise<{
321
+ processed: number;
322
+ skipped: number;
323
+ }>;
283
324
  /**
284
325
  * Export posts from Telegram channel with optional translation
285
326
  */
286
327
  declare function exportAndTranslate(options: ExportAndTranslateOptions): Promise<ExportAndTranslateResult>;
287
328
 
288
- export { type AnalyticsConfig, type ExportAndTranslateOptions, type ExportAndTranslateResult, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type TelegramExportOptions, type TranslateOptions, type TranslationConfig, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportAndTranslate, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
329
+ export { type AnalyticsConfig, type ExportAndTranslateOptions, type ExportAndTranslateResult, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type ProcessFromFilesOptions, type TelegramExportOptions, type TranslateOptions, type TranslationConfig, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportAndTranslate, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, processFromFiles, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
package/dist/index.js CHANGED
@@ -44,6 +44,7 @@ __export(index_exports, {
44
44
  generateSlug: () => generateSlug,
45
45
  groupPosts: () => groupPosts,
46
46
  parsePost: () => parsePost,
47
+ processFromFiles: () => processFromFiles,
47
48
  resumeExport: () => resumeExport,
48
49
  trackBookAppointment: () => trackBookAppointment,
49
50
  trackGoal: () => trackGoal,
@@ -333,7 +334,7 @@ Do not add any explanations or notes.`
333
334
  let lastError = null;
334
335
  for (let attempt = 0; attempt < maxRetries; attempt++) {
335
336
  if (attempt > 0) {
336
- const delay = 3e3 * Math.pow(2, attempt - 1);
337
+ const delay = 1e3 * Math.pow(2, attempt - 1);
337
338
  await new Promise((r) => setTimeout(r, delay));
338
339
  }
339
340
  const response = await fetch(endpoint, {
@@ -370,44 +371,123 @@ function generateEnglishSlug(title) {
370
371
 
371
372
  // src/telegram.ts
372
373
  var import_telegram = require("telegram");
373
- var import_sessions = require("telegram/sessions");
374
+ var import_sessions = require("telegram/sessions/index.js");
374
375
  var fs = __toESM(require("fs"));
375
376
  var path = __toESM(require("path"));
376
377
  var readline = __toESM(require("readline"));
377
378
  function entitiesToMarkdown(text, entities) {
378
379
  if (!entities || entities.length === 0) return text;
379
- const sorted = [...entities].sort((a, b) => b.offset - a.offset);
380
- let result = text;
380
+ const mergedEntities = [];
381
+ const processedIndices = /* @__PURE__ */ new Set();
382
+ const links = [];
383
+ for (const entity of entities) {
384
+ if (entity instanceof import_telegram.Api.MessageEntityTextUrl) {
385
+ links.push({
386
+ start: entity.offset,
387
+ end: entity.offset + entity.length,
388
+ entity
389
+ });
390
+ }
391
+ }
392
+ const formatEntities = entities.filter(
393
+ (e) => e instanceof import_telegram.Api.MessageEntityBold || e instanceof import_telegram.Api.MessageEntityItalic
394
+ );
395
+ for (let i = 0; i < formatEntities.length; i++) {
396
+ if (processedIndices.has(entities.indexOf(formatEntities[i]))) continue;
397
+ const current = formatEntities[i];
398
+ const currentEnd = current.offset + current.length;
399
+ const bridgingLink = links.find((l) => l.start === currentEnd);
400
+ if (bridgingLink) {
401
+ const nextEntity = formatEntities.find(
402
+ (e) => e !== current && e.constructor === current.constructor && e.offset === bridgingLink.end
403
+ );
404
+ if (nextEntity) {
405
+ const mergedLength = nextEntity.offset + nextEntity.length - current.offset;
406
+ if (current instanceof import_telegram.Api.MessageEntityBold) {
407
+ mergedEntities.push(new import_telegram.Api.MessageEntityBold({ offset: current.offset, length: mergedLength }));
408
+ } else if (current instanceof import_telegram.Api.MessageEntityItalic) {
409
+ mergedEntities.push(new import_telegram.Api.MessageEntityItalic({ offset: current.offset, length: mergedLength }));
410
+ }
411
+ processedIndices.add(entities.indexOf(current));
412
+ processedIndices.add(entities.indexOf(nextEntity));
413
+ continue;
414
+ }
415
+ }
416
+ if (!processedIndices.has(entities.indexOf(current))) {
417
+ mergedEntities.push(current);
418
+ processedIndices.add(entities.indexOf(current));
419
+ }
420
+ }
421
+ for (let i = 0; i < entities.length; i++) {
422
+ if (processedIndices.has(i)) continue;
423
+ const entity = entities[i];
424
+ if (!(entity instanceof import_telegram.Api.MessageEntityBold) && !(entity instanceof import_telegram.Api.MessageEntityItalic)) {
425
+ mergedEntities.push(entity);
426
+ }
427
+ }
428
+ const sorted = [...mergedEntities].sort((a, b) => {
429
+ if (a.offset !== b.offset) return a.offset - b.offset;
430
+ return b.length - a.length;
431
+ });
432
+ const markers = [];
381
433
  for (const entity of sorted) {
382
434
  const start = entity.offset;
383
435
  const end = entity.offset + entity.length;
384
- const content = result.substring(start, end);
385
- let replacement = content;
436
+ let startMark = "";
437
+ let endMark = "";
438
+ let priority = 0;
386
439
  if (entity instanceof import_telegram.Api.MessageEntityBold) {
387
- replacement = `**${content}**`;
440
+ startMark = "**";
441
+ endMark = "**";
442
+ priority = 1;
388
443
  } else if (entity instanceof import_telegram.Api.MessageEntityItalic) {
389
- replacement = `*${content}*`;
444
+ startMark = "*";
445
+ endMark = "*";
446
+ priority = 1;
390
447
  } else if (entity instanceof import_telegram.Api.MessageEntityCode) {
391
- replacement = `\`${content}\``;
448
+ startMark = "`";
449
+ endMark = "`";
450
+ priority = 2;
392
451
  } else if (entity instanceof import_telegram.Api.MessageEntityPre) {
393
- replacement = `\`\`\`
394
- ${content}
395
- \`\`\``;
452
+ startMark = "```\n";
453
+ endMark = "\n```";
454
+ priority = 2;
396
455
  } else if (entity instanceof import_telegram.Api.MessageEntityStrike) {
397
- replacement = `~~${content}~~`;
456
+ startMark = "~~";
457
+ endMark = "~~";
458
+ priority = 1;
398
459
  } else if (entity instanceof import_telegram.Api.MessageEntityUnderline) {
399
- replacement = `**${content}**`;
460
+ startMark = "**";
461
+ endMark = "**";
462
+ priority = 1;
400
463
  } else if (entity instanceof import_telegram.Api.MessageEntityTextUrl) {
401
- replacement = `[${content}](${entity.url})`;
402
- } else if (entity instanceof import_telegram.Api.MessageEntityUrl) {
403
- replacement = content;
404
- } else if (entity instanceof import_telegram.Api.MessageEntityMention) {
405
- replacement = content;
406
- } else if (entity instanceof import_telegram.Api.MessageEntityHashtag) {
407
- replacement = content;
464
+ startMark = "[";
465
+ endMark = `](${entity.url})`;
466
+ priority = 10;
467
+ }
468
+ if (startMark) {
469
+ markers.push({ pos: start, insert: startMark, priority });
470
+ markers.push({ pos: end, insert: endMark, priority: -priority });
408
471
  }
409
- result = result.substring(0, start) + replacement + result.substring(end);
410
472
  }
473
+ markers.sort((a, b) => {
474
+ if (a.pos !== b.pos) return a.pos - b.pos;
475
+ if (a.priority < 0 && b.priority < 0) {
476
+ return a.priority - b.priority;
477
+ }
478
+ if (a.priority > 0 && b.priority > 0) {
479
+ return b.priority - a.priority;
480
+ }
481
+ return a.priority - b.priority;
482
+ });
483
+ let result = "";
484
+ let lastPos = 0;
485
+ for (const marker of markers) {
486
+ result += text.substring(lastPos, marker.pos) + marker.insert;
487
+ lastPos = marker.pos;
488
+ }
489
+ result += text.substring(lastPos);
490
+ result = result.replace(/(\S) \*\*/g, "$1** ").replace(/\*\* (\S)/g, " **$1").replace(/(\S) \*/g, "$1* ").replace(/\* (\S)/g, " *$1");
411
491
  return result;
412
492
  }
413
493
  async function defaultReadline(prompt) {
@@ -579,6 +659,43 @@ async function exportTelegramChannel(options) {
579
659
  if (message.replyTo && "replyToMsgId" in message.replyTo) {
580
660
  replyToMsgId = message.replyTo.replyToMsgId;
581
661
  }
662
+ let fwdFrom;
663
+ if (message.fwdFrom) {
664
+ fwdFrom = {};
665
+ if (message.fwdFrom.date) {
666
+ fwdFrom.date = new Date(message.fwdFrom.date * 1e3);
667
+ }
668
+ const fromId = message.fwdFrom.fromId;
669
+ if (fromId) {
670
+ if (fromId instanceof import_telegram.Api.PeerChannel) {
671
+ fwdFrom.fromId = fromId.channelId.toJSNumber();
672
+ try {
673
+ const channelEntity = await client.getEntity(fromId.channelId);
674
+ if (channelEntity instanceof import_telegram.Api.Channel) {
675
+ fwdFrom.fromUsername = channelEntity.username || void 0;
676
+ fwdFrom.fromName = channelEntity.title;
677
+ }
678
+ } catch (e) {
679
+ }
680
+ } else if (fromId instanceof import_telegram.Api.PeerUser) {
681
+ fwdFrom.fromId = fromId.userId.toJSNumber();
682
+ try {
683
+ const userEntity = await client.getEntity(fromId.userId);
684
+ if (userEntity instanceof import_telegram.Api.User) {
685
+ fwdFrom.fromUsername = userEntity.username || void 0;
686
+ fwdFrom.fromName = [userEntity.firstName, userEntity.lastName].filter(Boolean).join(" ");
687
+ }
688
+ } catch (e) {
689
+ }
690
+ }
691
+ }
692
+ if (!fwdFrom.fromName && message.fwdFrom.fromName) {
693
+ fwdFrom.fromName = message.fwdFrom.fromName;
694
+ }
695
+ if (message.fwdFrom.channelPost) {
696
+ fwdFrom.postId = message.fwdFrom.channelPost;
697
+ }
698
+ }
582
699
  const post = {
583
700
  msgId,
584
701
  date: new Date(message.date * 1e3),
@@ -590,7 +707,8 @@ async function exportTelegramChannel(options) {
590
707
  link,
591
708
  channelUsername: channelMeta.username,
592
709
  channelTitle: channelMeta.title,
593
- replyToMsgId
710
+ replyToMsgId,
711
+ fwdFrom
594
712
  };
595
713
  posts.push(post);
596
714
  const markdown = formatPostMarkdown(post);
@@ -628,6 +746,29 @@ forwards: ${post.forwards}`;
628
746
  frontmatter += `
629
747
  reply_to_msg_id: ${post.replyToMsgId}`;
630
748
  }
749
+ if (post.fwdFrom) {
750
+ frontmatter += "\nfwd_from:";
751
+ if (post.fwdFrom.fromName) {
752
+ frontmatter += `
753
+ from_name: "${post.fwdFrom.fromName.replace(/"/g, '\\"')}"`;
754
+ }
755
+ if (post.fwdFrom.fromUsername) {
756
+ frontmatter += `
757
+ from_username: "${post.fwdFrom.fromUsername}"`;
758
+ }
759
+ if (post.fwdFrom.fromId) {
760
+ frontmatter += `
761
+ from_id: ${post.fwdFrom.fromId}`;
762
+ }
763
+ if (post.fwdFrom.postId) {
764
+ frontmatter += `
765
+ post_id: ${post.fwdFrom.postId}`;
766
+ }
767
+ if (post.fwdFrom.date) {
768
+ frontmatter += `
769
+ date: ${post.fwdFrom.date.toISOString()}`;
770
+ }
771
+ }
631
772
  frontmatter += "\n---\n\n";
632
773
  let body = post.content || "";
633
774
  if (post.mediaFiles.length > 0) {
@@ -700,8 +841,68 @@ function generateSlug2(text, lang) {
700
841
  }
701
842
  return processed.toLowerCase().replace(/[^a-z0-9\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "").substring(0, 60) || "untitled";
702
843
  }
844
+ function groupPostsByTime(posts, timeWindowMs = 5 * 60 * 1e3) {
845
+ const sortedPosts = [...posts].sort(
846
+ (a, b) => new Date(a.date).getTime() - new Date(b.date).getTime()
847
+ );
848
+ const groups = [];
849
+ let currentGroup = [];
850
+ let lastDate = null;
851
+ for (const post of sortedPosts) {
852
+ const postDate = new Date(post.date);
853
+ if (lastDate && Math.abs(postDate.getTime() - lastDate.getTime()) <= timeWindowMs) {
854
+ currentGroup.push(post);
855
+ } else {
856
+ if (currentGroup.length > 0) {
857
+ groups.push(currentGroup);
858
+ }
859
+ currentGroup = [post];
860
+ }
861
+ lastDate = postDate;
862
+ }
863
+ if (currentGroup.length > 0) {
864
+ groups.push(currentGroup);
865
+ }
866
+ return groups;
867
+ }
868
+ function mergePostGroup(posts) {
869
+ const sorted = [...posts].sort(
870
+ (a, b) => new Date(a.date).getTime() - new Date(b.date).getTime()
871
+ );
872
+ const mainPostIndex = sorted.findIndex((p) => !p.fwdFrom);
873
+ const mainPost = mainPostIndex >= 0 ? sorted[mainPostIndex] : sorted[0];
874
+ const contentParts = [];
875
+ for (const p of sorted) {
876
+ if (!p.content && !p.fwdFrom) continue;
877
+ if (p.fwdFrom && p === mainPost) {
878
+ contentParts.push(p.content || "");
879
+ } else if (p.fwdFrom) {
880
+ const fwd = p.fwdFrom;
881
+ const quotedContent = (p.content || "").split("\n").map((line) => `> ${line}`).join("\n");
882
+ const sourceLink = fwd.fromUsername && fwd.postId ? `https://t.me/${fwd.fromUsername}/${fwd.postId}` : fwd.fromUsername ? `https://t.me/${fwd.fromUsername}` : null;
883
+ const sourceName = fwd.fromName || fwd.fromUsername || "Unknown";
884
+ const attribution = sourceLink ? `\u2014 [${sourceName}](${sourceLink})` : `\u2014 ${sourceName}`;
885
+ contentParts.push(`${quotedContent}
886
+ >
887
+ > ${attribution}`);
888
+ } else {
889
+ contentParts.push(p.content || "");
890
+ }
891
+ }
892
+ const allMediaFiles = sorted.flatMap((p) => p.mediaFiles || []);
893
+ return {
894
+ ...mainPost,
895
+ content: contentParts.filter(Boolean).join("\n\n"),
896
+ hasMedia: sorted.some((p) => p.hasMedia),
897
+ mediaFiles: allMediaFiles,
898
+ // Store all msgIds for reference
899
+ mergedMsgIds: sorted.map((p) => p.msgId),
900
+ // Keep fwdFrom only if main post is forwarded
901
+ fwdFrom: mainPost.fwdFrom
902
+ };
903
+ }
703
904
  function cleanContent2(text) {
704
- return text.replace(/##\s*Attachments\n(?:- [^\n]+\n?)*/g, "").replace(/- media\/\d+\/[^\n]+/g, "").replace(/#\w+@\w+/g, "").trim();
905
+ return text.replace(/##\s*Attachments\n(?:- [^\n]+\n?)*/g, "").replace(/- media\/\d+\/[^\n]+/g, "").replace(/!\[\]\(\/media\/\d+\/[^)]+\)/g, "").replace(/<video src="\/media\/\d+\/[^"]+"\s*[^>]*><\/video>/g, "").replace(/#\w+@\w+/g, "").trim();
705
906
  }
706
907
  function extractTitleAndBody(content) {
707
908
  const lines = content.split("\n").filter((l) => l.trim());
@@ -716,39 +917,23 @@ async function processPost(post, options, exportDir) {
716
917
  const { title: originalTitle, body: originalBody } = extractTitleAndBody(cleanedContent);
717
918
  const date = post.date.toISOString().split("T")[0];
718
919
  const languages = [];
719
- const translationExists = (lang) => {
720
- const langDir = path2.join(outputDir, lang);
721
- if (!fs2.existsSync(langDir)) return false;
722
- try {
723
- for (const slug of fs2.readdirSync(langDir)) {
724
- const indexPath = path2.join(langDir, slug, "index.md");
725
- if (fs2.existsSync(indexPath)) {
726
- const content = fs2.readFileSync(indexPath, "utf-8");
727
- if (content.includes(`original_link: "${post.link}"`)) {
728
- return true;
729
- }
730
- }
920
+ const baseSlug = String(post.msgId);
921
+ const postDir = path2.join(outputDir, baseSlug);
922
+ const translationExistsInDir = (lang) => {
923
+ if (!fs2.existsSync(postDir)) return false;
924
+ const langFile = path2.join(postDir, `${lang}.md`);
925
+ if (fs2.existsSync(langFile)) {
926
+ const content = fs2.readFileSync(langFile, "utf-8");
927
+ if (content.includes(`original_link: "${post.link}"`)) {
928
+ return true;
731
929
  }
732
- } catch {
733
930
  }
734
931
  return false;
735
932
  };
736
933
  if (translate && translate.targetLangs.length > 0) {
737
934
  const sourceLang = translate.sourceLang || "ru";
738
- if (translate.keepOriginal && !translationExists(sourceLang)) {
739
- languages.push({
740
- lang: sourceLang,
741
- title: originalTitle,
742
- body: originalBody,
743
- isOriginal: true
744
- });
745
- }
746
935
  for (const targetLang of translate.targetLangs) {
747
936
  if (targetLang === sourceLang) continue;
748
- if (translationExists(targetLang)) {
749
- onProgress?.(` ${targetLang}: already exists, skipping`);
750
- continue;
751
- }
752
937
  onProgress?.(` Translating to ${targetLang}...`);
753
938
  const translateOpts = {
754
939
  apiKey: translate.apiKey,
@@ -759,18 +944,34 @@ async function processPost(post, options, exportDir) {
759
944
  };
760
945
  try {
761
946
  const translatedTitle = await translateTitle(originalTitle, translateOpts);
947
+ if (originalBody) {
948
+ await new Promise((r) => setTimeout(r, 2e3));
949
+ }
762
950
  const translatedBody = originalBody ? await translateContent(originalBody, translateOpts) : "";
763
951
  languages.push({
764
952
  lang: targetLang,
765
953
  title: translatedTitle,
766
954
  body: translatedBody,
767
- isOriginal: false
955
+ isOriginal: false,
956
+ slug: generateEnglishSlug(translatedTitle)
957
+ // slug from translated title for URL
768
958
  });
769
959
  onProgress?.(` \u2192 ${targetLang}: "${translatedTitle.substring(0, 40)}..."`);
770
960
  } catch (error) {
771
961
  onProgress?.(` Error translating to ${targetLang}: ${error.message}`);
772
962
  }
773
- await new Promise((r) => setTimeout(r, 2e3));
963
+ await new Promise((r) => setTimeout(r, 5e3));
964
+ }
965
+ if (translate.keepOriginal) {
966
+ const russianSlug = generateSlug2(originalTitle);
967
+ languages.push({
968
+ lang: sourceLang,
969
+ title: originalTitle,
970
+ body: originalBody,
971
+ isOriginal: true,
972
+ slug: russianSlug
973
+ // Custom slug for Russian URL
974
+ });
774
975
  }
775
976
  } else {
776
977
  const defaultLang = translate?.sourceLang || "ru";
@@ -781,25 +982,21 @@ async function processPost(post, options, exportDir) {
781
982
  isOriginal: true
782
983
  });
783
984
  }
784
- for (const { lang, title, body, isOriginal } of languages) {
785
- const slug = generateSlug2(title, lang);
786
- const postDir = path2.join(outputDir, lang, slug);
787
- if (fs2.existsSync(postDir)) {
788
- onProgress?.(` Skipping existing: ${lang}/${slug}`);
985
+ for (const { lang, title, body, isOriginal, slug: customSlug } of languages) {
986
+ const langFile = path2.join(postDir, `${lang}.md`);
987
+ if (fs2.existsSync(langFile) || translationExistsInDir(lang)) {
988
+ onProgress?.(` Skipping existing: ${baseSlug}/${lang}.md`);
789
989
  continue;
790
990
  }
791
991
  fs2.mkdirSync(postDir, { recursive: true });
792
992
  let finalBody = body;
793
993
  const mediaFiles = [];
794
994
  if (post.mediaFiles && post.mediaFiles.length > 0) {
795
- const paddedId = String(post.msgId).padStart(6, "0");
796
- const targetMediaDir = mediaDir ? path2.join(mediaDir, paddedId) : path2.join(outputDir, "..", "media", paddedId);
797
- fs2.mkdirSync(targetMediaDir, { recursive: true });
798
995
  for (let i = 0; i < post.mediaFiles.length; i++) {
799
996
  const sourcePath = path2.join(exportDir, post.mediaFiles[i]);
800
997
  const ext = path2.extname(sourcePath).toLowerCase() || ".jpg";
801
998
  const newName = `image${i + 1}${ext}`;
802
- const targetPath = path2.join(targetMediaDir, newName);
999
+ const targetPath = path2.join(postDir, newName);
803
1000
  if (fs2.existsSync(sourcePath)) {
804
1001
  if (!fs2.existsSync(targetPath)) {
805
1002
  fs2.copyFileSync(sourcePath, targetPath);
@@ -810,8 +1007,8 @@ async function processPost(post, options, exportDir) {
810
1007
  if (mediaFiles.length > 0) {
811
1008
  const images = mediaFiles.filter((f) => !f.match(/\.(mp4|mov|webm|m4v)$/i));
812
1009
  const videos = mediaFiles.filter((f) => f.match(/\.(mp4|mov|webm|m4v)$/i));
813
- const imageMarkdown = images.map((f) => `![](/media/${paddedId}/${f})`).join("\n\n");
814
- const videoMarkdown = videos.map((f) => `<video src="/media/${paddedId}/${f}" controls></video>`).join("\n\n");
1010
+ const imageMarkdown = images.map((f) => `![](${f})`).join("\n\n");
1011
+ const videoMarkdown = videos.map((f) => `<video src="${f}" controls></video>`).join("\n\n");
815
1012
  const mediaMarkdown = [imageMarkdown, videoMarkdown].filter(Boolean).join("\n\n");
816
1013
  if (mediaMarkdown) {
817
1014
  finalBody = finalBody + "\n\n" + mediaMarkdown;
@@ -820,22 +1017,116 @@ async function processPost(post, options, exportDir) {
820
1017
  }
821
1018
  const replyLine = post.replyToMsgId ? `reply_to_msg_id: ${post.replyToMsgId}
822
1019
  ` : "";
1020
+ const slugLine = customSlug ? `slug: "${customSlug}"
1021
+ ` : "";
1022
+ let fwdFromLine = "";
1023
+ let bodyWithForwardQuote = finalBody;
1024
+ if (post.fwdFrom) {
1025
+ const fwdFrom = post.fwdFrom;
1026
+ fwdFromLine = "fwd_from:\n";
1027
+ if (fwdFrom.fromName) {
1028
+ fwdFromLine += ` from_name: "${fwdFrom.fromName.replace(/"/g, '\\"')}"
1029
+ `;
1030
+ }
1031
+ if (fwdFrom.fromUsername) {
1032
+ fwdFromLine += ` from_username: "${fwdFrom.fromUsername}"
1033
+ `;
1034
+ }
1035
+ if (fwdFrom.postId) {
1036
+ fwdFromLine += ` post_id: ${fwdFrom.postId}
1037
+ `;
1038
+ }
1039
+ const quotedBody = finalBody.split("\n").map((line) => `> ${line}`).join("\n");
1040
+ const sourceLink = fwdFrom.fromUsername && fwdFrom.postId ? `https://t.me/${fwdFrom.fromUsername}/${fwdFrom.postId}` : fwdFrom.fromUsername ? `https://t.me/${fwdFrom.fromUsername}` : null;
1041
+ const sourceName = fwdFrom.fromName || fwdFrom.fromUsername || "Unknown";
1042
+ const attribution = sourceLink ? `\u2014 [${sourceName}](${sourceLink})` : `\u2014 ${sourceName}`;
1043
+ bodyWithForwardQuote = `${quotedBody}
1044
+ >
1045
+ > ${attribution}`;
1046
+ }
823
1047
  const markdown = `---
824
1048
  title: "${title.replace(/"/g, '\\"')}"
825
1049
  date: ${date}
826
1050
  lang: ${lang}
827
1051
  original_link: "${post.link || ""}"
828
- ${replyLine}${isOriginal ? "" : `translated_from: "${translate?.sourceLang || "ru"}"
1052
+ ${slugLine}${replyLine}${fwdFromLine}${isOriginal ? "" : `translated_from: "${translate?.sourceLang || "ru"}"
829
1053
  `}---
830
1054
 
831
- ${finalBody}
1055
+ ${bodyWithForwardQuote}
832
1056
  `;
833
- fs2.writeFileSync(path2.join(postDir, "index.md"), markdown);
834
- createdPosts.push(`${lang}/${slug}`);
835
- onProgress?.(` Created: ${lang}/${slug}`);
1057
+ fs2.writeFileSync(langFile, markdown);
1058
+ createdPosts.push(`${baseSlug}/${lang}.md`);
1059
+ onProgress?.(` Created: ${baseSlug}/${lang}.md`);
836
1060
  }
837
1061
  return createdPosts;
838
1062
  }
1063
+ async function processFromFiles(options) {
1064
+ const { exportDir, outputDir, translate, onProgress, msgIds, force } = options;
1065
+ const ndjsonPath = path2.join(exportDir, "posts.ndjson");
1066
+ if (!fs2.existsSync(ndjsonPath)) {
1067
+ throw new Error(`posts.ndjson not found in ${exportDir}`);
1068
+ }
1069
+ fs2.mkdirSync(outputDir, { recursive: true });
1070
+ const lines = fs2.readFileSync(ndjsonPath, "utf-8").split("\n").filter((l) => l.trim());
1071
+ const posts = lines.map((line) => {
1072
+ const data = JSON.parse(line);
1073
+ return {
1074
+ ...data,
1075
+ date: new Date(data.date)
1076
+ };
1077
+ });
1078
+ onProgress?.(`Found ${posts.length} posts in ${ndjsonPath}`);
1079
+ const postGroups = groupPostsByTime(posts, 5 * 60 * 1e3);
1080
+ onProgress?.(`Grouped into ${postGroups.length} post groups`);
1081
+ const mergedPosts = postGroups.map(
1082
+ (group) => group.length > 1 ? mergePostGroup(group) : group[0]
1083
+ );
1084
+ let processed = 0;
1085
+ let skipped = 0;
1086
+ for (const post of mergedPosts) {
1087
+ const postMsgIds = post.mergedMsgIds || [post.msgId];
1088
+ if (msgIds && msgIds.length > 0 && !postMsgIds.some((id) => msgIds.includes(id))) {
1089
+ continue;
1090
+ }
1091
+ const postId = `${post.channelUsername}-${post.msgId}`;
1092
+ if (!post.content && !post.hasMedia) {
1093
+ skipped++;
1094
+ continue;
1095
+ }
1096
+ onProgress?.(`Processing: ${postId}`);
1097
+ const processOptions = {
1098
+ apiId: 0,
1099
+ apiHash: "",
1100
+ channel: "",
1101
+ outputDir,
1102
+ translate,
1103
+ onProgress
1104
+ };
1105
+ try {
1106
+ if (force) {
1107
+ const postDir = path2.join(outputDir, String(post.msgId));
1108
+ if (fs2.existsSync(postDir)) {
1109
+ for (const file of fs2.readdirSync(postDir)) {
1110
+ if (file.endsWith(".md")) {
1111
+ fs2.unlinkSync(path2.join(postDir, file));
1112
+ }
1113
+ }
1114
+ }
1115
+ }
1116
+ const created = await processPost(post, processOptions, exportDir);
1117
+ if (created.length > 0) {
1118
+ processed++;
1119
+ } else {
1120
+ skipped++;
1121
+ }
1122
+ } catch (error) {
1123
+ onProgress?.(` Error: ${error.message}`);
1124
+ skipped++;
1125
+ }
1126
+ await new Promise((r) => setTimeout(r, 50));
1127
+ }
1128
+ return { processed, skipped };
1129
+ }
839
1130
  async function exportAndTranslate(options) {
840
1131
  const {
841
1132
  apiId,
@@ -871,9 +1162,14 @@ async function exportAndTranslate(options) {
871
1162
  });
872
1163
  onProgress?.(` Exported ${exportResult.posts.length} posts from "${exportResult.channelMeta.title}"`);
873
1164
  onProgress?.("\nStep 2: Processing posts...");
1165
+ const postGroups = groupPostsByTime(exportResult.posts, 5 * 60 * 1e3);
1166
+ onProgress?.(` Grouped into ${postGroups.length} post groups`);
1167
+ const mergedPosts = postGroups.map(
1168
+ (group) => group.length > 1 ? mergePostGroup(group) : group[0]
1169
+ );
874
1170
  let processed = 0;
875
1171
  let skipped = 0;
876
- for (const post of exportResult.posts) {
1172
+ for (const post of mergedPosts) {
877
1173
  const postId = `${post.channelUsername}-${post.msgId}`;
878
1174
  if (!post.content && !post.hasMedia) {
879
1175
  skipped++;
@@ -925,6 +1221,7 @@ Processing: ${postId}`);
925
1221
  generateSlug,
926
1222
  groupPosts,
927
1223
  parsePost,
1224
+ processFromFiles,
928
1225
  resumeExport,
929
1226
  trackBookAppointment,
930
1227
  trackGoal,
package/dist/index.mjs CHANGED
@@ -276,7 +276,7 @@ Do not add any explanations or notes.`
276
276
  let lastError = null;
277
277
  for (let attempt = 0; attempt < maxRetries; attempt++) {
278
278
  if (attempt > 0) {
279
- const delay = 3e3 * Math.pow(2, attempt - 1);
279
+ const delay = 1e3 * Math.pow(2, attempt - 1);
280
280
  await new Promise((r) => setTimeout(r, delay));
281
281
  }
282
282
  const response = await fetch(endpoint, {
@@ -313,44 +313,123 @@ function generateEnglishSlug(title) {
313
313
 
314
314
  // src/telegram.ts
315
315
  import { TelegramClient, Api } from "telegram";
316
- import { StringSession } from "telegram/sessions";
316
+ import { StringSession } from "telegram/sessions/index.js";
317
317
  import * as fs from "fs";
318
318
  import * as path from "path";
319
319
  import * as readline from "readline";
320
320
/**
 * Render Telegram message entities as inline Markdown.
 *
 * Supported entities: bold, italic, strike, underline (rendered as bold,
 * Markdown has no underline), inline code, pre blocks and text links.
 * Any other entity type is left as plain text.
 *
 * Processing steps:
 *  1. Two emphasis entities of the same kind that are separated only by a
 *     text link (`bold` + `link` + `bold`) are merged into one entity that
 *     spans the link, so the output is `**a [l](u) b**` instead of two
 *     separate emphasis runs.
 *  2. Every entity is turned into a span. For everything except code/pre,
 *     leading and trailing whitespace is moved outside the span, because
 *     Markdown emphasis does not render when the marker touches whitespace
 *     (`**bold **`). This is done per entity on the source offsets; the
 *     rendered string is never post-processed with regexes, so literal
 *     asterisks in the message text are left alone.
 *  3. Markers are emitted in nesting order: at the same position closers
 *     come before openers, outer spans open first and close last.
 *
 * @param {string} text Message text the entity offsets refer to.
 * @param {Array<object>|null|undefined} entities Telegram entities with
 *   `offset`/`length` (and `url` for text links).
 * @returns {string} Markdown text; `text` unchanged when there are no entities.
 */
function entitiesToMarkdown(text, entities) {
  if (!entities || entities.length === 0) return text;

  const isEmphasis = (e) =>
    e instanceof Api.MessageEntityBold || e instanceof Api.MessageEntityItalic;

  // --- 1. Merge same-kind emphasis runs that are bridged by a link ---------
  const links = entities.filter((e) => e instanceof Api.MessageEntityTextUrl);
  const emphasis = entities.filter(isEmphasis);
  const consumed = new Set();
  const merged = [];
  for (const current of emphasis) {
    if (consumed.has(current)) continue;
    consumed.add(current);
    const currentEnd = current.offset + current.length;
    const bridge = links.find((l) => l.offset === currentEnd);
    const continuation = bridge && emphasis.find(
      (e) =>
        !consumed.has(e) &&
        e.constructor === current.constructor &&
        e.offset === bridge.offset + bridge.length
    );
    if (continuation) {
      consumed.add(continuation);
      const Ctor = current instanceof Api.MessageEntityBold
        ? Api.MessageEntityBold
        : Api.MessageEntityItalic;
      merged.push(new Ctor({
        offset: current.offset,
        length: continuation.offset + continuation.length - current.offset
      }));
    } else {
      merged.push(current);
    }
  }
  for (const entity of entities) {
    if (!isEmphasis(entity)) merged.push(entity);
  }

  // --- 2. Map entities to marker spans -------------------------------------
  // `priority` only breaks ties between spans covering the exact same range:
  // lower priority wraps higher priority (emphasis outside code outside link).
  const styleOf = (entity) => {
    if (entity instanceof Api.MessageEntityBold) {
      return { open: "**", close: "**", priority: 1, trim: true };
    }
    if (entity instanceof Api.MessageEntityItalic) {
      return { open: "*", close: "*", priority: 1, trim: true };
    }
    if (entity instanceof Api.MessageEntityCode) {
      return { open: "`", close: "`", priority: 2, trim: false };
    }
    if (entity instanceof Api.MessageEntityPre) {
      return { open: "```\n", close: "\n```", priority: 2, trim: false };
    }
    if (entity instanceof Api.MessageEntityStrike) {
      return { open: "~~", close: "~~", priority: 1, trim: true };
    }
    if (entity instanceof Api.MessageEntityUnderline) {
      return { open: "**", close: "**", priority: 1, trim: true };
    }
    if (entity instanceof Api.MessageEntityTextUrl) {
      return { open: "[", close: `](${entity.url})`, priority: 10, trim: true };
    }
    return null;
  };
  const isSpaceAt = (index) => /\s/.test(text.charAt(index));

  const spans = [];
  for (const entity of merged) {
    const style = styleOf(entity);
    if (!style) continue;
    let start = entity.offset;
    let end = entity.offset + entity.length;
    if (style.trim) {
      while (start < end && isSpaceAt(start)) start++;
      while (end > start && isSpaceAt(end - 1)) end--;
    }
    // Empty / whitespace-only spans would only produce stray markers.
    if (start >= end) continue;
    spans.push({ start, end, open: style.open, close: style.close, priority: style.priority });
  }
  // Outer spans first: earlier start, then longer, then lower priority.
  spans.sort(
    (a, b) => a.start - b.start || b.end - a.end || a.priority - b.priority
  );

  // --- 3. Emit markers in nesting order ------------------------------------
  const markers = [];
  spans.forEach((span, order) => {
    markers.push({ pos: span.start, insert: span.open, closing: false, order });
    markers.push({ pos: span.end, insert: span.close, closing: true, order });
  });
  markers.sort((a, b) => {
    if (a.pos !== b.pos) return a.pos - b.pos;
    // Close what is open before opening something new at the same offset.
    if (a.closing !== b.closing) return a.closing ? -1 : 1;
    // Openers: outermost first. Closers: innermost first.
    return a.closing ? b.order - a.order : a.order - b.order;
  });

  let result = "";
  let lastPos = 0;
  for (const marker of markers) {
    result += text.substring(lastPos, marker.pos) + marker.insert;
    lastPos = marker.pos;
  }
  result += text.substring(lastPos);
  return result;
}
356
435
  async function defaultReadline(prompt) {
@@ -522,6 +601,43 @@ async function exportTelegramChannel(options) {
522
601
  if (message.replyTo && "replyToMsgId" in message.replyTo) {
523
602
  replyToMsgId = message.replyTo.replyToMsgId;
524
603
  }
604
+ let fwdFrom;
605
+ if (message.fwdFrom) {
606
+ fwdFrom = {};
607
+ if (message.fwdFrom.date) {
608
+ fwdFrom.date = new Date(message.fwdFrom.date * 1e3);
609
+ }
610
+ const fromId = message.fwdFrom.fromId;
611
+ if (fromId) {
612
+ if (fromId instanceof Api.PeerChannel) {
613
+ fwdFrom.fromId = fromId.channelId.toJSNumber();
614
+ try {
615
+ const channelEntity = await client.getEntity(fromId.channelId);
616
+ if (channelEntity instanceof Api.Channel) {
617
+ fwdFrom.fromUsername = channelEntity.username || void 0;
618
+ fwdFrom.fromName = channelEntity.title;
619
+ }
620
+ } catch (e) {
621
+ }
622
+ } else if (fromId instanceof Api.PeerUser) {
623
+ fwdFrom.fromId = fromId.userId.toJSNumber();
624
+ try {
625
+ const userEntity = await client.getEntity(fromId.userId);
626
+ if (userEntity instanceof Api.User) {
627
+ fwdFrom.fromUsername = userEntity.username || void 0;
628
+ fwdFrom.fromName = [userEntity.firstName, userEntity.lastName].filter(Boolean).join(" ");
629
+ }
630
+ } catch (e) {
631
+ }
632
+ }
633
+ }
634
+ if (!fwdFrom.fromName && message.fwdFrom.fromName) {
635
+ fwdFrom.fromName = message.fwdFrom.fromName;
636
+ }
637
+ if (message.fwdFrom.channelPost) {
638
+ fwdFrom.postId = message.fwdFrom.channelPost;
639
+ }
640
+ }
525
641
  const post = {
526
642
  msgId,
527
643
  date: new Date(message.date * 1e3),
@@ -533,7 +649,8 @@ async function exportTelegramChannel(options) {
533
649
  link,
534
650
  channelUsername: channelMeta.username,
535
651
  channelTitle: channelMeta.title,
536
- replyToMsgId
652
+ replyToMsgId,
653
+ fwdFrom
537
654
  };
538
655
  posts.push(post);
539
656
  const markdown = formatPostMarkdown(post);
@@ -571,6 +688,29 @@ forwards: ${post.forwards}`;
571
688
  frontmatter += `
572
689
  reply_to_msg_id: ${post.replyToMsgId}`;
573
690
  }
691
+ if (post.fwdFrom) {
692
+ frontmatter += "\nfwd_from:";
693
+ if (post.fwdFrom.fromName) {
694
+ frontmatter += `
695
+ from_name: "${post.fwdFrom.fromName.replace(/"/g, '\\"')}"`;
696
+ }
697
+ if (post.fwdFrom.fromUsername) {
698
+ frontmatter += `
699
+ from_username: "${post.fwdFrom.fromUsername}"`;
700
+ }
701
+ if (post.fwdFrom.fromId) {
702
+ frontmatter += `
703
+ from_id: ${post.fwdFrom.fromId}`;
704
+ }
705
+ if (post.fwdFrom.postId) {
706
+ frontmatter += `
707
+ post_id: ${post.fwdFrom.postId}`;
708
+ }
709
+ if (post.fwdFrom.date) {
710
+ frontmatter += `
711
+ date: ${post.fwdFrom.date.toISOString()}`;
712
+ }
713
+ }
574
714
  frontmatter += "\n---\n\n";
575
715
  let body = post.content || "";
576
716
  if (post.mediaFiles.length > 0) {
@@ -643,8 +783,68 @@ function generateSlug2(text, lang) {
643
783
  }
644
784
  return processed.toLowerCase().replace(/[^a-z0-9\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "").substring(0, 60) || "untitled";
645
785
  }
786
/**
 * Split posts into groups of messages that were published close together.
 *
 * Posts are ordered chronologically; a post joins the current group when it
 * was published at most `timeWindowMs` after the previous post (so a chain of
 * closely spaced posts ends up in one group), otherwise it starts a new group.
 *
 * Posts with the same timestamp are ordered by `msgId`, so the result does
 * not depend on the order the caller happened to list them in.
 *
 * @param {Array<object>} posts Posts with a `date` (Date or anything
 *   `new Date()` accepts) and a numeric `msgId`. The array is not mutated.
 * @param {number} [timeWindowMs=300000] Maximum gap between neighbouring
 *   posts of one group, in milliseconds.
 * @returns {Array<Array<object>>} Groups in chronological order.
 */
function groupPostsByTime(posts, timeWindowMs = 5 * 60 * 1e3) {
  const timeOf = (post) => new Date(post.date).getTime();
  const sortedPosts = [...posts].sort(
    (a, b) => timeOf(a) - timeOf(b) || a.msgId - b.msgId
  );
  const groups = [];
  let currentGroup = [];
  let lastTime = null;
  for (const post of sortedPosts) {
    const postTime = timeOf(post);
    // Input is sorted, so the gap to the previous post is never negative.
    if (lastTime !== null && postTime - lastTime <= timeWindowMs) {
      currentGroup.push(post);
    } else {
      if (currentGroup.length > 0) {
        groups.push(currentGroup);
      }
      currentGroup = [post];
    }
    lastTime = postTime;
  }
  if (currentGroup.length > 0) {
    groups.push(currentGroup);
  }
  return groups;
}
810
/**
 * Merge a group of posts (see `groupPostsByTime`) into a single post.
 *
 * The first post that is not a forward becomes the main post and supplies
 * all fields that are not merged (msgId, date, link, ...); when every post is
 * a forward the earliest one is used. Content is joined in chronological
 * order with blank lines. Forwarded posts other than the main post are
 * rendered as a Markdown blockquote followed by an attribution line.
 *
 * Posts with the same timestamp are ordered by `msgId`, so the choice of the
 * main post does not depend on the input order.
 *
 * @param {Array<object>} posts Non-empty list of posts. Not mutated.
 * @returns {object} Copy of the main post with merged `content`, `hasMedia`,
 *   `mediaFiles`, plus `mergedMsgIds` listing every merged message id.
 * @throws {TypeError} When `posts` is empty or not an array.
 */
function mergePostGroup(posts) {
  if (!Array.isArray(posts) || posts.length === 0) {
    throw new TypeError("mergePostGroup requires at least one post");
  }
  const timeOf = (post) => new Date(post.date).getTime();
  const sorted = [...posts].sort(
    (a, b) => timeOf(a) - timeOf(b) || a.msgId - b.msgId
  );
  const mainPost = sorted.find((p) => !p.fwdFrom) || sorted[0];

  // Blockquote for a forwarded post: quoted text (if any) + attribution.
  const quoteForward = (content, fwd) => {
    const sourceLink = fwd.fromUsername && fwd.postId
      ? `https://t.me/${fwd.fromUsername}/${fwd.postId}`
      : fwd.fromUsername
        ? `https://t.me/${fwd.fromUsername}`
        : null;
    const sourceName = fwd.fromName || fwd.fromUsername || "Unknown";
    const attribution = sourceLink
      ? `\u2014 [${sourceName}](${sourceLink})`
      : `\u2014 ${sourceName}`;
    // A media-only forward has no text; emit just the attribution instead of
    // an empty "> " quote line.
    const quoted = content
      ? content.split("\n").map((line) => `> ${line}`).join("\n") + "\n>\n"
      : "";
    return `${quoted}> ${attribution}`;
  };

  const contentParts = [];
  for (const p of sorted) {
    const content = p.content || "";
    if (p.fwdFrom && p !== mainPost) {
      contentParts.push(quoteForward(content, p.fwdFrom));
    } else if (content) {
      // The main post stays unquoted even when it is itself a forward; its
      // `fwdFrom` is kept on the result so the caller can attribute it.
      contentParts.push(content);
    }
  }

  return {
    ...mainPost,
    content: contentParts.join("\n\n"),
    hasMedia: sorted.some((p) => p.hasMedia),
    mediaFiles: sorted.flatMap((p) => p.mediaFiles || []),
    // Store all msgIds for reference
    mergedMsgIds: sorted.map((p) => p.msgId),
    // Keep fwdFrom only if main post is forwarded
    fwdFrom: mainPost.fwdFrom
  };
}
646
846
/**
 * Remove media references and channel-scoped hashtags from post text.
 *
 * Strips, in order:
 *  - a `## Attachments` heading together with the `- ...` list below it;
 *  - stray `- media/<id>/<file>` list items;
 *  - image embeds of the form `![](/media/<id>/<file>)`;
 *  - `<video src="/media/<id>/<file>" ...></video>` embeds;
 *  - hashtags of the form `#tag@channel`. The tag part accepts any Unicode
 *    letter or digit, so non-Latin tags (e.g. Cyrillic) are removed as well.
 *
 * @param {string} text Raw post content.
 * @returns {string} Cleaned content with surrounding whitespace trimmed.
 */
function cleanContent2(text) {
  return text
    .replace(/##\s*Attachments\n(?:- [^\n]+\n?)*/g, "")
    .replace(/- media\/\d+\/[^\n]+/g, "")
    .replace(/!\[\]\(\/media\/\d+\/[^)]+\)/g, "")
    .replace(/<video src="\/media\/\d+\/[^"]+"\s*[^>]*><\/video>/g, "")
    .replace(/#[\p{L}\p{N}_]+@\w+/gu, "")
    .trim();
}
649
849
  function extractTitleAndBody(content) {
650
850
  const lines = content.split("\n").filter((l) => l.trim());
@@ -659,39 +859,23 @@ async function processPost(post, options, exportDir) {
659
859
  const { title: originalTitle, body: originalBody } = extractTitleAndBody(cleanedContent);
660
860
  const date = post.date.toISOString().split("T")[0];
661
861
  const languages = [];
662
- const translationExists = (lang) => {
663
- const langDir = path2.join(outputDir, lang);
664
- if (!fs2.existsSync(langDir)) return false;
665
- try {
666
- for (const slug of fs2.readdirSync(langDir)) {
667
- const indexPath = path2.join(langDir, slug, "index.md");
668
- if (fs2.existsSync(indexPath)) {
669
- const content = fs2.readFileSync(indexPath, "utf-8");
670
- if (content.includes(`original_link: "${post.link}"`)) {
671
- return true;
672
- }
673
- }
862
+ const baseSlug = String(post.msgId);
863
+ const postDir = path2.join(outputDir, baseSlug);
864
+ const translationExistsInDir = (lang) => {
865
+ if (!fs2.existsSync(postDir)) return false;
866
+ const langFile = path2.join(postDir, `${lang}.md`);
867
+ if (fs2.existsSync(langFile)) {
868
+ const content = fs2.readFileSync(langFile, "utf-8");
869
+ if (content.includes(`original_link: "${post.link}"`)) {
870
+ return true;
674
871
  }
675
- } catch {
676
872
  }
677
873
  return false;
678
874
  };
679
875
  if (translate && translate.targetLangs.length > 0) {
680
876
  const sourceLang = translate.sourceLang || "ru";
681
- if (translate.keepOriginal && !translationExists(sourceLang)) {
682
- languages.push({
683
- lang: sourceLang,
684
- title: originalTitle,
685
- body: originalBody,
686
- isOriginal: true
687
- });
688
- }
689
877
  for (const targetLang of translate.targetLangs) {
690
878
  if (targetLang === sourceLang) continue;
691
- if (translationExists(targetLang)) {
692
- onProgress?.(` ${targetLang}: already exists, skipping`);
693
- continue;
694
- }
695
879
  onProgress?.(` Translating to ${targetLang}...`);
696
880
  const translateOpts = {
697
881
  apiKey: translate.apiKey,
@@ -702,18 +886,34 @@ async function processPost(post, options, exportDir) {
702
886
  };
703
887
  try {
704
888
  const translatedTitle = await translateTitle(originalTitle, translateOpts);
889
+ if (originalBody) {
890
+ await new Promise((r) => setTimeout(r, 2e3));
891
+ }
705
892
  const translatedBody = originalBody ? await translateContent(originalBody, translateOpts) : "";
706
893
  languages.push({
707
894
  lang: targetLang,
708
895
  title: translatedTitle,
709
896
  body: translatedBody,
710
- isOriginal: false
897
+ isOriginal: false,
898
+ slug: generateEnglishSlug(translatedTitle)
899
+ // slug from translated title for URL
711
900
  });
712
901
  onProgress?.(` \u2192 ${targetLang}: "${translatedTitle.substring(0, 40)}..."`);
713
902
  } catch (error) {
714
903
  onProgress?.(` Error translating to ${targetLang}: ${error.message}`);
715
904
  }
716
- await new Promise((r) => setTimeout(r, 2e3));
905
+ await new Promise((r) => setTimeout(r, 5e3));
906
+ }
907
+ if (translate.keepOriginal) {
908
+ const russianSlug = generateSlug2(originalTitle);
909
+ languages.push({
910
+ lang: sourceLang,
911
+ title: originalTitle,
912
+ body: originalBody,
913
+ isOriginal: true,
914
+ slug: russianSlug
915
+ // Custom slug for Russian URL
916
+ });
717
917
  }
718
918
  } else {
719
919
  const defaultLang = translate?.sourceLang || "ru";
@@ -724,25 +924,21 @@ async function processPost(post, options, exportDir) {
724
924
  isOriginal: true
725
925
  });
726
926
  }
727
- for (const { lang, title, body, isOriginal } of languages) {
728
- const slug = generateSlug2(title, lang);
729
- const postDir = path2.join(outputDir, lang, slug);
730
- if (fs2.existsSync(postDir)) {
731
- onProgress?.(` Skipping existing: ${lang}/${slug}`);
927
+ for (const { lang, title, body, isOriginal, slug: customSlug } of languages) {
928
+ const langFile = path2.join(postDir, `${lang}.md`);
929
+ if (fs2.existsSync(langFile) || translationExistsInDir(lang)) {
930
+ onProgress?.(` Skipping existing: ${baseSlug}/${lang}.md`);
732
931
  continue;
733
932
  }
734
933
  fs2.mkdirSync(postDir, { recursive: true });
735
934
  let finalBody = body;
736
935
  const mediaFiles = [];
737
936
  if (post.mediaFiles && post.mediaFiles.length > 0) {
738
- const paddedId = String(post.msgId).padStart(6, "0");
739
- const targetMediaDir = mediaDir ? path2.join(mediaDir, paddedId) : path2.join(outputDir, "..", "media", paddedId);
740
- fs2.mkdirSync(targetMediaDir, { recursive: true });
741
937
  for (let i = 0; i < post.mediaFiles.length; i++) {
742
938
  const sourcePath = path2.join(exportDir, post.mediaFiles[i]);
743
939
  const ext = path2.extname(sourcePath).toLowerCase() || ".jpg";
744
940
  const newName = `image${i + 1}${ext}`;
745
- const targetPath = path2.join(targetMediaDir, newName);
941
+ const targetPath = path2.join(postDir, newName);
746
942
  if (fs2.existsSync(sourcePath)) {
747
943
  if (!fs2.existsSync(targetPath)) {
748
944
  fs2.copyFileSync(sourcePath, targetPath);
@@ -753,8 +949,8 @@ async function processPost(post, options, exportDir) {
753
949
  if (mediaFiles.length > 0) {
754
950
  const images = mediaFiles.filter((f) => !f.match(/\.(mp4|mov|webm|m4v)$/i));
755
951
  const videos = mediaFiles.filter((f) => f.match(/\.(mp4|mov|webm|m4v)$/i));
756
- const imageMarkdown = images.map((f) => `![](/media/${paddedId}/${f})`).join("\n\n");
757
- const videoMarkdown = videos.map((f) => `<video src="/media/${paddedId}/${f}" controls></video>`).join("\n\n");
952
+ const imageMarkdown = images.map((f) => `![](${f})`).join("\n\n");
953
+ const videoMarkdown = videos.map((f) => `<video src="${f}" controls></video>`).join("\n\n");
758
954
  const mediaMarkdown = [imageMarkdown, videoMarkdown].filter(Boolean).join("\n\n");
759
955
  if (mediaMarkdown) {
760
956
  finalBody = finalBody + "\n\n" + mediaMarkdown;
@@ -763,22 +959,116 @@ async function processPost(post, options, exportDir) {
763
959
  }
764
960
  const replyLine = post.replyToMsgId ? `reply_to_msg_id: ${post.replyToMsgId}
765
961
  ` : "";
962
+ const slugLine = customSlug ? `slug: "${customSlug}"
963
+ ` : "";
964
+ let fwdFromLine = "";
965
+ let bodyWithForwardQuote = finalBody;
966
+ if (post.fwdFrom) {
967
+ const fwdFrom = post.fwdFrom;
968
+ fwdFromLine = "fwd_from:\n";
969
+ if (fwdFrom.fromName) {
970
+ fwdFromLine += ` from_name: "${fwdFrom.fromName.replace(/"/g, '\\"')}"
971
+ `;
972
+ }
973
+ if (fwdFrom.fromUsername) {
974
+ fwdFromLine += ` from_username: "${fwdFrom.fromUsername}"
975
+ `;
976
+ }
977
+ if (fwdFrom.postId) {
978
+ fwdFromLine += ` post_id: ${fwdFrom.postId}
979
+ `;
980
+ }
981
+ const quotedBody = finalBody.split("\n").map((line) => `> ${line}`).join("\n");
982
+ const sourceLink = fwdFrom.fromUsername && fwdFrom.postId ? `https://t.me/${fwdFrom.fromUsername}/${fwdFrom.postId}` : fwdFrom.fromUsername ? `https://t.me/${fwdFrom.fromUsername}` : null;
983
+ const sourceName = fwdFrom.fromName || fwdFrom.fromUsername || "Unknown";
984
+ const attribution = sourceLink ? `\u2014 [${sourceName}](${sourceLink})` : `\u2014 ${sourceName}`;
985
+ bodyWithForwardQuote = `${quotedBody}
986
+ >
987
+ > ${attribution}`;
988
+ }
766
989
  const markdown = `---
767
990
  title: "${title.replace(/"/g, '\\"')}"
768
991
  date: ${date}
769
992
  lang: ${lang}
770
993
  original_link: "${post.link || ""}"
771
- ${replyLine}${isOriginal ? "" : `translated_from: "${translate?.sourceLang || "ru"}"
994
+ ${slugLine}${replyLine}${fwdFromLine}${isOriginal ? "" : `translated_from: "${translate?.sourceLang || "ru"}"
772
995
  `}---
773
996
 
774
- ${finalBody}
997
+ ${bodyWithForwardQuote}
775
998
  `;
776
- fs2.writeFileSync(path2.join(postDir, "index.md"), markdown);
777
- createdPosts.push(`${lang}/${slug}`);
778
- onProgress?.(` Created: ${lang}/${slug}`);
999
+ fs2.writeFileSync(langFile, markdown);
1000
+ createdPosts.push(`${baseSlug}/${lang}.md`);
1001
+ onProgress?.(` Created: ${baseSlug}/${lang}.md`);
779
1002
  }
780
1003
  return createdPosts;
781
1004
  }
1005
/**
 * Build posts from a previously written export directory, without talking
 * to Telegram.
 *
 * Reads `<exportDir>/posts.ndjson` (one JSON post per line), groups posts
 * published within five minutes of each other, merges each group into one
 * post and hands every resulting post to `processPost`.
 *
 * @param {object} options
 * @param {string} options.exportDir Directory containing `posts.ndjson`
 *   (also passed to `processPost` as the export directory).
 * @param {string} options.outputDir Output directory; created if missing.
 * @param {object} [options.translate] Translation config forwarded to `processPost`.
 * @param {(message: string) => void} [options.onProgress] Progress callback.
 * @param {number[]} [options.msgIds] When non-empty, only posts whose own or
 *   merged message ids intersect this list are processed.
 * @param {boolean} [options.force] Delete existing `.md` files in the post's
 *   output directory before processing, so they are regenerated.
 * @returns {Promise<{processed: number, skipped: number}>} `processed` counts
 *   posts for which `processPost` created at least one file; `skipped` counts
 *   empty posts, posts that produced nothing and posts that failed.
 * @throws {Error} When `posts.ndjson` does not exist.
 * @throws {SyntaxError} When a line of `posts.ndjson` is not valid JSON.
 */
async function processFromFiles(options) {
  const { exportDir, outputDir, translate, onProgress, msgIds, force } = options;
  const ndjsonPath = path2.join(exportDir, "posts.ndjson");
  if (!fs2.existsSync(ndjsonPath)) {
    throw new Error(`posts.ndjson not found in ${exportDir}`);
  }
  fs2.mkdirSync(outputDir, { recursive: true });

  const describeError = (error) =>
    error instanceof Error ? error.message : String(error);

  const posts = [];
  fs2.readFileSync(ndjsonPath, "utf-8").split("\n").forEach((line, index) => {
    if (!line.trim()) return;
    let data;
    try {
      data = JSON.parse(line);
    } catch (error) {
      // Point at the offending line instead of surfacing a bare parse error.
      throw new SyntaxError(
        `Invalid JSON in ${ndjsonPath} at line ${index + 1}: ${describeError(error)}`
      );
    }
    // JSON has no Date type, so dates come back as whatever they were
    // serialised to (presumably ISO strings) and must be revived.
    const post = { ...data, date: new Date(data.date) };
    if (data.fwdFrom && data.fwdFrom.date) {
      post.fwdFrom = { ...data.fwdFrom, date: new Date(data.fwdFrom.date) };
    }
    posts.push(post);
  });
  onProgress?.(`Found ${posts.length} posts in ${ndjsonPath}`);

  const postGroups = groupPostsByTime(posts, 5 * 60 * 1e3);
  onProgress?.(`Grouped into ${postGroups.length} post groups`);
  const mergedPosts = postGroups.map(
    (group) => group.length > 1 ? mergePostGroup(group) : group[0]
  );

  const wantedIds = msgIds && msgIds.length > 0 ? new Set(msgIds) : null;
  let processed = 0;
  let skipped = 0;
  for (const post of mergedPosts) {
    // A merged post matches the filter if any of its source messages does.
    const postMsgIds = post.mergedMsgIds || [post.msgId];
    if (wantedIds && !postMsgIds.some((id) => wantedIds.has(id))) {
      continue;
    }
    const postId = `${post.channelUsername}-${post.msgId}`;
    if (!post.content && !post.hasMedia) {
      skipped++;
      continue;
    }
    onProgress?.(`Processing: ${postId}`);
    // No Telegram access happens here, so the API fields are placeholders.
    const processOptions = {
      apiId: 0,
      apiHash: "",
      channel: "",
      outputDir,
      translate,
      onProgress
    };
    try {
      if (force) {
        // Only the generated markdown is removed; other files in the post
        // directory are left in place.
        const postDir = path2.join(outputDir, String(post.msgId));
        if (fs2.existsSync(postDir)) {
          for (const file of fs2.readdirSync(postDir)) {
            if (file.endsWith(".md")) {
              fs2.unlinkSync(path2.join(postDir, file));
            }
          }
        }
      }
      const created = await processPost(post, processOptions, exportDir);
      if (created.length > 0) {
        processed++;
      } else {
        skipped++;
      }
    } catch (error) {
      // Best effort: one failing post must not abort the whole run.
      onProgress?.(`  Error: ${describeError(error)}`);
      skipped++;
    }
    await new Promise((r) => setTimeout(r, 50));
  }
  return { processed, skipped };
}
782
1072
  async function exportAndTranslate(options) {
783
1073
  const {
784
1074
  apiId,
@@ -814,9 +1104,14 @@ async function exportAndTranslate(options) {
814
1104
  });
815
1105
  onProgress?.(` Exported ${exportResult.posts.length} posts from "${exportResult.channelMeta.title}"`);
816
1106
  onProgress?.("\nStep 2: Processing posts...");
1107
+ const postGroups = groupPostsByTime(exportResult.posts, 5 * 60 * 1e3);
1108
+ onProgress?.(` Grouped into ${postGroups.length} post groups`);
1109
+ const mergedPosts = postGroups.map(
1110
+ (group) => group.length > 1 ? mergePostGroup(group) : group[0]
1111
+ );
817
1112
  let processed = 0;
818
1113
  let skipped = 0;
819
- for (const post of exportResult.posts) {
1114
+ for (const post of mergedPosts) {
820
1115
  const postId = `${post.channelUsername}-${post.msgId}`;
821
1116
  if (!post.content && !post.hasMedia) {
822
1117
  skipped++;
@@ -867,6 +1162,7 @@ export {
867
1162
  generateSlug,
868
1163
  groupPosts,
869
1164
  parsePost,
1165
+ processFromFiles,
870
1166
  resumeExport,
871
1167
  trackBookAppointment,
872
1168
  trackGoal,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "koztv-blog-tools",
3
- "version": "1.2.9",
3
+ "version": "1.2.11",
4
4
  "description": "Shared utilities for Telegram-based blog sites",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",