koztv-blog-tools 1.2.9 → 1.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/export-telegram.js +0 -0
- package/dist/index.d.mts +42 -1
- package/dist/index.d.ts +42 -1
- package/dist/index.js +365 -68
- package/dist/index.mjs +364 -68
- package/package.json +1 -1
package/bin/export-telegram.js
CHANGED
|
File without changes
|
package/dist/index.d.mts
CHANGED
|
@@ -178,6 +178,18 @@ interface TelegramExportOptions {
|
|
|
178
178
|
/** Callback when session string is generated (save this for future use) */
|
|
179
179
|
onSession?: (session: string) => void;
|
|
180
180
|
}
|
|
181
|
+
interface ForwardInfo {
|
|
182
|
+
/** Original channel/user ID */
|
|
183
|
+
fromId?: number;
|
|
184
|
+
/** Original channel username (if channel) */
|
|
185
|
+
fromUsername?: string;
|
|
186
|
+
/** Original channel/user name */
|
|
187
|
+
fromName?: string;
|
|
188
|
+
/** Original post ID in source channel */
|
|
189
|
+
postId?: number;
|
|
190
|
+
/** Original post date */
|
|
191
|
+
date?: Date;
|
|
192
|
+
}
|
|
181
193
|
interface ExportedPost {
|
|
182
194
|
msgId: number;
|
|
183
195
|
date: Date;
|
|
@@ -190,6 +202,10 @@ interface ExportedPost {
|
|
|
190
202
|
channelUsername: string;
|
|
191
203
|
channelTitle: string;
|
|
192
204
|
replyToMsgId?: number;
|
|
205
|
+
/** Message IDs merged into this post (for grouped posts) */
|
|
206
|
+
mergedMsgIds?: number[];
|
|
207
|
+
/** Forwarding info if this post was forwarded from another source */
|
|
208
|
+
fwdFrom?: ForwardInfo;
|
|
193
209
|
}
|
|
194
210
|
interface ExportResult {
|
|
195
211
|
channelMeta: {
|
|
@@ -280,9 +296,34 @@ interface ExportAndTranslateResult {
|
|
|
280
296
|
/** Session string for future use */
|
|
281
297
|
session: string;
|
|
282
298
|
}
|
|
299
|
+
/**
|
|
300
|
+
* Options for processing posts from intermediate files
|
|
301
|
+
*/
|
|
302
|
+
interface ProcessFromFilesOptions {
|
|
303
|
+
/** Directory with intermediate export files (.telegram-export) */
|
|
304
|
+
exportDir: string;
|
|
305
|
+
/** Output directory for posts */
|
|
306
|
+
outputDir: string;
|
|
307
|
+
/** Translation config (optional) */
|
|
308
|
+
translate?: TranslationConfig;
|
|
309
|
+
/** Progress callback */
|
|
310
|
+
onProgress?: (message: string) => void;
|
|
311
|
+
/** Only process specific message IDs (if empty, process all) */
|
|
312
|
+
msgIds?: number[];
|
|
313
|
+
/** Force reprocess even if file exists */
|
|
314
|
+
force?: boolean;
|
|
315
|
+
}
|
|
316
|
+
/**
|
|
317
|
+
* Process posts from intermediate files (Step 2 only, no Telegram API)
|
|
318
|
+
* Use this after running exportTelegramChannel separately
|
|
319
|
+
*/
|
|
320
|
+
declare function processFromFiles(options: ProcessFromFilesOptions): Promise<{
|
|
321
|
+
processed: number;
|
|
322
|
+
skipped: number;
|
|
323
|
+
}>;
|
|
283
324
|
/**
|
|
284
325
|
* Export posts from Telegram channel with optional translation
|
|
285
326
|
*/
|
|
286
327
|
declare function exportAndTranslate(options: ExportAndTranslateOptions): Promise<ExportAndTranslateResult>;
|
|
287
328
|
|
|
288
|
-
export { type AnalyticsConfig, type ExportAndTranslateOptions, type ExportAndTranslateResult, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type TelegramExportOptions, type TranslateOptions, type TranslationConfig, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportAndTranslate, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
|
|
329
|
+
export { type AnalyticsConfig, type ExportAndTranslateOptions, type ExportAndTranslateResult, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type ProcessFromFilesOptions, type TelegramExportOptions, type TranslateOptions, type TranslationConfig, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportAndTranslate, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, processFromFiles, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
|
package/dist/index.d.ts
CHANGED
|
@@ -178,6 +178,18 @@ interface TelegramExportOptions {
|
|
|
178
178
|
/** Callback when session string is generated (save this for future use) */
|
|
179
179
|
onSession?: (session: string) => void;
|
|
180
180
|
}
|
|
181
|
+
interface ForwardInfo {
|
|
182
|
+
/** Original channel/user ID */
|
|
183
|
+
fromId?: number;
|
|
184
|
+
/** Original channel username (if channel) */
|
|
185
|
+
fromUsername?: string;
|
|
186
|
+
/** Original channel/user name */
|
|
187
|
+
fromName?: string;
|
|
188
|
+
/** Original post ID in source channel */
|
|
189
|
+
postId?: number;
|
|
190
|
+
/** Original post date */
|
|
191
|
+
date?: Date;
|
|
192
|
+
}
|
|
181
193
|
interface ExportedPost {
|
|
182
194
|
msgId: number;
|
|
183
195
|
date: Date;
|
|
@@ -190,6 +202,10 @@ interface ExportedPost {
|
|
|
190
202
|
channelUsername: string;
|
|
191
203
|
channelTitle: string;
|
|
192
204
|
replyToMsgId?: number;
|
|
205
|
+
/** Message IDs merged into this post (for grouped posts) */
|
|
206
|
+
mergedMsgIds?: number[];
|
|
207
|
+
/** Forwarding info if this post was forwarded from another source */
|
|
208
|
+
fwdFrom?: ForwardInfo;
|
|
193
209
|
}
|
|
194
210
|
interface ExportResult {
|
|
195
211
|
channelMeta: {
|
|
@@ -280,9 +296,34 @@ interface ExportAndTranslateResult {
|
|
|
280
296
|
/** Session string for future use */
|
|
281
297
|
session: string;
|
|
282
298
|
}
|
|
299
|
+
/**
|
|
300
|
+
* Options for processing posts from intermediate files
|
|
301
|
+
*/
|
|
302
|
+
interface ProcessFromFilesOptions {
|
|
303
|
+
/** Directory with intermediate export files (.telegram-export) */
|
|
304
|
+
exportDir: string;
|
|
305
|
+
/** Output directory for posts */
|
|
306
|
+
outputDir: string;
|
|
307
|
+
/** Translation config (optional) */
|
|
308
|
+
translate?: TranslationConfig;
|
|
309
|
+
/** Progress callback */
|
|
310
|
+
onProgress?: (message: string) => void;
|
|
311
|
+
/** Only process specific message IDs (if empty, process all) */
|
|
312
|
+
msgIds?: number[];
|
|
313
|
+
/** Force reprocess even if file exists */
|
|
314
|
+
force?: boolean;
|
|
315
|
+
}
|
|
316
|
+
/**
|
|
317
|
+
* Process posts from intermediate files (Step 2 only, no Telegram API)
|
|
318
|
+
* Use this after running exportTelegramChannel separately
|
|
319
|
+
*/
|
|
320
|
+
declare function processFromFiles(options: ProcessFromFilesOptions): Promise<{
|
|
321
|
+
processed: number;
|
|
322
|
+
skipped: number;
|
|
323
|
+
}>;
|
|
283
324
|
/**
|
|
284
325
|
* Export posts from Telegram channel with optional translation
|
|
285
326
|
*/
|
|
286
327
|
declare function exportAndTranslate(options: ExportAndTranslateOptions): Promise<ExportAndTranslateResult>;
|
|
287
328
|
|
|
288
|
-
export { type AnalyticsConfig, type ExportAndTranslateOptions, type ExportAndTranslateResult, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type TelegramExportOptions, type TranslateOptions, type TranslationConfig, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportAndTranslate, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
|
|
329
|
+
export { type AnalyticsConfig, type ExportAndTranslateOptions, type ExportAndTranslateResult, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type ProcessFromFilesOptions, type TelegramExportOptions, type TranslateOptions, type TranslationConfig, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportAndTranslate, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, processFromFiles, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
|
package/dist/index.js
CHANGED
|
@@ -44,6 +44,7 @@ __export(index_exports, {
|
|
|
44
44
|
generateSlug: () => generateSlug,
|
|
45
45
|
groupPosts: () => groupPosts,
|
|
46
46
|
parsePost: () => parsePost,
|
|
47
|
+
processFromFiles: () => processFromFiles,
|
|
47
48
|
resumeExport: () => resumeExport,
|
|
48
49
|
trackBookAppointment: () => trackBookAppointment,
|
|
49
50
|
trackGoal: () => trackGoal,
|
|
@@ -333,7 +334,7 @@ Do not add any explanations or notes.`
|
|
|
333
334
|
let lastError = null;
|
|
334
335
|
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
335
336
|
if (attempt > 0) {
|
|
336
|
-
const delay =
|
|
337
|
+
const delay = 1e3 * Math.pow(2, attempt - 1);
|
|
337
338
|
await new Promise((r) => setTimeout(r, delay));
|
|
338
339
|
}
|
|
339
340
|
const response = await fetch(endpoint, {
|
|
@@ -370,44 +371,123 @@ function generateEnglishSlug(title) {
|
|
|
370
371
|
|
|
371
372
|
// src/telegram.ts
|
|
372
373
|
var import_telegram = require("telegram");
|
|
373
|
-
var import_sessions = require("telegram/sessions");
|
|
374
|
+
var import_sessions = require("telegram/sessions/index.js");
|
|
374
375
|
var fs = __toESM(require("fs"));
|
|
375
376
|
var path = __toESM(require("path"));
|
|
376
377
|
var readline = __toESM(require("readline"));
|
|
377
378
|
function entitiesToMarkdown(text, entities) {
|
|
378
379
|
if (!entities || entities.length === 0) return text;
|
|
379
|
-
const
|
|
380
|
-
|
|
380
|
+
const mergedEntities = [];
|
|
381
|
+
const processedIndices = /* @__PURE__ */ new Set();
|
|
382
|
+
const links = [];
|
|
383
|
+
for (const entity of entities) {
|
|
384
|
+
if (entity instanceof import_telegram.Api.MessageEntityTextUrl) {
|
|
385
|
+
links.push({
|
|
386
|
+
start: entity.offset,
|
|
387
|
+
end: entity.offset + entity.length,
|
|
388
|
+
entity
|
|
389
|
+
});
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
const formatEntities = entities.filter(
|
|
393
|
+
(e) => e instanceof import_telegram.Api.MessageEntityBold || e instanceof import_telegram.Api.MessageEntityItalic
|
|
394
|
+
);
|
|
395
|
+
for (let i = 0; i < formatEntities.length; i++) {
|
|
396
|
+
if (processedIndices.has(entities.indexOf(formatEntities[i]))) continue;
|
|
397
|
+
const current = formatEntities[i];
|
|
398
|
+
const currentEnd = current.offset + current.length;
|
|
399
|
+
const bridgingLink = links.find((l) => l.start === currentEnd);
|
|
400
|
+
if (bridgingLink) {
|
|
401
|
+
const nextEntity = formatEntities.find(
|
|
402
|
+
(e) => e !== current && e.constructor === current.constructor && e.offset === bridgingLink.end
|
|
403
|
+
);
|
|
404
|
+
if (nextEntity) {
|
|
405
|
+
const mergedLength = nextEntity.offset + nextEntity.length - current.offset;
|
|
406
|
+
if (current instanceof import_telegram.Api.MessageEntityBold) {
|
|
407
|
+
mergedEntities.push(new import_telegram.Api.MessageEntityBold({ offset: current.offset, length: mergedLength }));
|
|
408
|
+
} else if (current instanceof import_telegram.Api.MessageEntityItalic) {
|
|
409
|
+
mergedEntities.push(new import_telegram.Api.MessageEntityItalic({ offset: current.offset, length: mergedLength }));
|
|
410
|
+
}
|
|
411
|
+
processedIndices.add(entities.indexOf(current));
|
|
412
|
+
processedIndices.add(entities.indexOf(nextEntity));
|
|
413
|
+
continue;
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
if (!processedIndices.has(entities.indexOf(current))) {
|
|
417
|
+
mergedEntities.push(current);
|
|
418
|
+
processedIndices.add(entities.indexOf(current));
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
for (let i = 0; i < entities.length; i++) {
|
|
422
|
+
if (processedIndices.has(i)) continue;
|
|
423
|
+
const entity = entities[i];
|
|
424
|
+
if (!(entity instanceof import_telegram.Api.MessageEntityBold) && !(entity instanceof import_telegram.Api.MessageEntityItalic)) {
|
|
425
|
+
mergedEntities.push(entity);
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
const sorted = [...mergedEntities].sort((a, b) => {
|
|
429
|
+
if (a.offset !== b.offset) return a.offset - b.offset;
|
|
430
|
+
return b.length - a.length;
|
|
431
|
+
});
|
|
432
|
+
const markers = [];
|
|
381
433
|
for (const entity of sorted) {
|
|
382
434
|
const start = entity.offset;
|
|
383
435
|
const end = entity.offset + entity.length;
|
|
384
|
-
|
|
385
|
-
let
|
|
436
|
+
let startMark = "";
|
|
437
|
+
let endMark = "";
|
|
438
|
+
let priority = 0;
|
|
386
439
|
if (entity instanceof import_telegram.Api.MessageEntityBold) {
|
|
387
|
-
|
|
440
|
+
startMark = "**";
|
|
441
|
+
endMark = "**";
|
|
442
|
+
priority = 1;
|
|
388
443
|
} else if (entity instanceof import_telegram.Api.MessageEntityItalic) {
|
|
389
|
-
|
|
444
|
+
startMark = "*";
|
|
445
|
+
endMark = "*";
|
|
446
|
+
priority = 1;
|
|
390
447
|
} else if (entity instanceof import_telegram.Api.MessageEntityCode) {
|
|
391
|
-
|
|
448
|
+
startMark = "`";
|
|
449
|
+
endMark = "`";
|
|
450
|
+
priority = 2;
|
|
392
451
|
} else if (entity instanceof import_telegram.Api.MessageEntityPre) {
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
452
|
+
startMark = "```\n";
|
|
453
|
+
endMark = "\n```";
|
|
454
|
+
priority = 2;
|
|
396
455
|
} else if (entity instanceof import_telegram.Api.MessageEntityStrike) {
|
|
397
|
-
|
|
456
|
+
startMark = "~~";
|
|
457
|
+
endMark = "~~";
|
|
458
|
+
priority = 1;
|
|
398
459
|
} else if (entity instanceof import_telegram.Api.MessageEntityUnderline) {
|
|
399
|
-
|
|
460
|
+
startMark = "**";
|
|
461
|
+
endMark = "**";
|
|
462
|
+
priority = 1;
|
|
400
463
|
} else if (entity instanceof import_telegram.Api.MessageEntityTextUrl) {
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
464
|
+
startMark = "[";
|
|
465
|
+
endMark = `](${entity.url})`;
|
|
466
|
+
priority = 10;
|
|
467
|
+
}
|
|
468
|
+
if (startMark) {
|
|
469
|
+
markers.push({ pos: start, insert: startMark, priority });
|
|
470
|
+
markers.push({ pos: end, insert: endMark, priority: -priority });
|
|
408
471
|
}
|
|
409
|
-
result = result.substring(0, start) + replacement + result.substring(end);
|
|
410
472
|
}
|
|
473
|
+
markers.sort((a, b) => {
|
|
474
|
+
if (a.pos !== b.pos) return a.pos - b.pos;
|
|
475
|
+
if (a.priority < 0 && b.priority < 0) {
|
|
476
|
+
return a.priority - b.priority;
|
|
477
|
+
}
|
|
478
|
+
if (a.priority > 0 && b.priority > 0) {
|
|
479
|
+
return b.priority - a.priority;
|
|
480
|
+
}
|
|
481
|
+
return a.priority - b.priority;
|
|
482
|
+
});
|
|
483
|
+
let result = "";
|
|
484
|
+
let lastPos = 0;
|
|
485
|
+
for (const marker of markers) {
|
|
486
|
+
result += text.substring(lastPos, marker.pos) + marker.insert;
|
|
487
|
+
lastPos = marker.pos;
|
|
488
|
+
}
|
|
489
|
+
result += text.substring(lastPos);
|
|
490
|
+
result = result.replace(/(\S) \*\*/g, "$1** ").replace(/\*\* (\S)/g, " **$1").replace(/(\S) \*/g, "$1* ").replace(/\* (\S)/g, " *$1");
|
|
411
491
|
return result;
|
|
412
492
|
}
|
|
413
493
|
async function defaultReadline(prompt) {
|
|
@@ -579,6 +659,43 @@ async function exportTelegramChannel(options) {
|
|
|
579
659
|
if (message.replyTo && "replyToMsgId" in message.replyTo) {
|
|
580
660
|
replyToMsgId = message.replyTo.replyToMsgId;
|
|
581
661
|
}
|
|
662
|
+
let fwdFrom;
|
|
663
|
+
if (message.fwdFrom) {
|
|
664
|
+
fwdFrom = {};
|
|
665
|
+
if (message.fwdFrom.date) {
|
|
666
|
+
fwdFrom.date = new Date(message.fwdFrom.date * 1e3);
|
|
667
|
+
}
|
|
668
|
+
const fromId = message.fwdFrom.fromId;
|
|
669
|
+
if (fromId) {
|
|
670
|
+
if (fromId instanceof import_telegram.Api.PeerChannel) {
|
|
671
|
+
fwdFrom.fromId = fromId.channelId.toJSNumber();
|
|
672
|
+
try {
|
|
673
|
+
const channelEntity = await client.getEntity(fromId.channelId);
|
|
674
|
+
if (channelEntity instanceof import_telegram.Api.Channel) {
|
|
675
|
+
fwdFrom.fromUsername = channelEntity.username || void 0;
|
|
676
|
+
fwdFrom.fromName = channelEntity.title;
|
|
677
|
+
}
|
|
678
|
+
} catch (e) {
|
|
679
|
+
}
|
|
680
|
+
} else if (fromId instanceof import_telegram.Api.PeerUser) {
|
|
681
|
+
fwdFrom.fromId = fromId.userId.toJSNumber();
|
|
682
|
+
try {
|
|
683
|
+
const userEntity = await client.getEntity(fromId.userId);
|
|
684
|
+
if (userEntity instanceof import_telegram.Api.User) {
|
|
685
|
+
fwdFrom.fromUsername = userEntity.username || void 0;
|
|
686
|
+
fwdFrom.fromName = [userEntity.firstName, userEntity.lastName].filter(Boolean).join(" ");
|
|
687
|
+
}
|
|
688
|
+
} catch (e) {
|
|
689
|
+
}
|
|
690
|
+
}
|
|
691
|
+
}
|
|
692
|
+
if (!fwdFrom.fromName && message.fwdFrom.fromName) {
|
|
693
|
+
fwdFrom.fromName = message.fwdFrom.fromName;
|
|
694
|
+
}
|
|
695
|
+
if (message.fwdFrom.channelPost) {
|
|
696
|
+
fwdFrom.postId = message.fwdFrom.channelPost;
|
|
697
|
+
}
|
|
698
|
+
}
|
|
582
699
|
const post = {
|
|
583
700
|
msgId,
|
|
584
701
|
date: new Date(message.date * 1e3),
|
|
@@ -590,7 +707,8 @@ async function exportTelegramChannel(options) {
|
|
|
590
707
|
link,
|
|
591
708
|
channelUsername: channelMeta.username,
|
|
592
709
|
channelTitle: channelMeta.title,
|
|
593
|
-
replyToMsgId
|
|
710
|
+
replyToMsgId,
|
|
711
|
+
fwdFrom
|
|
594
712
|
};
|
|
595
713
|
posts.push(post);
|
|
596
714
|
const markdown = formatPostMarkdown(post);
|
|
@@ -628,6 +746,29 @@ forwards: ${post.forwards}`;
|
|
|
628
746
|
frontmatter += `
|
|
629
747
|
reply_to_msg_id: ${post.replyToMsgId}`;
|
|
630
748
|
}
|
|
749
|
+
if (post.fwdFrom) {
|
|
750
|
+
frontmatter += "\nfwd_from:";
|
|
751
|
+
if (post.fwdFrom.fromName) {
|
|
752
|
+
frontmatter += `
|
|
753
|
+
from_name: "${post.fwdFrom.fromName.replace(/"/g, '\\"')}"`;
|
|
754
|
+
}
|
|
755
|
+
if (post.fwdFrom.fromUsername) {
|
|
756
|
+
frontmatter += `
|
|
757
|
+
from_username: "${post.fwdFrom.fromUsername}"`;
|
|
758
|
+
}
|
|
759
|
+
if (post.fwdFrom.fromId) {
|
|
760
|
+
frontmatter += `
|
|
761
|
+
from_id: ${post.fwdFrom.fromId}`;
|
|
762
|
+
}
|
|
763
|
+
if (post.fwdFrom.postId) {
|
|
764
|
+
frontmatter += `
|
|
765
|
+
post_id: ${post.fwdFrom.postId}`;
|
|
766
|
+
}
|
|
767
|
+
if (post.fwdFrom.date) {
|
|
768
|
+
frontmatter += `
|
|
769
|
+
date: ${post.fwdFrom.date.toISOString()}`;
|
|
770
|
+
}
|
|
771
|
+
}
|
|
631
772
|
frontmatter += "\n---\n\n";
|
|
632
773
|
let body = post.content || "";
|
|
633
774
|
if (post.mediaFiles.length > 0) {
|
|
@@ -700,8 +841,68 @@ function generateSlug2(text, lang) {
|
|
|
700
841
|
}
|
|
701
842
|
return processed.toLowerCase().replace(/[^a-z0-9\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "").substring(0, 60) || "untitled";
|
|
702
843
|
}
|
|
844
|
+
function groupPostsByTime(posts, timeWindowMs = 5 * 60 * 1e3) {
|
|
845
|
+
const sortedPosts = [...posts].sort(
|
|
846
|
+
(a, b) => new Date(a.date).getTime() - new Date(b.date).getTime()
|
|
847
|
+
);
|
|
848
|
+
const groups = [];
|
|
849
|
+
let currentGroup = [];
|
|
850
|
+
let lastDate = null;
|
|
851
|
+
for (const post of sortedPosts) {
|
|
852
|
+
const postDate = new Date(post.date);
|
|
853
|
+
if (lastDate && Math.abs(postDate.getTime() - lastDate.getTime()) <= timeWindowMs) {
|
|
854
|
+
currentGroup.push(post);
|
|
855
|
+
} else {
|
|
856
|
+
if (currentGroup.length > 0) {
|
|
857
|
+
groups.push(currentGroup);
|
|
858
|
+
}
|
|
859
|
+
currentGroup = [post];
|
|
860
|
+
}
|
|
861
|
+
lastDate = postDate;
|
|
862
|
+
}
|
|
863
|
+
if (currentGroup.length > 0) {
|
|
864
|
+
groups.push(currentGroup);
|
|
865
|
+
}
|
|
866
|
+
return groups;
|
|
867
|
+
}
|
|
868
|
+
function mergePostGroup(posts) {
|
|
869
|
+
const sorted = [...posts].sort(
|
|
870
|
+
(a, b) => new Date(a.date).getTime() - new Date(b.date).getTime()
|
|
871
|
+
);
|
|
872
|
+
const mainPostIndex = sorted.findIndex((p) => !p.fwdFrom);
|
|
873
|
+
const mainPost = mainPostIndex >= 0 ? sorted[mainPostIndex] : sorted[0];
|
|
874
|
+
const contentParts = [];
|
|
875
|
+
for (const p of sorted) {
|
|
876
|
+
if (!p.content && !p.fwdFrom) continue;
|
|
877
|
+
if (p.fwdFrom && p === mainPost) {
|
|
878
|
+
contentParts.push(p.content || "");
|
|
879
|
+
} else if (p.fwdFrom) {
|
|
880
|
+
const fwd = p.fwdFrom;
|
|
881
|
+
const quotedContent = (p.content || "").split("\n").map((line) => `> ${line}`).join("\n");
|
|
882
|
+
const sourceLink = fwd.fromUsername && fwd.postId ? `https://t.me/${fwd.fromUsername}/${fwd.postId}` : fwd.fromUsername ? `https://t.me/${fwd.fromUsername}` : null;
|
|
883
|
+
const sourceName = fwd.fromName || fwd.fromUsername || "Unknown";
|
|
884
|
+
const attribution = sourceLink ? `\u2014 [${sourceName}](${sourceLink})` : `\u2014 ${sourceName}`;
|
|
885
|
+
contentParts.push(`${quotedContent}
|
|
886
|
+
>
|
|
887
|
+
> ${attribution}`);
|
|
888
|
+
} else {
|
|
889
|
+
contentParts.push(p.content || "");
|
|
890
|
+
}
|
|
891
|
+
}
|
|
892
|
+
const allMediaFiles = sorted.flatMap((p) => p.mediaFiles || []);
|
|
893
|
+
return {
|
|
894
|
+
...mainPost,
|
|
895
|
+
content: contentParts.filter(Boolean).join("\n\n"),
|
|
896
|
+
hasMedia: sorted.some((p) => p.hasMedia),
|
|
897
|
+
mediaFiles: allMediaFiles,
|
|
898
|
+
// Store all msgIds for reference
|
|
899
|
+
mergedMsgIds: sorted.map((p) => p.msgId),
|
|
900
|
+
// Keep fwdFrom only if main post is forwarded
|
|
901
|
+
fwdFrom: mainPost.fwdFrom
|
|
902
|
+
};
|
|
903
|
+
}
|
|
703
904
|
function cleanContent2(text) {
|
|
704
|
-
return text.replace(/##\s*Attachments\n(?:- [^\n]+\n?)*/g, "").replace(/- media\/\d+\/[^\n]+/g, "").replace(/#\w+@\w+/g, "").trim();
|
|
905
|
+
return text.replace(/##\s*Attachments\n(?:- [^\n]+\n?)*/g, "").replace(/- media\/\d+\/[^\n]+/g, "").replace(/!\[\]\(\/media\/\d+\/[^)]+\)/g, "").replace(/<video src="\/media\/\d+\/[^"]+"\s*[^>]*><\/video>/g, "").replace(/#\w+@\w+/g, "").trim();
|
|
705
906
|
}
|
|
706
907
|
function extractTitleAndBody(content) {
|
|
707
908
|
const lines = content.split("\n").filter((l) => l.trim());
|
|
@@ -716,39 +917,23 @@ async function processPost(post, options, exportDir) {
|
|
|
716
917
|
const { title: originalTitle, body: originalBody } = extractTitleAndBody(cleanedContent);
|
|
717
918
|
const date = post.date.toISOString().split("T")[0];
|
|
718
919
|
const languages = [];
|
|
719
|
-
const
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
return true;
|
|
729
|
-
}
|
|
730
|
-
}
|
|
920
|
+
const baseSlug = String(post.msgId);
|
|
921
|
+
const postDir = path2.join(outputDir, baseSlug);
|
|
922
|
+
const translationExistsInDir = (lang) => {
|
|
923
|
+
if (!fs2.existsSync(postDir)) return false;
|
|
924
|
+
const langFile = path2.join(postDir, `${lang}.md`);
|
|
925
|
+
if (fs2.existsSync(langFile)) {
|
|
926
|
+
const content = fs2.readFileSync(langFile, "utf-8");
|
|
927
|
+
if (content.includes(`original_link: "${post.link}"`)) {
|
|
928
|
+
return true;
|
|
731
929
|
}
|
|
732
|
-
} catch {
|
|
733
930
|
}
|
|
734
931
|
return false;
|
|
735
932
|
};
|
|
736
933
|
if (translate && translate.targetLangs.length > 0) {
|
|
737
934
|
const sourceLang = translate.sourceLang || "ru";
|
|
738
|
-
if (translate.keepOriginal && !translationExists(sourceLang)) {
|
|
739
|
-
languages.push({
|
|
740
|
-
lang: sourceLang,
|
|
741
|
-
title: originalTitle,
|
|
742
|
-
body: originalBody,
|
|
743
|
-
isOriginal: true
|
|
744
|
-
});
|
|
745
|
-
}
|
|
746
935
|
for (const targetLang of translate.targetLangs) {
|
|
747
936
|
if (targetLang === sourceLang) continue;
|
|
748
|
-
if (translationExists(targetLang)) {
|
|
749
|
-
onProgress?.(` ${targetLang}: already exists, skipping`);
|
|
750
|
-
continue;
|
|
751
|
-
}
|
|
752
937
|
onProgress?.(` Translating to ${targetLang}...`);
|
|
753
938
|
const translateOpts = {
|
|
754
939
|
apiKey: translate.apiKey,
|
|
@@ -759,18 +944,34 @@ async function processPost(post, options, exportDir) {
|
|
|
759
944
|
};
|
|
760
945
|
try {
|
|
761
946
|
const translatedTitle = await translateTitle(originalTitle, translateOpts);
|
|
947
|
+
if (originalBody) {
|
|
948
|
+
await new Promise((r) => setTimeout(r, 2e3));
|
|
949
|
+
}
|
|
762
950
|
const translatedBody = originalBody ? await translateContent(originalBody, translateOpts) : "";
|
|
763
951
|
languages.push({
|
|
764
952
|
lang: targetLang,
|
|
765
953
|
title: translatedTitle,
|
|
766
954
|
body: translatedBody,
|
|
767
|
-
isOriginal: false
|
|
955
|
+
isOriginal: false,
|
|
956
|
+
slug: generateEnglishSlug(translatedTitle)
|
|
957
|
+
// slug from translated title for URL
|
|
768
958
|
});
|
|
769
959
|
onProgress?.(` \u2192 ${targetLang}: "${translatedTitle.substring(0, 40)}..."`);
|
|
770
960
|
} catch (error) {
|
|
771
961
|
onProgress?.(` Error translating to ${targetLang}: ${error.message}`);
|
|
772
962
|
}
|
|
773
|
-
await new Promise((r) => setTimeout(r,
|
|
963
|
+
await new Promise((r) => setTimeout(r, 5e3));
|
|
964
|
+
}
|
|
965
|
+
if (translate.keepOriginal) {
|
|
966
|
+
const russianSlug = generateSlug2(originalTitle);
|
|
967
|
+
languages.push({
|
|
968
|
+
lang: sourceLang,
|
|
969
|
+
title: originalTitle,
|
|
970
|
+
body: originalBody,
|
|
971
|
+
isOriginal: true,
|
|
972
|
+
slug: russianSlug
|
|
973
|
+
// Custom slug for Russian URL
|
|
974
|
+
});
|
|
774
975
|
}
|
|
775
976
|
} else {
|
|
776
977
|
const defaultLang = translate?.sourceLang || "ru";
|
|
@@ -781,25 +982,21 @@ async function processPost(post, options, exportDir) {
|
|
|
781
982
|
isOriginal: true
|
|
782
983
|
});
|
|
783
984
|
}
|
|
784
|
-
for (const { lang, title, body, isOriginal } of languages) {
|
|
785
|
-
const
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
onProgress?.(` Skipping existing: ${lang}/${slug}`);
|
|
985
|
+
for (const { lang, title, body, isOriginal, slug: customSlug } of languages) {
|
|
986
|
+
const langFile = path2.join(postDir, `${lang}.md`);
|
|
987
|
+
if (fs2.existsSync(langFile) || translationExistsInDir(lang)) {
|
|
988
|
+
onProgress?.(` Skipping existing: ${baseSlug}/${lang}.md`);
|
|
789
989
|
continue;
|
|
790
990
|
}
|
|
791
991
|
fs2.mkdirSync(postDir, { recursive: true });
|
|
792
992
|
let finalBody = body;
|
|
793
993
|
const mediaFiles = [];
|
|
794
994
|
if (post.mediaFiles && post.mediaFiles.length > 0) {
|
|
795
|
-
const paddedId = String(post.msgId).padStart(6, "0");
|
|
796
|
-
const targetMediaDir = mediaDir ? path2.join(mediaDir, paddedId) : path2.join(outputDir, "..", "media", paddedId);
|
|
797
|
-
fs2.mkdirSync(targetMediaDir, { recursive: true });
|
|
798
995
|
for (let i = 0; i < post.mediaFiles.length; i++) {
|
|
799
996
|
const sourcePath = path2.join(exportDir, post.mediaFiles[i]);
|
|
800
997
|
const ext = path2.extname(sourcePath).toLowerCase() || ".jpg";
|
|
801
998
|
const newName = `image${i + 1}${ext}`;
|
|
802
|
-
const targetPath = path2.join(
|
|
999
|
+
const targetPath = path2.join(postDir, newName);
|
|
803
1000
|
if (fs2.existsSync(sourcePath)) {
|
|
804
1001
|
if (!fs2.existsSync(targetPath)) {
|
|
805
1002
|
fs2.copyFileSync(sourcePath, targetPath);
|
|
@@ -810,8 +1007,8 @@ async function processPost(post, options, exportDir) {
|
|
|
810
1007
|
if (mediaFiles.length > 0) {
|
|
811
1008
|
const images = mediaFiles.filter((f) => !f.match(/\.(mp4|mov|webm|m4v)$/i));
|
|
812
1009
|
const videos = mediaFiles.filter((f) => f.match(/\.(mp4|mov|webm|m4v)$/i));
|
|
813
|
-
const imageMarkdown = images.map((f) => ` => `<video src="
|
|
1010
|
+
const imageMarkdown = images.map((f) => `![](${f})`).join("\n\n");
|
|
1011
|
+
const videoMarkdown = videos.map((f) => `<video src="${f}" controls></video>`).join("\n\n");
|
|
815
1012
|
const mediaMarkdown = [imageMarkdown, videoMarkdown].filter(Boolean).join("\n\n");
|
|
816
1013
|
if (mediaMarkdown) {
|
|
817
1014
|
finalBody = finalBody + "\n\n" + mediaMarkdown;
|
|
@@ -820,22 +1017,116 @@ async function processPost(post, options, exportDir) {
|
|
|
820
1017
|
}
|
|
821
1018
|
const replyLine = post.replyToMsgId ? `reply_to_msg_id: ${post.replyToMsgId}
|
|
822
1019
|
` : "";
|
|
1020
|
+
const slugLine = customSlug ? `slug: "${customSlug}"
|
|
1021
|
+
` : "";
|
|
1022
|
+
let fwdFromLine = "";
|
|
1023
|
+
let bodyWithForwardQuote = finalBody;
|
|
1024
|
+
if (post.fwdFrom) {
|
|
1025
|
+
const fwdFrom = post.fwdFrom;
|
|
1026
|
+
fwdFromLine = "fwd_from:\n";
|
|
1027
|
+
if (fwdFrom.fromName) {
|
|
1028
|
+
fwdFromLine += ` from_name: "${fwdFrom.fromName.replace(/"/g, '\\"')}"
|
|
1029
|
+
`;
|
|
1030
|
+
}
|
|
1031
|
+
if (fwdFrom.fromUsername) {
|
|
1032
|
+
fwdFromLine += ` from_username: "${fwdFrom.fromUsername}"
|
|
1033
|
+
`;
|
|
1034
|
+
}
|
|
1035
|
+
if (fwdFrom.postId) {
|
|
1036
|
+
fwdFromLine += ` post_id: ${fwdFrom.postId}
|
|
1037
|
+
`;
|
|
1038
|
+
}
|
|
1039
|
+
const quotedBody = finalBody.split("\n").map((line) => `> ${line}`).join("\n");
|
|
1040
|
+
const sourceLink = fwdFrom.fromUsername && fwdFrom.postId ? `https://t.me/${fwdFrom.fromUsername}/${fwdFrom.postId}` : fwdFrom.fromUsername ? `https://t.me/${fwdFrom.fromUsername}` : null;
|
|
1041
|
+
const sourceName = fwdFrom.fromName || fwdFrom.fromUsername || "Unknown";
|
|
1042
|
+
const attribution = sourceLink ? `\u2014 [${sourceName}](${sourceLink})` : `\u2014 ${sourceName}`;
|
|
1043
|
+
bodyWithForwardQuote = `${quotedBody}
|
|
1044
|
+
>
|
|
1045
|
+
> ${attribution}`;
|
|
1046
|
+
}
|
|
823
1047
|
const markdown = `---
|
|
824
1048
|
title: "${title.replace(/"/g, '\\"')}"
|
|
825
1049
|
date: ${date}
|
|
826
1050
|
lang: ${lang}
|
|
827
1051
|
original_link: "${post.link || ""}"
|
|
828
|
-
${replyLine}${isOriginal ? "" : `translated_from: "${translate?.sourceLang || "ru"}"
|
|
1052
|
+
${slugLine}${replyLine}${fwdFromLine}${isOriginal ? "" : `translated_from: "${translate?.sourceLang || "ru"}"
|
|
829
1053
|
`}---
|
|
830
1054
|
|
|
831
|
-
${
|
|
1055
|
+
${bodyWithForwardQuote}
|
|
832
1056
|
`;
|
|
833
|
-
fs2.writeFileSync(
|
|
834
|
-
createdPosts.push(`${
|
|
835
|
-
onProgress?.(` Created: ${
|
|
1057
|
+
fs2.writeFileSync(langFile, markdown);
|
|
1058
|
+
createdPosts.push(`${baseSlug}/${lang}.md`);
|
|
1059
|
+
onProgress?.(` Created: ${baseSlug}/${lang}.md`);
|
|
836
1060
|
}
|
|
837
1061
|
return createdPosts;
|
|
838
1062
|
}
|
|
1063
|
+
/**
 * Re-run post processing from a previously written export directory.
 *
 * Reads `<exportDir>/posts.ndjson` (one JSON-encoded post per line), groups
 * posts whose dates fall within five minutes of each other, merges every
 * multi-post group into a single post and passes each resulting post to
 * `processPost`.
 *
 * @param options.exportDir  Directory that contains `posts.ndjson`; also forwarded to `processPost`.
 * @param options.outputDir  Output root, created if missing.
 * @param options.translate  Translation settings, forwarded unchanged to `processPost`.
 * @param options.onProgress Optional callback receiving progress messages.
 * @param options.msgIds     Optional allow-list; when non-empty, a post is processed only if
 *                           its own ID or one of its merged IDs is listed.
 * @param options.force      When truthy, existing `*.md` files under `<outputDir>/<msgId>/`
 *                           are deleted before the post is processed.
 * @returns Counters `{ processed, skipped }`. Posts filtered out by `msgIds` are counted in neither.
 * @throws Error when `posts.ndjson` does not exist.
 * @throws SyntaxError when a non-blank line is not valid JSON (message names the line).
 */
async function processFromFiles(options) {
  const { exportDir, outputDir, translate, onProgress, msgIds, force } = options;
  const ndjsonPath = path2.join(exportDir, "posts.ndjson");
  if (!fs2.existsSync(ndjsonPath)) {
    throw new Error(`posts.ndjson not found in ${exportDir}`);
  }
  fs2.mkdirSync(outputDir, { recursive: true });

  const posts = [];
  const rawLines = fs2.readFileSync(ndjsonPath, "utf-8").split("\n");
  rawLines.forEach((line, index) => {
    if (!line.trim()) return;
    let data;
    try {
      data = JSON.parse(line);
    } catch (error) {
      // Keep the SyntaxError type but say where the bad record is.
      const reason = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`Invalid JSON on line ${index + 1} of ${ndjsonPath}: ${reason}`);
    }
    // JSON turns Date values into ISO strings; revive the ones the post model declares as Date.
    const post = { ...data, date: new Date(data.date) };
    if (data.fwdFrom && data.fwdFrom.date) {
      post.fwdFrom = { ...data.fwdFrom, date: new Date(data.fwdFrom.date) };
    }
    posts.push(post);
  });
  onProgress?.(`Found ${posts.length} posts in ${ndjsonPath}`);

  const postGroups = groupPostsByTime(posts, 5 * 60 * 1e3);
  onProgress?.(`Grouped into ${postGroups.length} post groups`);
  const mergedPosts = postGroups.map(
    (group) => group.length > 1 ? mergePostGroup(group) : group[0]
  );

  // Loop-invariant values: build them once instead of once per post.
  const wantedIds = msgIds && msgIds.length > 0 ? new Set(msgIds) : null;
  const processOptions = {
    apiId: 0,
    apiHash: "",
    channel: "",
    outputDir,
    translate,
    onProgress
  };

  let processed = 0;
  let skipped = 0;
  for (const post of mergedPosts) {
    const postMsgIds = post.mergedMsgIds || [post.msgId];
    if (wantedIds && !postMsgIds.some((id) => wantedIds.has(id))) {
      continue;
    }
    const postId = `${post.channelUsername}-${post.msgId}`;
    if (!post.content && !post.hasMedia) {
      skipped++;
      continue;
    }
    onProgress?.(`Processing: ${postId}`);
    try {
      if (force) {
        // Remove previously generated markdown so processPost does not skip it as existing.
        const postDir = path2.join(outputDir, String(post.msgId));
        if (fs2.existsSync(postDir)) {
          for (const file of fs2.readdirSync(postDir)) {
            if (file.endsWith(".md")) {
              fs2.unlinkSync(path2.join(postDir, file));
            }
          }
        }
      }
      const created = await processPost(post, processOptions, exportDir);
      if (created.length > 0) {
        processed++;
      } else {
        skipped++;
      }
    } catch (error) {
      // A failure in one post must not abort the batch; report it and move on.
      const reason = error instanceof Error ? error.message : String(error);
      onProgress?.(`  Error: ${reason}`);
      skipped++;
    }
    await new Promise((r) => setTimeout(r, 50));
  }
  return { processed, skipped };
}
|
|
839
1130
|
async function exportAndTranslate(options) {
|
|
840
1131
|
const {
|
|
841
1132
|
apiId,
|
|
@@ -871,9 +1162,14 @@ async function exportAndTranslate(options) {
|
|
|
871
1162
|
});
|
|
872
1163
|
onProgress?.(` Exported ${exportResult.posts.length} posts from "${exportResult.channelMeta.title}"`);
|
|
873
1164
|
onProgress?.("\nStep 2: Processing posts...");
|
|
1165
|
+
const postGroups = groupPostsByTime(exportResult.posts, 5 * 60 * 1e3);
|
|
1166
|
+
onProgress?.(` Grouped into ${postGroups.length} post groups`);
|
|
1167
|
+
const mergedPosts = postGroups.map(
|
|
1168
|
+
(group) => group.length > 1 ? mergePostGroup(group) : group[0]
|
|
1169
|
+
);
|
|
874
1170
|
let processed = 0;
|
|
875
1171
|
let skipped = 0;
|
|
876
|
-
for (const post of
|
|
1172
|
+
for (const post of mergedPosts) {
|
|
877
1173
|
const postId = `${post.channelUsername}-${post.msgId}`;
|
|
878
1174
|
if (!post.content && !post.hasMedia) {
|
|
879
1175
|
skipped++;
|
|
@@ -925,6 +1221,7 @@ Processing: ${postId}`);
|
|
|
925
1221
|
generateSlug,
|
|
926
1222
|
groupPosts,
|
|
927
1223
|
parsePost,
|
|
1224
|
+
processFromFiles,
|
|
928
1225
|
resumeExport,
|
|
929
1226
|
trackBookAppointment,
|
|
930
1227
|
trackGoal,
|
package/dist/index.mjs
CHANGED
|
@@ -276,7 +276,7 @@ Do not add any explanations or notes.`
|
|
|
276
276
|
let lastError = null;
|
|
277
277
|
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
|
278
278
|
if (attempt > 0) {
|
|
279
|
-
const delay =
|
|
279
|
+
const delay = 1e3 * Math.pow(2, attempt - 1);
|
|
280
280
|
await new Promise((r) => setTimeout(r, delay));
|
|
281
281
|
}
|
|
282
282
|
const response = await fetch(endpoint, {
|
|
@@ -313,44 +313,123 @@ function generateEnglishSlug(title) {
|
|
|
313
313
|
|
|
314
314
|
// src/telegram.ts
|
|
315
315
|
import { TelegramClient, Api } from "telegram";
|
|
316
|
-
import { StringSession } from "telegram/sessions";
|
|
316
|
+
import { StringSession } from "telegram/sessions/index.js";
|
|
317
317
|
import * as fs from "fs";
|
|
318
318
|
import * as path from "path";
|
|
319
319
|
import * as readline from "readline";
|
|
320
320
|
function entitiesToMarkdown(text, entities) {
|
|
321
321
|
if (!entities || entities.length === 0) return text;
|
|
322
|
-
const
|
|
323
|
-
|
|
322
|
+
const mergedEntities = [];
|
|
323
|
+
const processedIndices = /* @__PURE__ */ new Set();
|
|
324
|
+
const links = [];
|
|
325
|
+
for (const entity of entities) {
|
|
326
|
+
if (entity instanceof Api.MessageEntityTextUrl) {
|
|
327
|
+
links.push({
|
|
328
|
+
start: entity.offset,
|
|
329
|
+
end: entity.offset + entity.length,
|
|
330
|
+
entity
|
|
331
|
+
});
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
const formatEntities = entities.filter(
|
|
335
|
+
(e) => e instanceof Api.MessageEntityBold || e instanceof Api.MessageEntityItalic
|
|
336
|
+
);
|
|
337
|
+
for (let i = 0; i < formatEntities.length; i++) {
|
|
338
|
+
if (processedIndices.has(entities.indexOf(formatEntities[i]))) continue;
|
|
339
|
+
const current = formatEntities[i];
|
|
340
|
+
const currentEnd = current.offset + current.length;
|
|
341
|
+
const bridgingLink = links.find((l) => l.start === currentEnd);
|
|
342
|
+
if (bridgingLink) {
|
|
343
|
+
const nextEntity = formatEntities.find(
|
|
344
|
+
(e) => e !== current && e.constructor === current.constructor && e.offset === bridgingLink.end
|
|
345
|
+
);
|
|
346
|
+
if (nextEntity) {
|
|
347
|
+
const mergedLength = nextEntity.offset + nextEntity.length - current.offset;
|
|
348
|
+
if (current instanceof Api.MessageEntityBold) {
|
|
349
|
+
mergedEntities.push(new Api.MessageEntityBold({ offset: current.offset, length: mergedLength }));
|
|
350
|
+
} else if (current instanceof Api.MessageEntityItalic) {
|
|
351
|
+
mergedEntities.push(new Api.MessageEntityItalic({ offset: current.offset, length: mergedLength }));
|
|
352
|
+
}
|
|
353
|
+
processedIndices.add(entities.indexOf(current));
|
|
354
|
+
processedIndices.add(entities.indexOf(nextEntity));
|
|
355
|
+
continue;
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
if (!processedIndices.has(entities.indexOf(current))) {
|
|
359
|
+
mergedEntities.push(current);
|
|
360
|
+
processedIndices.add(entities.indexOf(current));
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
for (let i = 0; i < entities.length; i++) {
|
|
364
|
+
if (processedIndices.has(i)) continue;
|
|
365
|
+
const entity = entities[i];
|
|
366
|
+
if (!(entity instanceof Api.MessageEntityBold) && !(entity instanceof Api.MessageEntityItalic)) {
|
|
367
|
+
mergedEntities.push(entity);
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
const sorted = [...mergedEntities].sort((a, b) => {
|
|
371
|
+
if (a.offset !== b.offset) return a.offset - b.offset;
|
|
372
|
+
return b.length - a.length;
|
|
373
|
+
});
|
|
374
|
+
const markers = [];
|
|
324
375
|
for (const entity of sorted) {
|
|
325
376
|
const start = entity.offset;
|
|
326
377
|
const end = entity.offset + entity.length;
|
|
327
|
-
|
|
328
|
-
let
|
|
378
|
+
let startMark = "";
|
|
379
|
+
let endMark = "";
|
|
380
|
+
let priority = 0;
|
|
329
381
|
if (entity instanceof Api.MessageEntityBold) {
|
|
330
|
-
|
|
382
|
+
startMark = "**";
|
|
383
|
+
endMark = "**";
|
|
384
|
+
priority = 1;
|
|
331
385
|
} else if (entity instanceof Api.MessageEntityItalic) {
|
|
332
|
-
|
|
386
|
+
startMark = "*";
|
|
387
|
+
endMark = "*";
|
|
388
|
+
priority = 1;
|
|
333
389
|
} else if (entity instanceof Api.MessageEntityCode) {
|
|
334
|
-
|
|
390
|
+
startMark = "`";
|
|
391
|
+
endMark = "`";
|
|
392
|
+
priority = 2;
|
|
335
393
|
} else if (entity instanceof Api.MessageEntityPre) {
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
394
|
+
startMark = "```\n";
|
|
395
|
+
endMark = "\n```";
|
|
396
|
+
priority = 2;
|
|
339
397
|
} else if (entity instanceof Api.MessageEntityStrike) {
|
|
340
|
-
|
|
398
|
+
startMark = "~~";
|
|
399
|
+
endMark = "~~";
|
|
400
|
+
priority = 1;
|
|
341
401
|
} else if (entity instanceof Api.MessageEntityUnderline) {
|
|
342
|
-
|
|
402
|
+
startMark = "**";
|
|
403
|
+
endMark = "**";
|
|
404
|
+
priority = 1;
|
|
343
405
|
} else if (entity instanceof Api.MessageEntityTextUrl) {
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
406
|
+
startMark = "[";
|
|
407
|
+
endMark = `](${entity.url})`;
|
|
408
|
+
priority = 10;
|
|
409
|
+
}
|
|
410
|
+
if (startMark) {
|
|
411
|
+
markers.push({ pos: start, insert: startMark, priority });
|
|
412
|
+
markers.push({ pos: end, insert: endMark, priority: -priority });
|
|
351
413
|
}
|
|
352
|
-
result = result.substring(0, start) + replacement + result.substring(end);
|
|
353
414
|
}
|
|
415
|
+
markers.sort((a, b) => {
|
|
416
|
+
if (a.pos !== b.pos) return a.pos - b.pos;
|
|
417
|
+
if (a.priority < 0 && b.priority < 0) {
|
|
418
|
+
return a.priority - b.priority;
|
|
419
|
+
}
|
|
420
|
+
if (a.priority > 0 && b.priority > 0) {
|
|
421
|
+
return b.priority - a.priority;
|
|
422
|
+
}
|
|
423
|
+
return a.priority - b.priority;
|
|
424
|
+
});
|
|
425
|
+
let result = "";
|
|
426
|
+
let lastPos = 0;
|
|
427
|
+
for (const marker of markers) {
|
|
428
|
+
result += text.substring(lastPos, marker.pos) + marker.insert;
|
|
429
|
+
lastPos = marker.pos;
|
|
430
|
+
}
|
|
431
|
+
result += text.substring(lastPos);
|
|
432
|
+
result = result.replace(/(\S) \*\*/g, "$1** ").replace(/\*\* (\S)/g, " **$1").replace(/(\S) \*/g, "$1* ").replace(/\* (\S)/g, " *$1");
|
|
354
433
|
return result;
|
|
355
434
|
}
|
|
356
435
|
async function defaultReadline(prompt) {
|
|
@@ -522,6 +601,43 @@ async function exportTelegramChannel(options) {
|
|
|
522
601
|
if (message.replyTo && "replyToMsgId" in message.replyTo) {
|
|
523
602
|
replyToMsgId = message.replyTo.replyToMsgId;
|
|
524
603
|
}
|
|
604
|
+
let fwdFrom;
|
|
605
|
+
if (message.fwdFrom) {
|
|
606
|
+
fwdFrom = {};
|
|
607
|
+
if (message.fwdFrom.date) {
|
|
608
|
+
fwdFrom.date = new Date(message.fwdFrom.date * 1e3);
|
|
609
|
+
}
|
|
610
|
+
const fromId = message.fwdFrom.fromId;
|
|
611
|
+
if (fromId) {
|
|
612
|
+
if (fromId instanceof Api.PeerChannel) {
|
|
613
|
+
fwdFrom.fromId = fromId.channelId.toJSNumber();
|
|
614
|
+
try {
|
|
615
|
+
const channelEntity = await client.getEntity(fromId.channelId);
|
|
616
|
+
if (channelEntity instanceof Api.Channel) {
|
|
617
|
+
fwdFrom.fromUsername = channelEntity.username || void 0;
|
|
618
|
+
fwdFrom.fromName = channelEntity.title;
|
|
619
|
+
}
|
|
620
|
+
} catch (e) {
|
|
621
|
+
}
|
|
622
|
+
} else if (fromId instanceof Api.PeerUser) {
|
|
623
|
+
fwdFrom.fromId = fromId.userId.toJSNumber();
|
|
624
|
+
try {
|
|
625
|
+
const userEntity = await client.getEntity(fromId.userId);
|
|
626
|
+
if (userEntity instanceof Api.User) {
|
|
627
|
+
fwdFrom.fromUsername = userEntity.username || void 0;
|
|
628
|
+
fwdFrom.fromName = [userEntity.firstName, userEntity.lastName].filter(Boolean).join(" ");
|
|
629
|
+
}
|
|
630
|
+
} catch (e) {
|
|
631
|
+
}
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
if (!fwdFrom.fromName && message.fwdFrom.fromName) {
|
|
635
|
+
fwdFrom.fromName = message.fwdFrom.fromName;
|
|
636
|
+
}
|
|
637
|
+
if (message.fwdFrom.channelPost) {
|
|
638
|
+
fwdFrom.postId = message.fwdFrom.channelPost;
|
|
639
|
+
}
|
|
640
|
+
}
|
|
525
641
|
const post = {
|
|
526
642
|
msgId,
|
|
527
643
|
date: new Date(message.date * 1e3),
|
|
@@ -533,7 +649,8 @@ async function exportTelegramChannel(options) {
|
|
|
533
649
|
link,
|
|
534
650
|
channelUsername: channelMeta.username,
|
|
535
651
|
channelTitle: channelMeta.title,
|
|
536
|
-
replyToMsgId
|
|
652
|
+
replyToMsgId,
|
|
653
|
+
fwdFrom
|
|
537
654
|
};
|
|
538
655
|
posts.push(post);
|
|
539
656
|
const markdown = formatPostMarkdown(post);
|
|
@@ -571,6 +688,29 @@ forwards: ${post.forwards}`;
|
|
|
571
688
|
frontmatter += `
|
|
572
689
|
reply_to_msg_id: ${post.replyToMsgId}`;
|
|
573
690
|
}
|
|
691
|
+
if (post.fwdFrom) {
|
|
692
|
+
frontmatter += "\nfwd_from:";
|
|
693
|
+
if (post.fwdFrom.fromName) {
|
|
694
|
+
frontmatter += `
|
|
695
|
+
from_name: "${post.fwdFrom.fromName.replace(/"/g, '\\"')}"`;
|
|
696
|
+
}
|
|
697
|
+
if (post.fwdFrom.fromUsername) {
|
|
698
|
+
frontmatter += `
|
|
699
|
+
from_username: "${post.fwdFrom.fromUsername}"`;
|
|
700
|
+
}
|
|
701
|
+
if (post.fwdFrom.fromId) {
|
|
702
|
+
frontmatter += `
|
|
703
|
+
from_id: ${post.fwdFrom.fromId}`;
|
|
704
|
+
}
|
|
705
|
+
if (post.fwdFrom.postId) {
|
|
706
|
+
frontmatter += `
|
|
707
|
+
post_id: ${post.fwdFrom.postId}`;
|
|
708
|
+
}
|
|
709
|
+
if (post.fwdFrom.date) {
|
|
710
|
+
frontmatter += `
|
|
711
|
+
date: ${post.fwdFrom.date.toISOString()}`;
|
|
712
|
+
}
|
|
713
|
+
}
|
|
574
714
|
frontmatter += "\n---\n\n";
|
|
575
715
|
let body = post.content || "";
|
|
576
716
|
if (post.mediaFiles.length > 0) {
|
|
@@ -643,8 +783,68 @@ function generateSlug2(text, lang) {
|
|
|
643
783
|
}
|
|
644
784
|
return processed.toLowerCase().replace(/[^a-z0-9\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "").substring(0, 60) || "untitled";
|
|
645
785
|
}
|
|
786
|
+
function groupPostsByTime(posts, timeWindowMs = 5 * 60 * 1e3) {
|
|
787
|
+
const sortedPosts = [...posts].sort(
|
|
788
|
+
(a, b) => new Date(a.date).getTime() - new Date(b.date).getTime()
|
|
789
|
+
);
|
|
790
|
+
const groups = [];
|
|
791
|
+
let currentGroup = [];
|
|
792
|
+
let lastDate = null;
|
|
793
|
+
for (const post of sortedPosts) {
|
|
794
|
+
const postDate = new Date(post.date);
|
|
795
|
+
if (lastDate && Math.abs(postDate.getTime() - lastDate.getTime()) <= timeWindowMs) {
|
|
796
|
+
currentGroup.push(post);
|
|
797
|
+
} else {
|
|
798
|
+
if (currentGroup.length > 0) {
|
|
799
|
+
groups.push(currentGroup);
|
|
800
|
+
}
|
|
801
|
+
currentGroup = [post];
|
|
802
|
+
}
|
|
803
|
+
lastDate = postDate;
|
|
804
|
+
}
|
|
805
|
+
if (currentGroup.length > 0) {
|
|
806
|
+
groups.push(currentGroup);
|
|
807
|
+
}
|
|
808
|
+
return groups;
|
|
809
|
+
}
|
|
810
|
+
function mergePostGroup(posts) {
|
|
811
|
+
const sorted = [...posts].sort(
|
|
812
|
+
(a, b) => new Date(a.date).getTime() - new Date(b.date).getTime()
|
|
813
|
+
);
|
|
814
|
+
const mainPostIndex = sorted.findIndex((p) => !p.fwdFrom);
|
|
815
|
+
const mainPost = mainPostIndex >= 0 ? sorted[mainPostIndex] : sorted[0];
|
|
816
|
+
const contentParts = [];
|
|
817
|
+
for (const p of sorted) {
|
|
818
|
+
if (!p.content && !p.fwdFrom) continue;
|
|
819
|
+
if (p.fwdFrom && p === mainPost) {
|
|
820
|
+
contentParts.push(p.content || "");
|
|
821
|
+
} else if (p.fwdFrom) {
|
|
822
|
+
const fwd = p.fwdFrom;
|
|
823
|
+
const quotedContent = (p.content || "").split("\n").map((line) => `> ${line}`).join("\n");
|
|
824
|
+
const sourceLink = fwd.fromUsername && fwd.postId ? `https://t.me/${fwd.fromUsername}/${fwd.postId}` : fwd.fromUsername ? `https://t.me/${fwd.fromUsername}` : null;
|
|
825
|
+
const sourceName = fwd.fromName || fwd.fromUsername || "Unknown";
|
|
826
|
+
const attribution = sourceLink ? `\u2014 [${sourceName}](${sourceLink})` : `\u2014 ${sourceName}`;
|
|
827
|
+
contentParts.push(`${quotedContent}
|
|
828
|
+
>
|
|
829
|
+
> ${attribution}`);
|
|
830
|
+
} else {
|
|
831
|
+
contentParts.push(p.content || "");
|
|
832
|
+
}
|
|
833
|
+
}
|
|
834
|
+
const allMediaFiles = sorted.flatMap((p) => p.mediaFiles || []);
|
|
835
|
+
return {
|
|
836
|
+
...mainPost,
|
|
837
|
+
content: contentParts.filter(Boolean).join("\n\n"),
|
|
838
|
+
hasMedia: sorted.some((p) => p.hasMedia),
|
|
839
|
+
mediaFiles: allMediaFiles,
|
|
840
|
+
// Store all msgIds for reference
|
|
841
|
+
mergedMsgIds: sorted.map((p) => p.msgId),
|
|
842
|
+
// Keep fwdFrom only if main post is forwarded
|
|
843
|
+
fwdFrom: mainPost.fwdFrom
|
|
844
|
+
};
|
|
845
|
+
}
|
|
646
846
|
function cleanContent2(text) {
|
|
647
|
-
return text.replace(/##\s*Attachments\n(?:- [^\n]+\n?)*/g, "").replace(/- media\/\d+\/[^\n]+/g, "").replace(/#\w+@\w+/g, "").trim();
|
|
847
|
+
return text.replace(/##\s*Attachments\n(?:- [^\n]+\n?)*/g, "").replace(/- media\/\d+\/[^\n]+/g, "").replace(/!\[\]\(\/media\/\d+\/[^)]+\)/g, "").replace(/<video src="\/media\/\d+\/[^"]+"\s*[^>]*><\/video>/g, "").replace(/#\w+@\w+/g, "").trim();
|
|
648
848
|
}
|
|
649
849
|
function extractTitleAndBody(content) {
|
|
650
850
|
const lines = content.split("\n").filter((l) => l.trim());
|
|
@@ -659,39 +859,23 @@ async function processPost(post, options, exportDir) {
|
|
|
659
859
|
const { title: originalTitle, body: originalBody } = extractTitleAndBody(cleanedContent);
|
|
660
860
|
const date = post.date.toISOString().split("T")[0];
|
|
661
861
|
const languages = [];
|
|
662
|
-
const
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
return true;
|
|
672
|
-
}
|
|
673
|
-
}
|
|
862
|
+
const baseSlug = String(post.msgId);
|
|
863
|
+
const postDir = path2.join(outputDir, baseSlug);
|
|
864
|
+
const translationExistsInDir = (lang) => {
|
|
865
|
+
if (!fs2.existsSync(postDir)) return false;
|
|
866
|
+
const langFile = path2.join(postDir, `${lang}.md`);
|
|
867
|
+
if (fs2.existsSync(langFile)) {
|
|
868
|
+
const content = fs2.readFileSync(langFile, "utf-8");
|
|
869
|
+
if (content.includes(`original_link: "${post.link}"`)) {
|
|
870
|
+
return true;
|
|
674
871
|
}
|
|
675
|
-
} catch {
|
|
676
872
|
}
|
|
677
873
|
return false;
|
|
678
874
|
};
|
|
679
875
|
if (translate && translate.targetLangs.length > 0) {
|
|
680
876
|
const sourceLang = translate.sourceLang || "ru";
|
|
681
|
-
if (translate.keepOriginal && !translationExists(sourceLang)) {
|
|
682
|
-
languages.push({
|
|
683
|
-
lang: sourceLang,
|
|
684
|
-
title: originalTitle,
|
|
685
|
-
body: originalBody,
|
|
686
|
-
isOriginal: true
|
|
687
|
-
});
|
|
688
|
-
}
|
|
689
877
|
for (const targetLang of translate.targetLangs) {
|
|
690
878
|
if (targetLang === sourceLang) continue;
|
|
691
|
-
if (translationExists(targetLang)) {
|
|
692
|
-
onProgress?.(` ${targetLang}: already exists, skipping`);
|
|
693
|
-
continue;
|
|
694
|
-
}
|
|
695
879
|
onProgress?.(` Translating to ${targetLang}...`);
|
|
696
880
|
const translateOpts = {
|
|
697
881
|
apiKey: translate.apiKey,
|
|
@@ -702,18 +886,34 @@ async function processPost(post, options, exportDir) {
|
|
|
702
886
|
};
|
|
703
887
|
try {
|
|
704
888
|
const translatedTitle = await translateTitle(originalTitle, translateOpts);
|
|
889
|
+
if (originalBody) {
|
|
890
|
+
await new Promise((r) => setTimeout(r, 2e3));
|
|
891
|
+
}
|
|
705
892
|
const translatedBody = originalBody ? await translateContent(originalBody, translateOpts) : "";
|
|
706
893
|
languages.push({
|
|
707
894
|
lang: targetLang,
|
|
708
895
|
title: translatedTitle,
|
|
709
896
|
body: translatedBody,
|
|
710
|
-
isOriginal: false
|
|
897
|
+
isOriginal: false,
|
|
898
|
+
slug: generateEnglishSlug(translatedTitle)
|
|
899
|
+
// slug from translated title for URL
|
|
711
900
|
});
|
|
712
901
|
onProgress?.(` \u2192 ${targetLang}: "${translatedTitle.substring(0, 40)}..."`);
|
|
713
902
|
} catch (error) {
|
|
714
903
|
onProgress?.(` Error translating to ${targetLang}: ${error.message}`);
|
|
715
904
|
}
|
|
716
|
-
await new Promise((r) => setTimeout(r,
|
|
905
|
+
await new Promise((r) => setTimeout(r, 5e3));
|
|
906
|
+
}
|
|
907
|
+
if (translate.keepOriginal) {
|
|
908
|
+
const russianSlug = generateSlug2(originalTitle);
|
|
909
|
+
languages.push({
|
|
910
|
+
lang: sourceLang,
|
|
911
|
+
title: originalTitle,
|
|
912
|
+
body: originalBody,
|
|
913
|
+
isOriginal: true,
|
|
914
|
+
slug: russianSlug
|
|
915
|
+
// Custom slug for Russian URL
|
|
916
|
+
});
|
|
717
917
|
}
|
|
718
918
|
} else {
|
|
719
919
|
const defaultLang = translate?.sourceLang || "ru";
|
|
@@ -724,25 +924,21 @@ async function processPost(post, options, exportDir) {
|
|
|
724
924
|
isOriginal: true
|
|
725
925
|
});
|
|
726
926
|
}
|
|
727
|
-
for (const { lang, title, body, isOriginal } of languages) {
|
|
728
|
-
const
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
onProgress?.(` Skipping existing: ${lang}/${slug}`);
|
|
927
|
+
for (const { lang, title, body, isOriginal, slug: customSlug } of languages) {
|
|
928
|
+
const langFile = path2.join(postDir, `${lang}.md`);
|
|
929
|
+
if (fs2.existsSync(langFile) || translationExistsInDir(lang)) {
|
|
930
|
+
onProgress?.(` Skipping existing: ${baseSlug}/${lang}.md`);
|
|
732
931
|
continue;
|
|
733
932
|
}
|
|
734
933
|
fs2.mkdirSync(postDir, { recursive: true });
|
|
735
934
|
let finalBody = body;
|
|
736
935
|
const mediaFiles = [];
|
|
737
936
|
if (post.mediaFiles && post.mediaFiles.length > 0) {
|
|
738
|
-
const paddedId = String(post.msgId).padStart(6, "0");
|
|
739
|
-
const targetMediaDir = mediaDir ? path2.join(mediaDir, paddedId) : path2.join(outputDir, "..", "media", paddedId);
|
|
740
|
-
fs2.mkdirSync(targetMediaDir, { recursive: true });
|
|
741
937
|
for (let i = 0; i < post.mediaFiles.length; i++) {
|
|
742
938
|
const sourcePath = path2.join(exportDir, post.mediaFiles[i]);
|
|
743
939
|
const ext = path2.extname(sourcePath).toLowerCase() || ".jpg";
|
|
744
940
|
const newName = `image${i + 1}${ext}`;
|
|
745
|
-
const targetPath = path2.join(
|
|
941
|
+
const targetPath = path2.join(postDir, newName);
|
|
746
942
|
if (fs2.existsSync(sourcePath)) {
|
|
747
943
|
if (!fs2.existsSync(targetPath)) {
|
|
748
944
|
fs2.copyFileSync(sourcePath, targetPath);
|
|
@@ -753,8 +949,8 @@ async function processPost(post, options, exportDir) {
|
|
|
753
949
|
if (mediaFiles.length > 0) {
|
|
754
950
|
const images = mediaFiles.filter((f) => !f.match(/\.(mp4|mov|webm|m4v)$/i));
|
|
755
951
|
const videos = mediaFiles.filter((f) => f.match(/\.(mp4|mov|webm|m4v)$/i));
|
|
756
|
-
const imageMarkdown = images.map((f) => ` => `<video src="
|
|
952
|
+
const imageMarkdown = images.map((f) => ``).join("\n\n");
|
|
953
|
+
const videoMarkdown = videos.map((f) => `<video src="${f}" controls></video>`).join("\n\n");
|
|
758
954
|
const mediaMarkdown = [imageMarkdown, videoMarkdown].filter(Boolean).join("\n\n");
|
|
759
955
|
if (mediaMarkdown) {
|
|
760
956
|
finalBody = finalBody + "\n\n" + mediaMarkdown;
|
|
@@ -763,22 +959,116 @@ async function processPost(post, options, exportDir) {
|
|
|
763
959
|
}
|
|
764
960
|
const replyLine = post.replyToMsgId ? `reply_to_msg_id: ${post.replyToMsgId}
|
|
765
961
|
` : "";
|
|
962
|
+
const slugLine = customSlug ? `slug: "${customSlug}"
|
|
963
|
+
` : "";
|
|
964
|
+
let fwdFromLine = "";
|
|
965
|
+
let bodyWithForwardQuote = finalBody;
|
|
966
|
+
if (post.fwdFrom) {
|
|
967
|
+
const fwdFrom = post.fwdFrom;
|
|
968
|
+
fwdFromLine = "fwd_from:\n";
|
|
969
|
+
if (fwdFrom.fromName) {
|
|
970
|
+
fwdFromLine += ` from_name: "${fwdFrom.fromName.replace(/"/g, '\\"')}"
|
|
971
|
+
`;
|
|
972
|
+
}
|
|
973
|
+
if (fwdFrom.fromUsername) {
|
|
974
|
+
fwdFromLine += ` from_username: "${fwdFrom.fromUsername}"
|
|
975
|
+
`;
|
|
976
|
+
}
|
|
977
|
+
if (fwdFrom.postId) {
|
|
978
|
+
fwdFromLine += ` post_id: ${fwdFrom.postId}
|
|
979
|
+
`;
|
|
980
|
+
}
|
|
981
|
+
const quotedBody = finalBody.split("\n").map((line) => `> ${line}`).join("\n");
|
|
982
|
+
const sourceLink = fwdFrom.fromUsername && fwdFrom.postId ? `https://t.me/${fwdFrom.fromUsername}/${fwdFrom.postId}` : fwdFrom.fromUsername ? `https://t.me/${fwdFrom.fromUsername}` : null;
|
|
983
|
+
const sourceName = fwdFrom.fromName || fwdFrom.fromUsername || "Unknown";
|
|
984
|
+
const attribution = sourceLink ? `\u2014 [${sourceName}](${sourceLink})` : `\u2014 ${sourceName}`;
|
|
985
|
+
bodyWithForwardQuote = `${quotedBody}
|
|
986
|
+
>
|
|
987
|
+
> ${attribution}`;
|
|
988
|
+
}
|
|
766
989
|
const markdown = `---
|
|
767
990
|
title: "${title.replace(/"/g, '\\"')}"
|
|
768
991
|
date: ${date}
|
|
769
992
|
lang: ${lang}
|
|
770
993
|
original_link: "${post.link || ""}"
|
|
771
|
-
${replyLine}${isOriginal ? "" : `translated_from: "${translate?.sourceLang || "ru"}"
|
|
994
|
+
${slugLine}${replyLine}${fwdFromLine}${isOriginal ? "" : `translated_from: "${translate?.sourceLang || "ru"}"
|
|
772
995
|
`}---
|
|
773
996
|
|
|
774
|
-
${
|
|
997
|
+
${bodyWithForwardQuote}
|
|
775
998
|
`;
|
|
776
|
-
fs2.writeFileSync(
|
|
777
|
-
createdPosts.push(`${
|
|
778
|
-
onProgress?.(` Created: ${
|
|
999
|
+
fs2.writeFileSync(langFile, markdown);
|
|
1000
|
+
createdPosts.push(`${baseSlug}/${lang}.md`);
|
|
1001
|
+
onProgress?.(` Created: ${baseSlug}/${lang}.md`);
|
|
779
1002
|
}
|
|
780
1003
|
return createdPosts;
|
|
781
1004
|
}
|
|
1005
|
+
/**
 * Re-run post processing from a previously written export directory.
 *
 * Reads `<exportDir>/posts.ndjson` (one JSON-encoded post per line), groups
 * posts whose dates fall within five minutes of each other, merges every
 * multi-post group into a single post and passes each resulting post to
 * `processPost`.
 *
 * @param options.exportDir  Directory that contains `posts.ndjson`; also forwarded to `processPost`.
 * @param options.outputDir  Output root, created if missing.
 * @param options.translate  Translation settings, forwarded unchanged to `processPost`.
 * @param options.onProgress Optional callback receiving progress messages.
 * @param options.msgIds     Optional allow-list; when non-empty, a post is processed only if
 *                           its own ID or one of its merged IDs is listed.
 * @param options.force      When truthy, existing `*.md` files under `<outputDir>/<msgId>/`
 *                           are deleted before the post is processed.
 * @returns Counters `{ processed, skipped }`. Posts filtered out by `msgIds` are counted in neither.
 * @throws Error when `posts.ndjson` does not exist.
 * @throws SyntaxError when a non-blank line is not valid JSON (message names the line).
 */
async function processFromFiles(options) {
  const { exportDir, outputDir, translate, onProgress, msgIds, force } = options;
  const ndjsonPath = path2.join(exportDir, "posts.ndjson");
  if (!fs2.existsSync(ndjsonPath)) {
    throw new Error(`posts.ndjson not found in ${exportDir}`);
  }
  fs2.mkdirSync(outputDir, { recursive: true });

  const posts = [];
  const rawLines = fs2.readFileSync(ndjsonPath, "utf-8").split("\n");
  rawLines.forEach((line, index) => {
    if (!line.trim()) return;
    let data;
    try {
      data = JSON.parse(line);
    } catch (error) {
      // Keep the SyntaxError type but say where the bad record is.
      const reason = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`Invalid JSON on line ${index + 1} of ${ndjsonPath}: ${reason}`);
    }
    // JSON turns Date values into ISO strings; revive the ones the post model declares as Date.
    const post = { ...data, date: new Date(data.date) };
    if (data.fwdFrom && data.fwdFrom.date) {
      post.fwdFrom = { ...data.fwdFrom, date: new Date(data.fwdFrom.date) };
    }
    posts.push(post);
  });
  onProgress?.(`Found ${posts.length} posts in ${ndjsonPath}`);

  const postGroups = groupPostsByTime(posts, 5 * 60 * 1e3);
  onProgress?.(`Grouped into ${postGroups.length} post groups`);
  const mergedPosts = postGroups.map(
    (group) => group.length > 1 ? mergePostGroup(group) : group[0]
  );

  // Loop-invariant values: build them once instead of once per post.
  const wantedIds = msgIds && msgIds.length > 0 ? new Set(msgIds) : null;
  const processOptions = {
    apiId: 0,
    apiHash: "",
    channel: "",
    outputDir,
    translate,
    onProgress
  };

  let processed = 0;
  let skipped = 0;
  for (const post of mergedPosts) {
    const postMsgIds = post.mergedMsgIds || [post.msgId];
    if (wantedIds && !postMsgIds.some((id) => wantedIds.has(id))) {
      continue;
    }
    const postId = `${post.channelUsername}-${post.msgId}`;
    if (!post.content && !post.hasMedia) {
      skipped++;
      continue;
    }
    onProgress?.(`Processing: ${postId}`);
    try {
      if (force) {
        // Remove previously generated markdown so processPost does not skip it as existing.
        const postDir = path2.join(outputDir, String(post.msgId));
        if (fs2.existsSync(postDir)) {
          for (const file of fs2.readdirSync(postDir)) {
            if (file.endsWith(".md")) {
              fs2.unlinkSync(path2.join(postDir, file));
            }
          }
        }
      }
      const created = await processPost(post, processOptions, exportDir);
      if (created.length > 0) {
        processed++;
      } else {
        skipped++;
      }
    } catch (error) {
      // A failure in one post must not abort the batch; report it and move on.
      const reason = error instanceof Error ? error.message : String(error);
      onProgress?.(`  Error: ${reason}`);
      skipped++;
    }
    await new Promise((r) => setTimeout(r, 50));
  }
  return { processed, skipped };
}
|
|
782
1072
|
async function exportAndTranslate(options) {
|
|
783
1073
|
const {
|
|
784
1074
|
apiId,
|
|
@@ -814,9 +1104,14 @@ async function exportAndTranslate(options) {
|
|
|
814
1104
|
});
|
|
815
1105
|
onProgress?.(` Exported ${exportResult.posts.length} posts from "${exportResult.channelMeta.title}"`);
|
|
816
1106
|
onProgress?.("\nStep 2: Processing posts...");
|
|
1107
|
+
const postGroups = groupPostsByTime(exportResult.posts, 5 * 60 * 1e3);
|
|
1108
|
+
onProgress?.(` Grouped into ${postGroups.length} post groups`);
|
|
1109
|
+
const mergedPosts = postGroups.map(
|
|
1110
|
+
(group) => group.length > 1 ? mergePostGroup(group) : group[0]
|
|
1111
|
+
);
|
|
817
1112
|
let processed = 0;
|
|
818
1113
|
let skipped = 0;
|
|
819
|
-
for (const post of
|
|
1114
|
+
for (const post of mergedPosts) {
|
|
820
1115
|
const postId = `${post.channelUsername}-${post.msgId}`;
|
|
821
1116
|
if (!post.content && !post.hasMedia) {
|
|
822
1117
|
skipped++;
|
|
@@ -867,6 +1162,7 @@ export {
|
|
|
867
1162
|
generateSlug,
|
|
868
1163
|
groupPosts,
|
|
869
1164
|
parsePost,
|
|
1165
|
+
processFromFiles,
|
|
870
1166
|
resumeExport,
|
|
871
1167
|
trackBookAppointment,
|
|
872
1168
|
trackGoal,
|