koztv-blog-tools 1.2.4 → 1.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +91 -27
- package/dist/index.mjs +91 -27
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -374,6 +374,42 @@ var import_sessions = require("telegram/sessions");
|
|
|
374
374
|
var fs = __toESM(require("fs"));
|
|
375
375
|
var path = __toESM(require("path"));
|
|
376
376
|
var readline = __toESM(require("readline"));
|
|
377
|
+
/**
 * Render Telegram message entities as inline Markdown.
 *
 * Every marker position is computed against the ORIGINAL `text` and all
 * markers are inserted in a single pass. Splicing entities into the string
 * one by one shifts the offsets of any enclosing or same-range entity, which
 * misplaces closing markers for nested formatting (e.g. bold containing italic).
 *
 * @param {string} text - Raw message text; entity offsets index into this string.
 * @param {Array<{offset: number, length: number, url?: string}>} [entities] -
 *   Telegram entity objects. Only bold, italic, code, pre, strike, underline
 *   and text-url entities produce markers; every other entity type leaves the
 *   text untouched.
 * @returns {string} The text with Markdown markers inserted.
 */
function entitiesToMarkdown(text, entities) {
  if (!entities || entities.length === 0) return text;
  const { Api } = import_telegram;

  // Maps an entity to its [opening, closing] marker pair, or null when the
  // entity type has no Markdown representation here.
  const markersFor = (entity) => {
    if (entity instanceof Api.MessageEntityBold) return ["**", "**"];
    if (entity instanceof Api.MessageEntityItalic) return ["*", "*"];
    if (entity instanceof Api.MessageEntityCode) return ["`", "`"];
    if (entity instanceof Api.MessageEntityPre) return ["```\n", "\n```"];
    if (entity instanceof Api.MessageEntityStrike) return ["~~", "~~"];
    // Markdown has no underline syntax; underline is rendered as bold.
    if (entity instanceof Api.MessageEntityUnderline) return ["**", "**"];
    if (entity instanceof Api.MessageEntityTextUrl) return ["[", `](${entity.url})`];
    return null;
  };

  const limit = text.length;
  const clamp = (n) => Math.min(Math.max(n, 0), limit);

  // Outer spans first: earlier start wins, and on equal starts the longer span.
  const ordered = [...entities].sort(
    (a, b) => a.offset - b.offset || b.length - a.length
  );

  const events = [];
  ordered.forEach((entity, rank) => {
    const markers = markersFor(entity);
    if (!markers) return;
    const start = clamp(entity.offset);
    const end = clamp(entity.offset + entity.length);
    // Skip empty or out-of-range spans rather than emitting markers around nothing.
    if (end <= start) return;
    events.push({ pos: start, isOpen: true, rank, marker: markers[0] });
    events.push({ pos: end, isOpen: false, rank, marker: markers[1] });
  });

  // At one position: closers come before openers, inner spans close first,
  // and outer spans open first, so the emitted markers nest properly.
  events.sort((a, b) => {
    if (a.pos !== b.pos) return a.pos - b.pos;
    if (a.isOpen !== b.isOpen) return a.isOpen ? 1 : -1;
    return a.isOpen ? a.rank - b.rank : b.rank - a.rank;
  });

  const pieces = [];
  let cursor = 0;
  for (const { pos, marker } of events) {
    pieces.push(text.slice(cursor, pos), marker);
    cursor = pos;
  }
  pieces.push(text.slice(cursor));
  return pieces.join("");
}
|
|
377
413
|
async function defaultReadline(prompt) {
|
|
378
414
|
const rl = readline.createInterface({
|
|
379
415
|
input: process.stdin,
|
|
@@ -497,40 +533,47 @@ async function exportTelegramChannel(options) {
|
|
|
497
533
|
const postMediaDir = path.join(mediaDir, paddedId);
|
|
498
534
|
const mediaFiles = [];
|
|
499
535
|
if (downloadMedia && message.media) {
|
|
500
|
-
fs.
|
|
501
|
-
|
|
502
|
-
const
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
536
|
+
const existingFiles = fs.existsSync(postMediaDir) ? fs.readdirSync(postMediaDir).filter((f) => f.startsWith("media.")) : [];
|
|
537
|
+
if (existingFiles.length > 0) {
|
|
538
|
+
for (const f of existingFiles) {
|
|
539
|
+
mediaFiles.push(`media/${paddedId}/${f}`);
|
|
540
|
+
}
|
|
541
|
+
} else {
|
|
542
|
+
fs.mkdirSync(postMediaDir, { recursive: true });
|
|
543
|
+
try {
|
|
544
|
+
const buffer = await client.downloadMedia(message.media, {});
|
|
545
|
+
if (buffer) {
|
|
546
|
+
let ext = ".bin";
|
|
547
|
+
if (message.media instanceof import_telegram.Api.MessageMediaPhoto) {
|
|
548
|
+
ext = ".jpg";
|
|
549
|
+
} else if (message.media instanceof import_telegram.Api.MessageMediaDocument) {
|
|
550
|
+
const doc = message.media.document;
|
|
551
|
+
if (doc instanceof import_telegram.Api.Document) {
|
|
552
|
+
const mimeExt = doc.mimeType?.split("/")[1];
|
|
553
|
+
if (mimeExt) {
|
|
554
|
+
ext = "." + mimeExt.replace("jpeg", "jpg");
|
|
517
555
|
}
|
|
518
|
-
|
|
519
|
-
|
|
556
|
+
for (const attr of doc.attributes) {
|
|
557
|
+
if (attr instanceof import_telegram.Api.DocumentAttributeVideo) {
|
|
558
|
+
ext = ".mp4";
|
|
559
|
+
}
|
|
560
|
+
if (attr instanceof import_telegram.Api.DocumentAttributeFilename) {
|
|
561
|
+
ext = path.extname(attr.fileName) || ext;
|
|
562
|
+
}
|
|
520
563
|
}
|
|
521
564
|
}
|
|
522
565
|
}
|
|
566
|
+
const mediaFileName = `media${ext}`;
|
|
567
|
+
const mediaPath = path.join(postMediaDir, mediaFileName);
|
|
568
|
+
fs.writeFileSync(mediaPath, buffer);
|
|
569
|
+
mediaFiles.push(`media/${paddedId}/${mediaFileName}`);
|
|
523
570
|
}
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
fs.writeFileSync(mediaPath, buffer);
|
|
527
|
-
mediaFiles.push(`media/${paddedId}/${mediaFileName}`);
|
|
571
|
+
} catch (e) {
|
|
572
|
+
console.error(`Error downloading media for message ${msgId}:`, e);
|
|
528
573
|
}
|
|
529
|
-
} catch (e) {
|
|
530
|
-
console.error(`Error downloading media for message ${msgId}:`, e);
|
|
531
574
|
}
|
|
532
575
|
}
|
|
533
|
-
const content = message.message || "";
|
|
576
|
+
const content = entitiesToMarkdown(message.message || "", message.entities);
|
|
534
577
|
const link = channelMeta.username ? `https://t.me/${channelMeta.username}/${msgId}` : "";
|
|
535
578
|
const post = {
|
|
536
579
|
msgId,
|
|
@@ -664,9 +707,26 @@ async function processPost(post, options, exportDir) {
|
|
|
664
707
|
const { title: originalTitle, body: originalBody } = extractTitleAndBody(cleanedContent);
|
|
665
708
|
const date = post.date.toISOString().split("T")[0];
|
|
666
709
|
const languages = [];
|
|
710
|
+
/**
 * Report whether a post for language `lang` that references this post's
 * link has already been written under `outputDir`.
 *
 * Scans `<outputDir>/<lang>/<slug>/index.md` files for the exact text
 * `original_link: "<post.link>"`.
 *
 * @param {string} lang - Language directory name to inspect.
 * @returns {boolean} True when a matching index.md is found.
 */
const translationExists = (lang) => {
  // An empty link would turn the marker into `original_link: ""`, which
  // cannot identify one specific post and could match unrelated files.
  if (!post.link) return false;
  const langDir = path2.join(outputDir, lang);
  if (!fs2.existsSync(langDir)) return false;
  const marker = `original_link: "${post.link}"`;
  try {
    return fs2.readdirSync(langDir).some((slug) => {
      const indexPath = path2.join(langDir, slug, "index.md");
      return fs2.existsSync(indexPath) && fs2.readFileSync(indexPath, "utf-8").includes(marker);
    });
  } catch {
    // Best effort: an unreadable directory or file is treated as "not found"
    // so the caller simply regenerates the post.
    return false;
  }
};
|
|
667
727
|
if (translate && translate.targetLangs.length > 0) {
|
|
668
728
|
const sourceLang = translate.sourceLang || "ru";
|
|
669
|
-
if (translate.keepOriginal) {
|
|
729
|
+
if (translate.keepOriginal && !translationExists(sourceLang)) {
|
|
670
730
|
languages.push({
|
|
671
731
|
lang: sourceLang,
|
|
672
732
|
title: originalTitle,
|
|
@@ -676,6 +736,10 @@ async function processPost(post, options, exportDir) {
|
|
|
676
736
|
}
|
|
677
737
|
for (const targetLang of translate.targetLangs) {
|
|
678
738
|
if (targetLang === sourceLang) continue;
|
|
739
|
+
if (translationExists(targetLang)) {
|
|
740
|
+
onProgress?.(` ${targetLang}: already exists, skipping`);
|
|
741
|
+
continue;
|
|
742
|
+
}
|
|
679
743
|
onProgress?.(` Translating to ${targetLang}...`);
|
|
680
744
|
const translateOpts = {
|
|
681
745
|
apiKey: translate.apiKey,
|
package/dist/index.mjs
CHANGED
|
@@ -317,6 +317,42 @@ import { StringSession } from "telegram/sessions";
|
|
|
317
317
|
import * as fs from "fs";
|
|
318
318
|
import * as path from "path";
|
|
319
319
|
import * as readline from "readline";
|
|
320
|
+
/**
 * Render Telegram message entities as inline Markdown.
 *
 * Every marker position is computed against the ORIGINAL `text` and all
 * markers are inserted in a single pass. Splicing entities into the string
 * one by one shifts the offsets of any enclosing or same-range entity, which
 * misplaces closing markers for nested formatting (e.g. bold containing italic).
 *
 * @param {string} text - Raw message text; entity offsets index into this string.
 * @param {Array<{offset: number, length: number, url?: string}>} [entities] -
 *   Telegram entity objects. Only bold, italic, code, pre, strike, underline
 *   and text-url entities produce markers; every other entity type leaves the
 *   text untouched.
 * @returns {string} The text with Markdown markers inserted.
 */
function entitiesToMarkdown(text, entities) {
  if (!entities || entities.length === 0) return text;

  // Maps an entity to its [opening, closing] marker pair, or null when the
  // entity type has no Markdown representation here.
  const markersFor = (entity) => {
    if (entity instanceof Api.MessageEntityBold) return ["**", "**"];
    if (entity instanceof Api.MessageEntityItalic) return ["*", "*"];
    if (entity instanceof Api.MessageEntityCode) return ["`", "`"];
    if (entity instanceof Api.MessageEntityPre) return ["```\n", "\n```"];
    if (entity instanceof Api.MessageEntityStrike) return ["~~", "~~"];
    // Markdown has no underline syntax; underline is rendered as bold.
    if (entity instanceof Api.MessageEntityUnderline) return ["**", "**"];
    if (entity instanceof Api.MessageEntityTextUrl) return ["[", `](${entity.url})`];
    return null;
  };

  const limit = text.length;
  const clamp = (n) => Math.min(Math.max(n, 0), limit);

  // Outer spans first: earlier start wins, and on equal starts the longer span.
  const ordered = [...entities].sort(
    (a, b) => a.offset - b.offset || b.length - a.length
  );

  const events = [];
  ordered.forEach((entity, rank) => {
    const markers = markersFor(entity);
    if (!markers) return;
    const start = clamp(entity.offset);
    const end = clamp(entity.offset + entity.length);
    // Skip empty or out-of-range spans rather than emitting markers around nothing.
    if (end <= start) return;
    events.push({ pos: start, isOpen: true, rank, marker: markers[0] });
    events.push({ pos: end, isOpen: false, rank, marker: markers[1] });
  });

  // At one position: closers come before openers, inner spans close first,
  // and outer spans open first, so the emitted markers nest properly.
  events.sort((a, b) => {
    if (a.pos !== b.pos) return a.pos - b.pos;
    if (a.isOpen !== b.isOpen) return a.isOpen ? 1 : -1;
    return a.isOpen ? a.rank - b.rank : b.rank - a.rank;
  });

  const pieces = [];
  let cursor = 0;
  for (const { pos, marker } of events) {
    pieces.push(text.slice(cursor, pos), marker);
    cursor = pos;
  }
  pieces.push(text.slice(cursor));
  return pieces.join("");
}
|
|
320
356
|
async function defaultReadline(prompt) {
|
|
321
357
|
const rl = readline.createInterface({
|
|
322
358
|
input: process.stdin,
|
|
@@ -440,40 +476,47 @@ async function exportTelegramChannel(options) {
|
|
|
440
476
|
const postMediaDir = path.join(mediaDir, paddedId);
|
|
441
477
|
const mediaFiles = [];
|
|
442
478
|
if (downloadMedia && message.media) {
|
|
443
|
-
fs.
|
|
444
|
-
|
|
445
|
-
const
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
479
|
+
const existingFiles = fs.existsSync(postMediaDir) ? fs.readdirSync(postMediaDir).filter((f) => f.startsWith("media.")) : [];
|
|
480
|
+
if (existingFiles.length > 0) {
|
|
481
|
+
for (const f of existingFiles) {
|
|
482
|
+
mediaFiles.push(`media/${paddedId}/${f}`);
|
|
483
|
+
}
|
|
484
|
+
} else {
|
|
485
|
+
fs.mkdirSync(postMediaDir, { recursive: true });
|
|
486
|
+
try {
|
|
487
|
+
const buffer = await client.downloadMedia(message.media, {});
|
|
488
|
+
if (buffer) {
|
|
489
|
+
let ext = ".bin";
|
|
490
|
+
if (message.media instanceof Api.MessageMediaPhoto) {
|
|
491
|
+
ext = ".jpg";
|
|
492
|
+
} else if (message.media instanceof Api.MessageMediaDocument) {
|
|
493
|
+
const doc = message.media.document;
|
|
494
|
+
if (doc instanceof Api.Document) {
|
|
495
|
+
const mimeExt = doc.mimeType?.split("/")[1];
|
|
496
|
+
if (mimeExt) {
|
|
497
|
+
ext = "." + mimeExt.replace("jpeg", "jpg");
|
|
460
498
|
}
|
|
461
|
-
|
|
462
|
-
|
|
499
|
+
for (const attr of doc.attributes) {
|
|
500
|
+
if (attr instanceof Api.DocumentAttributeVideo) {
|
|
501
|
+
ext = ".mp4";
|
|
502
|
+
}
|
|
503
|
+
if (attr instanceof Api.DocumentAttributeFilename) {
|
|
504
|
+
ext = path.extname(attr.fileName) || ext;
|
|
505
|
+
}
|
|
463
506
|
}
|
|
464
507
|
}
|
|
465
508
|
}
|
|
509
|
+
const mediaFileName = `media${ext}`;
|
|
510
|
+
const mediaPath = path.join(postMediaDir, mediaFileName);
|
|
511
|
+
fs.writeFileSync(mediaPath, buffer);
|
|
512
|
+
mediaFiles.push(`media/${paddedId}/${mediaFileName}`);
|
|
466
513
|
}
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
fs.writeFileSync(mediaPath, buffer);
|
|
470
|
-
mediaFiles.push(`media/${paddedId}/${mediaFileName}`);
|
|
514
|
+
} catch (e) {
|
|
515
|
+
console.error(`Error downloading media for message ${msgId}:`, e);
|
|
471
516
|
}
|
|
472
|
-
} catch (e) {
|
|
473
|
-
console.error(`Error downloading media for message ${msgId}:`, e);
|
|
474
517
|
}
|
|
475
518
|
}
|
|
476
|
-
const content = message.message || "";
|
|
519
|
+
const content = entitiesToMarkdown(message.message || "", message.entities);
|
|
477
520
|
const link = channelMeta.username ? `https://t.me/${channelMeta.username}/${msgId}` : "";
|
|
478
521
|
const post = {
|
|
479
522
|
msgId,
|
|
@@ -607,9 +650,26 @@ async function processPost(post, options, exportDir) {
|
|
|
607
650
|
const { title: originalTitle, body: originalBody } = extractTitleAndBody(cleanedContent);
|
|
608
651
|
const date = post.date.toISOString().split("T")[0];
|
|
609
652
|
const languages = [];
|
|
653
|
+
/**
 * Report whether a post for language `lang` that references this post's
 * link has already been written under `outputDir`.
 *
 * Scans `<outputDir>/<lang>/<slug>/index.md` files for the exact text
 * `original_link: "<post.link>"`.
 *
 * @param {string} lang - Language directory name to inspect.
 * @returns {boolean} True when a matching index.md is found.
 */
const translationExists = (lang) => {
  // An empty link would turn the marker into `original_link: ""`, which
  // cannot identify one specific post and could match unrelated files.
  if (!post.link) return false;
  const langDir = path2.join(outputDir, lang);
  if (!fs2.existsSync(langDir)) return false;
  const marker = `original_link: "${post.link}"`;
  try {
    return fs2.readdirSync(langDir).some((slug) => {
      const indexPath = path2.join(langDir, slug, "index.md");
      return fs2.existsSync(indexPath) && fs2.readFileSync(indexPath, "utf-8").includes(marker);
    });
  } catch {
    // Best effort: an unreadable directory or file is treated as "not found"
    // so the caller simply regenerates the post.
    return false;
  }
};
|
|
610
670
|
if (translate && translate.targetLangs.length > 0) {
|
|
611
671
|
const sourceLang = translate.sourceLang || "ru";
|
|
612
|
-
if (translate.keepOriginal) {
|
|
672
|
+
if (translate.keepOriginal && !translationExists(sourceLang)) {
|
|
613
673
|
languages.push({
|
|
614
674
|
lang: sourceLang,
|
|
615
675
|
title: originalTitle,
|
|
@@ -619,6 +679,10 @@ async function processPost(post, options, exportDir) {
|
|
|
619
679
|
}
|
|
620
680
|
for (const targetLang of translate.targetLangs) {
|
|
621
681
|
if (targetLang === sourceLang) continue;
|
|
682
|
+
if (translationExists(targetLang)) {
|
|
683
|
+
onProgress?.(` ${targetLang}: already exists, skipping`);
|
|
684
|
+
continue;
|
|
685
|
+
}
|
|
622
686
|
onProgress?.(` Translating to ${targetLang}...`);
|
|
623
687
|
const translateOpts = {
|
|
624
688
|
apiKey: translate.apiKey,
|