koztv-blog-tools 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +69 -1
- package/dist/index.d.ts +69 -1
- package/dist/index.js +238 -0
- package/dist/index.mjs +237 -0
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -216,4 +216,72 @@ declare function resumeExport(options: Omit<TelegramExportOptions, 'onPhoneNumbe
|
|
|
216
216
|
session: string;
|
|
217
217
|
}): Promise<ExportResult>;
|
|
218
218
|
|
|
219
|
-
|
|
219
|
+
/**
|
|
220
|
+
* Combined Telegram export + translation module
|
|
221
|
+
*/
|
|
222
|
+
/**
 * Settings for translating exported posts through an OpenAI-compatible API.
 * Passed as `translate` in {@link ExportAndTranslateOptions}.
 */
interface TranslationConfig {
    /** API key for translation service */
    apiKey: string;
    /** API base URL (OpenAI-compatible) */
    apiUrl: string;
    /** Model name */
    model: string;
    /** Source language code (default: 'ru') */
    sourceLang?: string;
    /**
     * Target languages array (e.g., ['en', 'de', 'zh']).
     * An entry equal to the source language is skipped; an empty array
     * disables translation altogether.
     */
    targetLangs: string[];
    /**
     * Also keep original language version. When set, the untranslated post is
     * written under the source language code next to the translations.
     */
    keepOriginal?: boolean;
}
|
|
236
|
+
/**
 * Options accepted by `exportAndTranslate`.
 */
interface ExportAndTranslateOptions {
    /** Telegram API ID */
    apiId: number;
    /** Telegram API Hash */
    apiHash: string;
    /** Session string; forwarded unchanged to the Telegram export step */
    session?: string;
    /** Target channel; forwarded to the Telegram export step as its `target` */
    channel: string;
    /**
     * Output directory for posts. The raw Telegram export is written to a
     * sibling directory, `outputDir/../.telegram-export`.
     */
    outputDir: string;
    /**
     * Media output directory (default: outputDir/../media). Files of one post
     * go into a sub-directory named after the message id, zero-padded to six
     * digits.
     */
    mediaDir?: string;
    /** Export limit; forwarded to the Telegram export step */
    limit?: number;
    /** Export posts since date; forwarded to the Telegram export step */
    since?: Date;
    /** Download media files (default: true) */
    downloadMedia?: boolean;
    /**
     * Translation config (optional - if not provided, no translation).
     * Without it (or with an empty `targetLangs`) each post is written once,
     * directly under `outputDir`, with `lang: original`.
     */
    translate?: TranslationConfig;
    /** Progress callback; receives human-readable status lines */
    onProgress?: (message: string) => void;
    /** Session save callback; forwarded to the Telegram export step */
    onSession?: (session: string) => void;
    /**
     * Log of already processed posts (to skip). Keys have the form
     * `<channelUsername>-<msgId>`; a post whose key holds a truthy value is
     * skipped. The object is updated in place as posts are processed.
     */
    processedLog?: Record<string, any>;
    /**
     * Callback to save processed log. Called with the updated log after every
     * post that produced at least one new file.
     */
    onProcessedLog?: (log: Record<string, any>) => void;
}
|
|
266
|
+
/**
 * Summary returned by `exportAndTranslate`.
 */
interface ExportAndTranslateResult {
    /** Number of posts exported (every post returned by the Telegram export step) */
    exported: number;
    /** Number of posts processed (translated/saved), i.e. posts that produced at least one new file */
    processed: number;
    /**
     * Number of posts skipped: already present in the processed log, without
     * content and media, producing no new file, or failing with an error.
     */
    skipped: number;
    /** Channel metadata, as reported by the Telegram export step */
    channelMeta: {
        id: number;
        username: string;
        title: string;
    };
    /** Session string for future use */
    session: string;
}
|
|
282
|
+
/**
 * Export posts from Telegram channel with optional translation.
 *
 * @param options Telegram credentials, output locations and optional
 *   translation settings.
 * @returns Counts of exported, processed and skipped posts together with the
 *   channel metadata and the session string.
 */
declare function exportAndTranslate(options: ExportAndTranslateOptions): Promise<ExportAndTranslateResult>;
|
|
286
|
+
|
|
287
|
+
export { type AnalyticsConfig, type ExportAndTranslateOptions, type ExportAndTranslateResult, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type TelegramExportOptions, type TranslateOptions, type TranslationConfig, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportAndTranslate, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
|
package/dist/index.d.ts
CHANGED
|
@@ -216,4 +216,72 @@ declare function resumeExport(options: Omit<TelegramExportOptions, 'onPhoneNumbe
|
|
|
216
216
|
session: string;
|
|
217
217
|
}): Promise<ExportResult>;
|
|
218
218
|
|
|
219
|
-
|
|
219
|
+
/**
|
|
220
|
+
* Combined Telegram export + translation module
|
|
221
|
+
*/
|
|
222
|
+
/**
 * Settings for translating exported posts through an OpenAI-compatible API.
 * Passed as `translate` in {@link ExportAndTranslateOptions}.
 */
interface TranslationConfig {
    /** API key for translation service */
    apiKey: string;
    /** API base URL (OpenAI-compatible) */
    apiUrl: string;
    /** Model name */
    model: string;
    /** Source language code (default: 'ru') */
    sourceLang?: string;
    /**
     * Target languages array (e.g., ['en', 'de', 'zh']).
     * An entry equal to the source language is skipped; an empty array
     * disables translation altogether.
     */
    targetLangs: string[];
    /**
     * Also keep original language version. When set, the untranslated post is
     * written under the source language code next to the translations.
     */
    keepOriginal?: boolean;
}
|
|
236
|
+
/**
 * Options accepted by `exportAndTranslate`.
 */
interface ExportAndTranslateOptions {
    /** Telegram API ID */
    apiId: number;
    /** Telegram API Hash */
    apiHash: string;
    /** Session string; forwarded unchanged to the Telegram export step */
    session?: string;
    /** Target channel; forwarded to the Telegram export step as its `target` */
    channel: string;
    /**
     * Output directory for posts. The raw Telegram export is written to a
     * sibling directory, `outputDir/../.telegram-export`.
     */
    outputDir: string;
    /**
     * Media output directory (default: outputDir/../media). Files of one post
     * go into a sub-directory named after the message id, zero-padded to six
     * digits.
     */
    mediaDir?: string;
    /** Export limit; forwarded to the Telegram export step */
    limit?: number;
    /** Export posts since date; forwarded to the Telegram export step */
    since?: Date;
    /** Download media files (default: true) */
    downloadMedia?: boolean;
    /**
     * Translation config (optional - if not provided, no translation).
     * Without it (or with an empty `targetLangs`) each post is written once,
     * directly under `outputDir`, with `lang: original`.
     */
    translate?: TranslationConfig;
    /** Progress callback; receives human-readable status lines */
    onProgress?: (message: string) => void;
    /** Session save callback; forwarded to the Telegram export step */
    onSession?: (session: string) => void;
    /**
     * Log of already processed posts (to skip). Keys have the form
     * `<channelUsername>-<msgId>`; a post whose key holds a truthy value is
     * skipped. The object is updated in place as posts are processed.
     */
    processedLog?: Record<string, any>;
    /**
     * Callback to save processed log. Called with the updated log after every
     * post that produced at least one new file.
     */
    onProcessedLog?: (log: Record<string, any>) => void;
}
|
|
266
|
+
/**
 * Summary returned by `exportAndTranslate`.
 */
interface ExportAndTranslateResult {
    /** Number of posts exported (every post returned by the Telegram export step) */
    exported: number;
    /** Number of posts processed (translated/saved), i.e. posts that produced at least one new file */
    processed: number;
    /**
     * Number of posts skipped: already present in the processed log, without
     * content and media, producing no new file, or failing with an error.
     */
    skipped: number;
    /** Channel metadata, as reported by the Telegram export step */
    channelMeta: {
        id: number;
        username: string;
        title: string;
    };
    /** Session string for future use */
    session: string;
}
|
|
282
|
+
/**
 * Export posts from Telegram channel with optional translation.
 *
 * @param options Telegram credentials, output locations and optional
 *   translation settings.
 * @returns Counts of exported, processed and skipped posts together with the
 *   channel metadata and the session string.
 */
declare function exportAndTranslate(options: ExportAndTranslateOptions): Promise<ExportAndTranslateResult>;
|
|
286
|
+
|
|
287
|
+
export { type AnalyticsConfig, type ExportAndTranslateOptions, type ExportAndTranslateResult, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type TelegramExportOptions, type TranslateOptions, type TranslationConfig, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportAndTranslate, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
|
package/dist/index.js
CHANGED
|
@@ -34,6 +34,7 @@ __export(index_exports, {
|
|
|
34
34
|
cleanContent: () => cleanContent,
|
|
35
35
|
configureAnalytics: () => configureAnalytics,
|
|
36
36
|
deduplicatePosts: () => deduplicatePosts,
|
|
37
|
+
exportAndTranslate: () => exportAndTranslate,
|
|
37
38
|
exportTelegramChannel: () => exportTelegramChannel,
|
|
38
39
|
extractAttachments: () => extractAttachments,
|
|
39
40
|
extractExcerpt: () => extractExcerpt,
|
|
@@ -586,12 +587,249 @@ async function resumeExport(options) {
|
|
|
586
587
|
}
|
|
587
588
|
return exportTelegramChannel(options);
|
|
588
589
|
}
|
|
590
|
+
|
|
591
|
+
// src/exporter.ts
|
|
592
|
+
var fs2 = __toESM(require("fs"));
|
|
593
|
+
var path2 = __toESM(require("path"));
|
|
594
|
+
// Lower-case Cyrillic letter -> Latin replacement. The hard sign (U+044A) and
// the soft sign (U+044C) map to the empty string, i.e. they are dropped.
var translitMap2 = {
  "\u0430": "a",
  "\u0431": "b",
  "\u0432": "v",
  "\u0433": "g",
  "\u0434": "d",
  "\u0435": "e",
  "\u0451": "yo",
  "\u0436": "zh",
  "\u0437": "z",
  "\u0438": "i",
  "\u0439": "y",
  "\u043A": "k",
  "\u043B": "l",
  "\u043C": "m",
  "\u043D": "n",
  "\u043E": "o",
  "\u043F": "p",
  "\u0440": "r",
  "\u0441": "s",
  "\u0442": "t",
  "\u0443": "u",
  "\u0444": "f",
  "\u0445": "h",
  "\u0446": "ts",
  "\u0447": "ch",
  "\u0448": "sh",
  "\u0449": "sch",
  "\u044A": "",
  "\u044B": "y",
  "\u044C": "",
  "\u044D": "e",
  "\u044E": "yu",
  "\u044F": "ya"
};
/**
 * Lower-cases `text` and replaces every Cyrillic letter listed in
 * `translitMap2` with its Latin equivalent. Characters without a mapping are
 * kept as they are.
 *
 * @param text Text to transliterate.
 * @returns The lower-cased, transliterated text.
 */
function transliterate(text) {
  // `??` rather than `||`: the hard and soft signs map to "", which is falsy,
  // so `||` would fall back to the original Cyrillic character instead of
  // dropping it.
  return text.toLowerCase().split("").map((char) => translitMap2[char] ?? char).join("");
}
|
|
632
|
+
/**
 * Builds a URL/file-system friendly slug from a title.
 *
 * Text containing Cyrillic letters is transliterated first. The result is
 * lower-cased, stripped of everything except `a-z`, `0-9`, whitespace and
 * hyphens, joined with single hyphens and limited to 60 characters.
 *
 * @param text Title to turn into a slug.
 * @param lang Language code of the title; currently not used by the slug logic.
 * @returns The slug, or "untitled" when nothing usable is left.
 */
function generateSlug2(text, lang) {
  const latin = /[а-яё]/i.test(text) ? transliterate(text) : text;
  const slug = latin
    .toLowerCase()
    .replace(/[^a-z0-9\s-]/g, "")
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-|-$/g, "")
    .substring(0, 60)
    // The cut at 60 characters can land right after a word separator; trim
    // again so the slug never ends with a hyphen.
    .replace(/-$/, "");
  return slug || "untitled";
}
|
|
639
|
+
/**
 * Removes export artefacts from a post's text: the "## Attachments" section
 * with its bullet list, stray "- media/<digits>/..." bullets and
 * "#tag@channel" markers. The result is trimmed.
 *
 * @param text Raw post content.
 * @returns The cleaned content.
 */
function cleanContent2(text) {
  const attachmentsSection = /##\s*Attachments\n(?:- [^\n]+\n?)*/g;
  const mediaBullet = /- media\/\d+\/[^\n]+/g;
  const channelHashtag = /#\w+@\w+/g;
  let cleaned = text;
  for (const pattern of [attachmentsSection, mediaBullet, channelHashtag]) {
    cleaned = cleaned.replace(pattern, "");
  }
  return cleaned.trim();
}
|
|
642
|
+
/**
 * Splits post content into a title (the first non-blank line, with the
 * characters `# @ [ ] *` removed) and a body (everything after that line).
 *
 * Blank lines inside the body are preserved, collapsed to a single blank
 * line, so that paragraph breaks survive in the generated Markdown.
 *
 * @param content Cleaned post content.
 * @returns `{ title, body }`; the title falls back to "Untitled" and the body
 *   to an empty string.
 */
function extractTitleAndBody(content) {
  const lines = content.split("\n");
  const titleIndex = lines.findIndex((line) => line.trim() !== "");
  if (titleIndex === -1) {
    return { title: "Untitled", body: "" };
  }
  const title = lines[titleIndex].replace(/[#@[\]*]/g, "").trim() || "Untitled";
  const body = lines
    .slice(titleIndex + 1)
    // Normalise whitespace-only lines so runs of them can be collapsed below.
    .map((line) => (line.trim() === "" ? "" : line))
    .join("\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
  return { title, body };
}
|
|
648
|
+
/**
 * Writes one exported Telegram post to disk as Markdown, one `index.md` per
 * output language.
 *
 * With a translation config that has target languages, the post is translated
 * into each of them (optionally keeping the original) and written to
 * `outputDir/<lang>/<slug>/index.md`. Otherwise it is written once to
 * `outputDir/<slug>/index.md` with `lang: original`. A post directory that
 * already exists is left untouched. Media files are copied from the raw export
 * into the media directory and referenced at the end of the body.
 *
 * @param post Exported post; reads `content`, `date`, `msgId`, `mediaFiles`
 *   and `link`.
 * @param options The `exportAndTranslate` options; reads `translate`,
 *   `outputDir`, `mediaDir` and `onProgress`.
 * @param exportDir Directory of the raw export; `post.mediaFiles` entries are
 *   resolved against it.
 * @returns The `<lang>/<slug>` identifiers of the posts created by this call.
 */
async function processPost(post, options, exportDir) {
  const { translate, outputDir, mediaDir, onProgress } = options;
  const createdPosts = [];
  const cleanedContent = cleanContent2(post.content || "");
  const { title: originalTitle, body: originalBody } = extractTitleAndBody(cleanedContent);
  const date = post.date.toISOString().split("T")[0];
  const isTranslating = Boolean(translate && translate.targetLangs.length > 0);
  const sourceLang = translate?.sourceLang || "ru";
  // Anything can be thrown in JavaScript; never assume `.message` exists.
  const describeError = (error) => (error instanceof Error ? error.message : String(error));

  const languages = [];
  if (isTranslating) {
    if (translate.keepOriginal) {
      languages.push({
        lang: sourceLang,
        title: originalTitle,
        body: originalBody,
        isOriginal: true
      });
    }
    for (const targetLang of translate.targetLangs) {
      if (targetLang === sourceLang) continue;
      onProgress?.(` Translating to ${targetLang}...`);
      const translateOpts = {
        apiKey: translate.apiKey,
        apiUrl: translate.apiUrl,
        model: translate.model,
        sourceLang,
        targetLang
      };
      try {
        const translatedTitle = await translateTitle(originalTitle, translateOpts);
        const translatedBody = originalBody ? await translateContent(originalBody, translateOpts) : "";
        languages.push({
          lang: targetLang,
          title: translatedTitle,
          body: translatedBody,
          isOriginal: false
        });
        onProgress?.(` \u2192 ${targetLang}: "${translatedTitle.substring(0, 40)}..."`);
      } catch (error) {
        // A failed language is reported and skipped; the others still run.
        onProgress?.(` Error translating to ${targetLang}: ${describeError(error)}`);
      }
      // Short pause between translation requests.
      await new Promise((r) => setTimeout(r, 300));
    }
  } else {
    languages.push({
      lang: "original",
      title: originalTitle,
      body: originalBody,
      isOriginal: true
    });
  }

  const paddedId = String(post.msgId).padStart(6, "0");
  let availableMedia = null;
  // Copies the post's media once (on first use) and returns the names of all
  // files present in the media directory afterwards. Files copied earlier
  // (for another language or by a previous run) are reported as well, so
  // every language version references the same images.
  const collectMedia = () => {
    if (availableMedia !== null) return availableMedia;
    availableMedia = [];
    if (!post.mediaFiles || post.mediaFiles.length === 0) return availableMedia;
    const targetMediaDir = mediaDir ? path2.join(mediaDir, paddedId) : path2.join(outputDir, "..", "media", paddedId);
    fs2.mkdirSync(targetMediaDir, { recursive: true });
    post.mediaFiles.forEach((relativePath, i) => {
      const sourcePath = path2.join(exportDir, relativePath);
      const ext = path2.extname(sourcePath).toLowerCase() || ".jpg";
      const newName = `image${i + 1}${ext}`;
      const targetPath = path2.join(targetMediaDir, newName);
      if (!fs2.existsSync(targetPath) && fs2.existsSync(sourcePath)) {
        fs2.copyFileSync(sourcePath, targetPath);
      }
      if (fs2.existsSync(targetPath)) {
        availableMedia.push(newName);
      }
    });
    return availableMedia;
  };

  for (const { lang, title, body, isOriginal } of languages) {
    const slug = generateSlug2(title, lang);
    const postDir = isTranslating ? path2.join(outputDir, lang, slug) : path2.join(outputDir, slug);
    if (fs2.existsSync(postDir)) {
      onProgress?.(` Skipping existing: ${lang}/${slug}`);
      continue;
    }
    fs2.mkdirSync(postDir, { recursive: true });

    let finalBody = body;
    // Videos are copied but not embedded as images.
    // NOTE(review): the "/media/<paddedId>/" URL prefix is an assumption based
    // on the default media directory layout - confirm against the site's
    // routing, especially when a custom `mediaDir` is used.
    const imageMarkdown = collectMedia()
      .filter((f) => !f.match(/\.(mp4|mov)$/i))
      .map((f) => `![](/media/${paddedId}/${f})`)
      .join("\n\n");
    if (imageMarkdown) {
      finalBody = finalBody + "\n\n" + imageMarkdown;
    }

    // JSON string literals are valid YAML double-quoted scalars, so
    // JSON.stringify escapes quotes, backslashes and control characters safely.
    const frontMatter = [
      "---",
      `title: ${JSON.stringify(title)}`,
      `date: ${date}`,
      `lang: ${lang}`,
      `original_link: ${JSON.stringify(post.link || "")}`,
      ...(isOriginal ? [] : [`translated_from: ${JSON.stringify(sourceLang)}`]),
      "---"
    ];
    const markdown = `${frontMatter.join("\n")}\n\n${finalBody}\n`;
    fs2.writeFileSync(path2.join(postDir, "index.md"), markdown);
    createdPosts.push(`${lang}/${slug}`);
    onProgress?.(` Created: ${lang}/${slug}`);
  }
  return createdPosts;
}
|
|
750
|
+
/**
 * Exports posts from a Telegram channel and writes them as Markdown, with
 * optional translation.
 *
 * Step 1 runs the Telegram export into `outputDir/../.telegram-export`.
 * Step 2 turns every exported post into Markdown via `processPost`, skipping
 * posts that are already in `processedLog` or that have neither content nor
 * media. A post that throws is reported through `onProgress` and counted as
 * skipped; it does not abort the run.
 *
 * @param options See `ExportAndTranslateOptions`. `processedLog` is updated in
 *   place and handed to `onProcessedLog` after every newly processed post.
 * @returns Counts of exported, processed and skipped posts, the channel
 *   metadata and the session string from the export step.
 */
async function exportAndTranslate(options) {
  const {
    apiId,
    apiHash,
    session,
    channel,
    outputDir,
    limit,
    since,
    downloadMedia = true,
    onProgress,
    onSession,
    processedLog = {},
    onProcessedLog
  } = options;
  const exportDir = path2.join(outputDir, "..", ".telegram-export");
  fs2.mkdirSync(exportDir, { recursive: true });
  fs2.mkdirSync(outputDir, { recursive: true });

  onProgress?.("Step 1: Exporting from Telegram...");
  const exportResult = await exportTelegramChannel({
    apiId,
    apiHash,
    session,
    target: channel,
    outputDir: exportDir,
    limit,
    since,
    downloadMedia,
    // Adapt the exporter's (current, total, message) progress to a single line.
    onProgress: (current, total, msg) => {
      onProgress?.(` ${msg} (${current}/${total})`);
    },
    onSession
  });
  onProgress?.(` Exported ${exportResult.posts.length} posts from "${exportResult.channelMeta.title}"`);

  onProgress?.("\nStep 2: Processing posts...");
  let processed = 0;
  let skipped = 0;
  for (const post of exportResult.posts) {
    const postId = `${post.channelUsername}-${post.msgId}`;
    if (processedLog[postId]) {
      skipped++;
      continue;
    }
    if (!post.content && !post.hasMedia) {
      skipped++;
      continue;
    }
    onProgress?.(`\nProcessing: ${postId}`);
    try {
      const created = await processPost(post, options, exportDir);
      if (created.length > 0) {
        processed++;
        processedLog[postId] = {
          posts: created,
          date: post.date.toISOString(),
          processedAt: new Date().toISOString()
        };
        // Persist after every post so an interrupted run can resume.
        onProcessedLog?.(processedLog);
      } else {
        skipped++;
      }
    } catch (error) {
      // Anything can be thrown; do not assume an Error instance.
      const reason = error instanceof Error ? error.message : String(error);
      onProgress?.(` Error: ${reason}`);
      skipped++;
    }
    // Short pause between posts.
    await new Promise((r) => setTimeout(r, 200));
  }
  return {
    exported: exportResult.posts.length,
    processed,
    skipped,
    channelMeta: exportResult.channelMeta,
    session: exportResult.session
  };
}
|
|
589
826
|
// Annotate the CommonJS export names for ESM import in node:
|
|
590
827
|
0 && (module.exports = {
|
|
591
828
|
categorizePost,
|
|
592
829
|
cleanContent,
|
|
593
830
|
configureAnalytics,
|
|
594
831
|
deduplicatePosts,
|
|
832
|
+
exportAndTranslate,
|
|
595
833
|
exportTelegramChannel,
|
|
596
834
|
extractAttachments,
|
|
597
835
|
extractExcerpt,
|
package/dist/index.mjs
CHANGED
|
@@ -530,11 +530,248 @@ async function resumeExport(options) {
|
|
|
530
530
|
}
|
|
531
531
|
return exportTelegramChannel(options);
|
|
532
532
|
}
|
|
533
|
+
|
|
534
|
+
// src/exporter.ts
|
|
535
|
+
import * as fs2 from "fs";
|
|
536
|
+
import * as path2 from "path";
|
|
537
|
+
// Lower-case Cyrillic letter -> Latin replacement. The hard sign (U+044A) and
// the soft sign (U+044C) map to the empty string, i.e. they are dropped.
var translitMap2 = {
  "\u0430": "a",
  "\u0431": "b",
  "\u0432": "v",
  "\u0433": "g",
  "\u0434": "d",
  "\u0435": "e",
  "\u0451": "yo",
  "\u0436": "zh",
  "\u0437": "z",
  "\u0438": "i",
  "\u0439": "y",
  "\u043A": "k",
  "\u043B": "l",
  "\u043C": "m",
  "\u043D": "n",
  "\u043E": "o",
  "\u043F": "p",
  "\u0440": "r",
  "\u0441": "s",
  "\u0442": "t",
  "\u0443": "u",
  "\u0444": "f",
  "\u0445": "h",
  "\u0446": "ts",
  "\u0447": "ch",
  "\u0448": "sh",
  "\u0449": "sch",
  "\u044A": "",
  "\u044B": "y",
  "\u044C": "",
  "\u044D": "e",
  "\u044E": "yu",
  "\u044F": "ya"
};
/**
 * Lower-cases `text` and replaces every Cyrillic letter listed in
 * `translitMap2` with its Latin equivalent. Characters without a mapping are
 * kept as they are.
 *
 * @param text Text to transliterate.
 * @returns The lower-cased, transliterated text.
 */
function transliterate(text) {
  // `??` rather than `||`: the hard and soft signs map to "", which is falsy,
  // so `||` would fall back to the original Cyrillic character instead of
  // dropping it.
  return text.toLowerCase().split("").map((char) => translitMap2[char] ?? char).join("");
}
|
|
575
|
+
/**
 * Builds a URL/file-system friendly slug from a title.
 *
 * Text containing Cyrillic letters is transliterated first. The result is
 * lower-cased, stripped of everything except `a-z`, `0-9`, whitespace and
 * hyphens, joined with single hyphens and limited to 60 characters.
 *
 * @param text Title to turn into a slug.
 * @param lang Language code of the title; currently not used by the slug logic.
 * @returns The slug, or "untitled" when nothing usable is left.
 */
function generateSlug2(text, lang) {
  const latin = /[а-яё]/i.test(text) ? transliterate(text) : text;
  const slug = latin
    .toLowerCase()
    .replace(/[^a-z0-9\s-]/g, "")
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-|-$/g, "")
    .substring(0, 60)
    // The cut at 60 characters can land right after a word separator; trim
    // again so the slug never ends with a hyphen.
    .replace(/-$/, "");
  return slug || "untitled";
}
|
|
582
|
+
/**
 * Removes export artefacts from a post's text: the "## Attachments" section
 * with its bullet list, stray "- media/<digits>/..." bullets and
 * "#tag@channel" markers. The result is trimmed.
 *
 * @param text Raw post content.
 * @returns The cleaned content.
 */
function cleanContent2(text) {
  const attachmentsSection = /##\s*Attachments\n(?:- [^\n]+\n?)*/g;
  const mediaBullet = /- media\/\d+\/[^\n]+/g;
  const channelHashtag = /#\w+@\w+/g;
  let cleaned = text;
  for (const pattern of [attachmentsSection, mediaBullet, channelHashtag]) {
    cleaned = cleaned.replace(pattern, "");
  }
  return cleaned.trim();
}
|
|
585
|
+
/**
 * Splits post content into a title (the first non-blank line, with the
 * characters `# @ [ ] *` removed) and a body (everything after that line).
 *
 * Blank lines inside the body are preserved, collapsed to a single blank
 * line, so that paragraph breaks survive in the generated Markdown.
 *
 * @param content Cleaned post content.
 * @returns `{ title, body }`; the title falls back to "Untitled" and the body
 *   to an empty string.
 */
function extractTitleAndBody(content) {
  const lines = content.split("\n");
  const titleIndex = lines.findIndex((line) => line.trim() !== "");
  if (titleIndex === -1) {
    return { title: "Untitled", body: "" };
  }
  const title = lines[titleIndex].replace(/[#@[\]*]/g, "").trim() || "Untitled";
  const body = lines
    .slice(titleIndex + 1)
    // Normalise whitespace-only lines so runs of them can be collapsed below.
    .map((line) => (line.trim() === "" ? "" : line))
    .join("\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
  return { title, body };
}
|
|
591
|
+
/**
 * Writes one exported Telegram post to disk as Markdown, one `index.md` per
 * output language.
 *
 * With a translation config that has target languages, the post is translated
 * into each of them (optionally keeping the original) and written to
 * `outputDir/<lang>/<slug>/index.md`. Otherwise it is written once to
 * `outputDir/<slug>/index.md` with `lang: original`. A post directory that
 * already exists is left untouched. Media files are copied from the raw export
 * into the media directory and referenced at the end of the body.
 *
 * @param post Exported post; reads `content`, `date`, `msgId`, `mediaFiles`
 *   and `link`.
 * @param options The `exportAndTranslate` options; reads `translate`,
 *   `outputDir`, `mediaDir` and `onProgress`.
 * @param exportDir Directory of the raw export; `post.mediaFiles` entries are
 *   resolved against it.
 * @returns The `<lang>/<slug>` identifiers of the posts created by this call.
 */
async function processPost(post, options, exportDir) {
  const { translate, outputDir, mediaDir, onProgress } = options;
  const createdPosts = [];
  const cleanedContent = cleanContent2(post.content || "");
  const { title: originalTitle, body: originalBody } = extractTitleAndBody(cleanedContent);
  const date = post.date.toISOString().split("T")[0];
  const isTranslating = Boolean(translate && translate.targetLangs.length > 0);
  const sourceLang = translate?.sourceLang || "ru";
  // Anything can be thrown in JavaScript; never assume `.message` exists.
  const describeError = (error) => (error instanceof Error ? error.message : String(error));

  const languages = [];
  if (isTranslating) {
    if (translate.keepOriginal) {
      languages.push({
        lang: sourceLang,
        title: originalTitle,
        body: originalBody,
        isOriginal: true
      });
    }
    for (const targetLang of translate.targetLangs) {
      if (targetLang === sourceLang) continue;
      onProgress?.(` Translating to ${targetLang}...`);
      const translateOpts = {
        apiKey: translate.apiKey,
        apiUrl: translate.apiUrl,
        model: translate.model,
        sourceLang,
        targetLang
      };
      try {
        const translatedTitle = await translateTitle(originalTitle, translateOpts);
        const translatedBody = originalBody ? await translateContent(originalBody, translateOpts) : "";
        languages.push({
          lang: targetLang,
          title: translatedTitle,
          body: translatedBody,
          isOriginal: false
        });
        onProgress?.(` \u2192 ${targetLang}: "${translatedTitle.substring(0, 40)}..."`);
      } catch (error) {
        // A failed language is reported and skipped; the others still run.
        onProgress?.(` Error translating to ${targetLang}: ${describeError(error)}`);
      }
      // Short pause between translation requests.
      await new Promise((r) => setTimeout(r, 300));
    }
  } else {
    languages.push({
      lang: "original",
      title: originalTitle,
      body: originalBody,
      isOriginal: true
    });
  }

  const paddedId = String(post.msgId).padStart(6, "0");
  let availableMedia = null;
  // Copies the post's media once (on first use) and returns the names of all
  // files present in the media directory afterwards. Files copied earlier
  // (for another language or by a previous run) are reported as well, so
  // every language version references the same images.
  const collectMedia = () => {
    if (availableMedia !== null) return availableMedia;
    availableMedia = [];
    if (!post.mediaFiles || post.mediaFiles.length === 0) return availableMedia;
    const targetMediaDir = mediaDir ? path2.join(mediaDir, paddedId) : path2.join(outputDir, "..", "media", paddedId);
    fs2.mkdirSync(targetMediaDir, { recursive: true });
    post.mediaFiles.forEach((relativePath, i) => {
      const sourcePath = path2.join(exportDir, relativePath);
      const ext = path2.extname(sourcePath).toLowerCase() || ".jpg";
      const newName = `image${i + 1}${ext}`;
      const targetPath = path2.join(targetMediaDir, newName);
      if (!fs2.existsSync(targetPath) && fs2.existsSync(sourcePath)) {
        fs2.copyFileSync(sourcePath, targetPath);
      }
      if (fs2.existsSync(targetPath)) {
        availableMedia.push(newName);
      }
    });
    return availableMedia;
  };

  for (const { lang, title, body, isOriginal } of languages) {
    const slug = generateSlug2(title, lang);
    const postDir = isTranslating ? path2.join(outputDir, lang, slug) : path2.join(outputDir, slug);
    if (fs2.existsSync(postDir)) {
      onProgress?.(` Skipping existing: ${lang}/${slug}`);
      continue;
    }
    fs2.mkdirSync(postDir, { recursive: true });

    let finalBody = body;
    // Videos are copied but not embedded as images.
    // NOTE(review): the "/media/<paddedId>/" URL prefix is an assumption based
    // on the default media directory layout - confirm against the site's
    // routing, especially when a custom `mediaDir` is used.
    const imageMarkdown = collectMedia()
      .filter((f) => !f.match(/\.(mp4|mov)$/i))
      .map((f) => `![](/media/${paddedId}/${f})`)
      .join("\n\n");
    if (imageMarkdown) {
      finalBody = finalBody + "\n\n" + imageMarkdown;
    }

    // JSON string literals are valid YAML double-quoted scalars, so
    // JSON.stringify escapes quotes, backslashes and control characters safely.
    const frontMatter = [
      "---",
      `title: ${JSON.stringify(title)}`,
      `date: ${date}`,
      `lang: ${lang}`,
      `original_link: ${JSON.stringify(post.link || "")}`,
      ...(isOriginal ? [] : [`translated_from: ${JSON.stringify(sourceLang)}`]),
      "---"
    ];
    const markdown = `${frontMatter.join("\n")}\n\n${finalBody}\n`;
    fs2.writeFileSync(path2.join(postDir, "index.md"), markdown);
    createdPosts.push(`${lang}/${slug}`);
    onProgress?.(` Created: ${lang}/${slug}`);
  }
  return createdPosts;
}
|
|
693
|
+
/**
 * Exports posts from a Telegram channel and writes them as Markdown, with
 * optional translation.
 *
 * Step 1 runs the Telegram export into `outputDir/../.telegram-export`.
 * Step 2 turns every exported post into Markdown via `processPost`, skipping
 * posts that are already in `processedLog` or that have neither content nor
 * media. A post that throws is reported through `onProgress` and counted as
 * skipped; it does not abort the run.
 *
 * @param options See `ExportAndTranslateOptions`. `processedLog` is updated in
 *   place and handed to `onProcessedLog` after every newly processed post.
 * @returns Counts of exported, processed and skipped posts, the channel
 *   metadata and the session string from the export step.
 */
async function exportAndTranslate(options) {
  const {
    apiId,
    apiHash,
    session,
    channel,
    outputDir,
    limit,
    since,
    downloadMedia = true,
    onProgress,
    onSession,
    processedLog = {},
    onProcessedLog
  } = options;
  const exportDir = path2.join(outputDir, "..", ".telegram-export");
  fs2.mkdirSync(exportDir, { recursive: true });
  fs2.mkdirSync(outputDir, { recursive: true });

  onProgress?.("Step 1: Exporting from Telegram...");
  const exportResult = await exportTelegramChannel({
    apiId,
    apiHash,
    session,
    target: channel,
    outputDir: exportDir,
    limit,
    since,
    downloadMedia,
    // Adapt the exporter's (current, total, message) progress to a single line.
    onProgress: (current, total, msg) => {
      onProgress?.(` ${msg} (${current}/${total})`);
    },
    onSession
  });
  onProgress?.(` Exported ${exportResult.posts.length} posts from "${exportResult.channelMeta.title}"`);

  onProgress?.("\nStep 2: Processing posts...");
  let processed = 0;
  let skipped = 0;
  for (const post of exportResult.posts) {
    const postId = `${post.channelUsername}-${post.msgId}`;
    if (processedLog[postId]) {
      skipped++;
      continue;
    }
    if (!post.content && !post.hasMedia) {
      skipped++;
      continue;
    }
    onProgress?.(`\nProcessing: ${postId}`);
    try {
      const created = await processPost(post, options, exportDir);
      if (created.length > 0) {
        processed++;
        processedLog[postId] = {
          posts: created,
          date: post.date.toISOString(),
          processedAt: new Date().toISOString()
        };
        // Persist after every post so an interrupted run can resume.
        onProcessedLog?.(processedLog);
      } else {
        skipped++;
      }
    } catch (error) {
      // Anything can be thrown; do not assume an Error instance.
      const reason = error instanceof Error ? error.message : String(error);
      onProgress?.(` Error: ${reason}`);
      skipped++;
    }
    // Short pause between posts.
    await new Promise((r) => setTimeout(r, 200));
  }
  return {
    exported: exportResult.posts.length,
    processed,
    skipped,
    channelMeta: exportResult.channelMeta,
    session: exportResult.session
  };
}
|
|
533
769
|
export {
|
|
534
770
|
categorizePost,
|
|
535
771
|
cleanContent,
|
|
536
772
|
configureAnalytics,
|
|
537
773
|
deduplicatePosts,
|
|
774
|
+
exportAndTranslate,
|
|
538
775
|
exportTelegramChannel,
|
|
539
776
|
extractAttachments,
|
|
540
777
|
extractExcerpt,
|