koztv-blog-tools 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,215 @@
1
+ # koztv-blog-tools
2
+
3
+ Shared utilities for Telegram-based blog sites. Export posts from Telegram channels, translate with LLM APIs, and generate markdown for static site generators.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install koztv-blog-tools
9
+ ```
10
+
11
+ ## Features
12
+
13
+ - **Telegram Export** — Export full channel history via MTProto (gramjs)
14
+ - **Translation** — Translate posts using OpenAI-compatible APIs (GLM, OpenAI, etc.)
15
+ - **Multi-language** — Export to multiple languages simultaneously
16
+ - **Markdown Generation** — Create markdown files with YAML frontmatter
17
+ - **Media Download** — Download photos, videos, and documents
18
+ - **Incremental Export** — Track processed posts, skip duplicates
19
+
20
+ ## Quick Start
21
+
22
+ ### Export + Translate
23
+
24
+ ```javascript
25
+ const { exportAndTranslate } = require('koztv-blog-tools');
26
+
27
+ const result = await exportAndTranslate({
28
+ // Telegram credentials (from https://my.telegram.org)
29
+ apiId: 12345678,
30
+ apiHash: 'your_api_hash',
31
+ session: 'saved_session_string', // from previous auth
32
+
33
+ // Target channel
34
+ channel: '@channelname',
35
+
36
+ // Output
37
+ outputDir: './content/posts',
38
+ mediaDir: './public/media',
39
+
40
+ // Translation (optional)
41
+ translate: {
42
+ apiKey: 'your_llm_api_key',
43
+ apiUrl: 'https://api.openai.com/v1', // or GLM, etc.
44
+ model: 'gpt-4',
45
+ sourceLang: 'ru',
46
+ targetLangs: ['en', 'de'], // translate to multiple languages
47
+ keepOriginal: true, // also save original language
48
+ },
49
+
50
+ // Callbacks
51
+ onProgress: (msg) => console.log(msg),
52
+ onSession: (s) => saveSession(s), // save for future use
53
+ });
54
+
55
+ console.log(`Exported: ${result.exported}, Processed: ${result.processed}`);
56
+ ```
57
+
58
+ ### Export Only (no translation)
59
+
60
+ ```javascript
61
+ const { exportTelegramChannel } = require('koztv-blog-tools');
62
+
63
+ const result = await exportTelegramChannel({
64
+ apiId: 12345678,
65
+ apiHash: 'your_api_hash',
66
+ session: 'saved_session_string',
67
+ target: '@channelname',
68
+ outputDir: './export',
69
+ downloadMedia: true,
70
+ limit: 100, // optional: limit number of posts
71
+ since: new Date('2024-01-01'), // optional: filter by date
72
+ });
73
+ ```
74
+
75
+ ### Translate Text
76
+
77
+ ```javascript
78
+ const { translateContent, translateTitle } = require('koztv-blog-tools');
79
+
80
+ const translated = await translateContent('Привет мир', {
81
+ apiKey: 'your_api_key',
82
+ apiUrl: 'https://api.openai.com/v1',
83
+ model: 'gpt-4',
84
+ sourceLang: 'ru',
85
+ targetLang: 'en',
86
+ });
87
+ // => "Hello world"
88
+ ```
89
+
90
+ ## Authentication
91
+
92
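+ First-time authentication must be done interactively (for example via QR code login — see `scripts/qr-login.js` in your project). The example below instead uses phone-number login and prints the session string to save for later runs: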
93
+
94
+ ```javascript
95
+ // Example interactive login script (phone number + code, optional 2FA password)
96
+ const { TelegramClient } = require('telegram');
97
+ const { StringSession } = require('telegram/sessions');
98
+
99
+ const client = new TelegramClient(
100
+ new StringSession(''),
101
+ API_ID,
102
+ API_HASH,
103
+ { connectionRetries: 5 }
104
+ );
105
+
106
+ await client.start({
107
+ phoneNumber: async () => prompt('Phone: '),
108
+ password: async () => prompt('2FA: '),
109
+ phoneCode: async () => prompt('Code: '),
110
+ onError: console.error,
111
+ });
112
+
113
+ console.log('Session:', client.session.save());
114
+ ```
115
+
116
+ ## Output Structure
117
+
118
+ With multi-language enabled (`targetLangs` + `keepOriginal`):
119
+
120
+ ```
121
+ content/posts/
122
+ en/
123
+ my-post-slug/
124
+ index.md
125
+ ru/
126
+ my-post-slug/
127
+ index.md
128
+ public/media/
129
+ 000123/
130
+ image1.jpg
131
+ image2.jpg
132
+ ```
133
+
134
+ Markdown format:
135
+
136
+ ```yaml
137
+ ---
138
+ title: "Post Title"
139
+ date: 2024-01-15
140
+ lang: en
141
+ original_link: "https://t.me/channel/123"
142
+ translated_from: "ru"
143
+ ---
144
+
145
+ Post content here...
146
+
147
+ ![](/media/000123/image1.jpg)
148
+ ```
149
+
150
+ ## Environment Variables
151
+
152
+ For GitHub Actions / CI:
153
+
154
+ ```bash
155
+ TELEGRAM_API_ID=12345678
156
+ TELEGRAM_API_HASH=your_api_hash
157
+ TELEGRAM_SESSION=base64_session_string
158
+ TELEGRAM_CHANNEL=@channelname
159
+
160
+ LLM_API_KEY=your_llm_key
161
+ LLM_API_URL=https://api.openai.com/v1
162
+ LLM_MODEL=gpt-4
163
+
164
+ TARGET_LANGS=en,de # comma-separated
165
+ KEEP_ORIGINAL=true # keep source language
166
+ ```
167
+
168
+ ## API Reference
169
+
170
+ ### exportAndTranslate(options)
171
+
172
+ Main function for export + translation workflow.
173
+
174
+ **Options:**
175
+ - `apiId`, `apiHash`, `session` — Telegram credentials
176
+ - `channel` — Target channel (@username or ID)
177
+ - `outputDir` — Where to save markdown files
178
+ - `mediaDir` — Where to save media (optional; defaults to `outputDir/../media`)
179
+ - `limit` — Max posts to export (optional)
180
+ - `since` — Export posts after this date (optional)
181
+ - `downloadMedia` — Download media files (default: true)
182
+ - `translate` — Translation config (optional)
183
+ - `onProgress` — Progress callback
184
+ - `onSession` — Session save callback
185
+ - `processedLog` — Object tracking processed posts, keyed by `<channelUsername>-<msgId>`; posts already present in it are skipped
186
+ - `onProcessedLog` — Callback to save processed log
187
+
188
+ ### exportTelegramChannel(options)
189
+
190
+ Low-level Telegram export.
191
+
192
+ ### translateContent(text, options)
193
+
194
+ Translate text content.
195
+
196
+ ### translateTitle(title, options)
197
+
198
+ Translate title (optimized prompt for short text).
199
+
200
+ ### generateEnglishSlug(title, options)
201
+
202
+ Generate URL-friendly slug from any language title.
203
+
204
+ ## Used By
205
+
206
+ - [koz.tv](https://koz.tv) — Personal blog with Telegram sync
207
+ - [staskoz.com](https://staskoz.com) — Another blog using this package
208
+
209
+ ## Related
210
+
211
+ - [k-engine](https://github.com/Koz-TV/k-engine) — Static site generator with multi-language support
212
+
213
+ ## License
214
+
215
+ MIT
package/dist/index.d.mts CHANGED
@@ -216,4 +216,72 @@ declare function resumeExport(options: Omit<TelegramExportOptions, 'onPhoneNumbe
216
216
  session: string;
217
217
  }): Promise<ExportResult>;
218
218
 
219
- export { type AnalyticsConfig, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type TelegramExportOptions, type TranslateOptions, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
219
+ /**
220
+ * Combined Telegram export + translation module
221
+ */
222
+ /** Settings for the optional translation step of exportAndTranslate. */ interface TranslationConfig {
223
+ /** API key for the translation service; passed through to translateTitle/translateContent */
224
+ apiKey: string;
225
+ /** API base URL (OpenAI-compatible), e.g. 'https://api.openai.com/v1' */
226
+ apiUrl: string;
227
+ /** Model name sent to the translation API, e.g. 'gpt-4' */
228
+ model: string;
229
+ /** Source language code (default: 'ru'); also written as `translated_from` in translated posts */
230
+ sourceLang?: string;
231
+ /** Target languages array (e.g., ['en', 'de', 'zh']); entries equal to the source language are skipped, and an empty array disables translation */
232
+ targetLangs: string[];
233
+ /** Also write the untranslated post, stored under the source language code */
234
+ keepOriginal?: boolean;
235
+ }
236
+ /** Options accepted by exportAndTranslate. */ interface ExportAndTranslateOptions {
237
+ /** Telegram API ID */
238
+ apiId: number;
239
+ /** Telegram API Hash */
240
+ apiHash: string;
241
+ /** Saved session string from a previous login; forwarded to exportTelegramChannel */
242
+ session?: string;
243
+ /** Target channel; forwarded to exportTelegramChannel as `target` */
244
+ channel: string;
245
+ /** Output directory for posts; the raw Telegram export is written next to it in outputDir/../.telegram-export */
246
+ outputDir: string;
247
+ /** Media output directory (default: outputDir/../media); each post gets a subdirectory named after its zero-padded message ID */
248
+ mediaDir?: string;
249
+ /** Export limit; forwarded to exportTelegramChannel */
250
+ limit?: number;
251
+ /** Export posts since date; forwarded to exportTelegramChannel */
252
+ since?: Date;
253
+ /** Download media files (default: true) */
254
+ downloadMedia?: boolean;
255
+ /** Translation config (optional - if not provided, or if targetLangs is empty, no translation) */
256
+ translate?: TranslationConfig;
257
+ /** Progress callback; receives human-readable status lines */
258
+ onProgress?: (message: string) => void;
259
+ /** Session save callback; forwarded to exportTelegramChannel */
260
+ onSession?: (session: string) => void;
261
+ /** Log of already processed posts (to skip), keyed by `${channelUsername}-${msgId}`; updated in place as posts are processed */
262
+ processedLog?: Record<string, any>;
263
+ /** Callback to save processed log; called with the updated log after each post that produced output */
264
+ onProcessedLog?: (log: Record<string, any>) => void;
265
+ }
266
+ /** Summary returned by exportAndTranslate. */ interface ExportAndTranslateResult {
267
+ /** Number of posts returned by the Telegram export */
268
+ exported: number;
269
+ /** Number of posts for which at least one markdown file was written */
270
+ processed: number;
271
+ /** Number of posts skipped: already in processedLog, no content and no media, no file written, or an error while processing */
272
+ skipped: number;
273
+ /** Channel metadata, as returned by exportTelegramChannel */
274
+ channelMeta: {
275
+ id: number;
276
+ username: string;
277
+ title: string;
278
+ };
279
+ /** Session string for future use, as returned by exportTelegramChannel */
280
+ session: string;
281
+ }
282
+ /**
283
+ * Export posts from a Telegram channel and write each one as markdown, with optional translation into one or more target languages. Posts listed in `options.processedLog` are skipped. Resolves with counts of exported/processed/skipped posts, the channel metadata and the session string.
284
+ */
285
+ declare function exportAndTranslate(options: ExportAndTranslateOptions): Promise<ExportAndTranslateResult>;
286
+
287
+ export { type AnalyticsConfig, type ExportAndTranslateOptions, type ExportAndTranslateResult, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type TelegramExportOptions, type TranslateOptions, type TranslationConfig, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportAndTranslate, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
package/dist/index.d.ts CHANGED
@@ -216,4 +216,72 @@ declare function resumeExport(options: Omit<TelegramExportOptions, 'onPhoneNumbe
216
216
  session: string;
217
217
  }): Promise<ExportResult>;
218
218
 
219
- export { type AnalyticsConfig, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type TelegramExportOptions, type TranslateOptions, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
219
+ /**
220
+ * Combined Telegram export + translation module
221
+ */
222
+ /** Settings for the optional translation step of exportAndTranslate. */ interface TranslationConfig {
223
+ /** API key for the translation service; passed through to translateTitle/translateContent */
224
+ apiKey: string;
225
+ /** API base URL (OpenAI-compatible), e.g. 'https://api.openai.com/v1' */
226
+ apiUrl: string;
227
+ /** Model name sent to the translation API, e.g. 'gpt-4' */
228
+ model: string;
229
+ /** Source language code (default: 'ru'); also written as `translated_from` in translated posts */
230
+ sourceLang?: string;
231
+ /** Target languages array (e.g., ['en', 'de', 'zh']); entries equal to the source language are skipped, and an empty array disables translation */
232
+ targetLangs: string[];
233
+ /** Also write the untranslated post, stored under the source language code */
234
+ keepOriginal?: boolean;
235
+ }
236
+ /** Options accepted by exportAndTranslate. */ interface ExportAndTranslateOptions {
237
+ /** Telegram API ID */
238
+ apiId: number;
239
+ /** Telegram API Hash */
240
+ apiHash: string;
241
+ /** Saved session string from a previous login; forwarded to exportTelegramChannel */
242
+ session?: string;
243
+ /** Target channel; forwarded to exportTelegramChannel as `target` */
244
+ channel: string;
245
+ /** Output directory for posts; the raw Telegram export is written next to it in outputDir/../.telegram-export */
246
+ outputDir: string;
247
+ /** Media output directory (default: outputDir/../media); each post gets a subdirectory named after its zero-padded message ID */
248
+ mediaDir?: string;
249
+ /** Export limit; forwarded to exportTelegramChannel */
250
+ limit?: number;
251
+ /** Export posts since date; forwarded to exportTelegramChannel */
252
+ since?: Date;
253
+ /** Download media files (default: true) */
254
+ downloadMedia?: boolean;
255
+ /** Translation config (optional - if not provided, or if targetLangs is empty, no translation) */
256
+ translate?: TranslationConfig;
257
+ /** Progress callback; receives human-readable status lines */
258
+ onProgress?: (message: string) => void;
259
+ /** Session save callback; forwarded to exportTelegramChannel */
260
+ onSession?: (session: string) => void;
261
+ /** Log of already processed posts (to skip), keyed by `${channelUsername}-${msgId}`; updated in place as posts are processed */
262
+ processedLog?: Record<string, any>;
263
+ /** Callback to save processed log; called with the updated log after each post that produced output */
264
+ onProcessedLog?: (log: Record<string, any>) => void;
265
+ }
266
+ /** Summary returned by exportAndTranslate. */ interface ExportAndTranslateResult {
267
+ /** Number of posts returned by the Telegram export */
268
+ exported: number;
269
+ /** Number of posts for which at least one markdown file was written */
270
+ processed: number;
271
+ /** Number of posts skipped: already in processedLog, no content and no media, no file written, or an error while processing */
272
+ skipped: number;
273
+ /** Channel metadata, as returned by exportTelegramChannel */
274
+ channelMeta: {
275
+ id: number;
276
+ username: string;
277
+ title: string;
278
+ };
279
+ /** Session string for future use, as returned by exportTelegramChannel */
280
+ session: string;
281
+ }
282
+ /**
283
+ * Export posts from a Telegram channel and write each one as markdown, with optional translation into one or more target languages. Posts listed in `options.processedLog` are skipped. Resolves with counts of exported/processed/skipped posts, the channel metadata and the session string.
284
+ */
285
+ declare function exportAndTranslate(options: ExportAndTranslateOptions): Promise<ExportAndTranslateResult>;
286
+
287
+ export { type AnalyticsConfig, type ExportAndTranslateOptions, type ExportAndTranslateResult, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type TelegramExportOptions, type TranslateOptions, type TranslationConfig, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportAndTranslate, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
package/dist/index.js CHANGED
@@ -34,6 +34,7 @@ __export(index_exports, {
34
34
  cleanContent: () => cleanContent,
35
35
  configureAnalytics: () => configureAnalytics,
36
36
  deduplicatePosts: () => deduplicatePosts,
37
+ exportAndTranslate: () => exportAndTranslate,
37
38
  exportTelegramChannel: () => exportTelegramChannel,
38
39
  extractAttachments: () => extractAttachments,
39
40
  extractExcerpt: () => extractExcerpt,
@@ -586,12 +587,253 @@ async function resumeExport(options) {
586
587
  }
587
588
  return exportTelegramChannel(options);
588
589
  }
590
+
591
+ // src/exporter.ts
592
+ var fs2 = __toESM(require("fs"));
593
+ var path2 = __toESM(require("path"));
594
// Lowercase Cyrillic -> Latin transliteration table. The hard sign (\u044A)
// and soft sign (\u044C) deliberately map to the empty string: they are dropped.
var translitMap2 = {
  "\u0430": "a",
  "\u0431": "b",
  "\u0432": "v",
  "\u0433": "g",
  "\u0434": "d",
  "\u0435": "e",
  "\u0451": "yo",
  "\u0436": "zh",
  "\u0437": "z",
  "\u0438": "i",
  "\u0439": "y",
  "\u043A": "k",
  "\u043B": "l",
  "\u043C": "m",
  "\u043D": "n",
  "\u043E": "o",
  "\u043F": "p",
  "\u0440": "r",
  "\u0441": "s",
  "\u0442": "t",
  "\u0443": "u",
  "\u0444": "f",
  "\u0445": "h",
  "\u0446": "ts",
  "\u0447": "ch",
  "\u0448": "sh",
  "\u0449": "sch",
  "\u044A": "",
  "\u044B": "y",
  "\u044C": "",
  "\u044D": "e",
  "\u044E": "yu",
  "\u044F": "ya"
};

/**
 * Lowercase `text` and replace every Cyrillic letter listed in `translitMap2`
 * with its Latin equivalent. Characters not in the table pass through unchanged.
 *
 * @param {string} text - Text to transliterate.
 * @returns {string} Lowercased, transliterated text.
 */
function transliterate(text) {
  return text
    .toLowerCase()
    .split("")
    // `??` instead of `||`: the empty-string mappings for the hard/soft signs
    // are real entries and must not fall back to the original character.
    .map((char) => translitMap2[char] ?? char)
    .join("");
}
632
/**
 * Build a URL-friendly slug from a title.
 *
 * Text containing Cyrillic letters is transliterated first. The result is
 * lowercased, stripped of everything except `a-z`, `0-9`, whitespace and
 * hyphens, whitespace is turned into hyphens, hyphen runs are collapsed and
 * the slug is limited to 60 characters.
 *
 * @param {string} text - Title to convert.
 * @param {string} [lang] - Language code; accepted for call-site symmetry but not used.
 * @returns {string} The slug, or "untitled" when nothing usable remains.
 */
function generateSlug2(text, lang) {
  const latin = /[а-яё]/i.test(text) ? transliterate(text) : text;
  const slug = latin
    .toLowerCase()
    .replace(/[^a-z0-9\s-]/g, "")
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-|-$/g, "")
    .substring(0, 60)
    // Truncation can cut right after a word separator; strip the hyphen it
    // would otherwise leave dangling at the end of the slug.
    .replace(/-$/, "");
  return slug || "untitled";
}
639
/**
 * Strip export artifacts from a post's text before it is turned into markdown.
 *
 * Removes, in order: a "## Attachments" heading together with the list items
 * directly below it, any remaining "- media/<digits>/..." list entries, and
 * "#tag@name" tokens. The result is trimmed.
 *
 * @param {string} text - Raw post content.
 * @returns {string} Cleaned content.
 */
function cleanContent2(text) {
  return text
    .replace(/##\s*Attachments\n(?:- [^\n]+\n?)*/g, "")
    .replace(/- media\/\d+\/[^\n]+/g, "")
    // Unicode-aware tag match: `\w` is ASCII-only and would leave tags written
    // in Cyrillic (or any other non-Latin script) in the text.
    .replace(/#[\p{L}\p{N}_]+@\w+/gu, "")
    .trim();
}
642
/**
 * Split post content into a title and a body.
 *
 * The title is the first non-blank line with the characters `# @ [ ] *`
 * removed; the body is everything after that line, trimmed. Blank lines
 * inside the body are kept so markdown paragraph breaks survive.
 *
 * @param {string} content - Cleaned post content.
 * @returns {{ title: string, body: string }} Title ("Untitled" when there is
 *   no usable first line) and body ("" when there is nothing after the title).
 */
function extractTitleAndBody(content) {
  const lines = content.split("\n");
  const titleIndex = lines.findIndex((line) => line.trim());
  if (titleIndex === -1) {
    return { title: "Untitled", body: "" };
  }
  const title = lines[titleIndex].replace(/[#@[\]*]/g, "").trim() || "Untitled";
  const body = lines.slice(titleIndex + 1).join("\n").trim();
  return { title, body };
}
648
/**
 * Write one exported post to disk as markdown, once per output language.
 *
 * With translation enabled (`options.translate.targetLangs` non-empty) the
 * post is translated into every target language that differs from the source
 * language, and optionally kept in the source language too; files go to
 * `<outputDir>/<lang>/<slug>/index.md`. Without translation a single file is
 * written to `<outputDir>/<slug>/index.md`. A language whose post directory
 * already exists is skipped. Media files are copied into
 * `<mediaDir or outputDir/../media>/<zero-padded msgId>/` and referenced at
 * the end of the body.
 *
 * @param {object} post - Exported post; reads `content`, `date`, `msgId`,
 *   `link` and `mediaFiles`.
 * @param {object} options - The options passed to exportAndTranslate; reads
 *   `translate`, `outputDir`, `mediaDir` and `onProgress`.
 * @param {string} exportDir - Directory the `post.mediaFiles` paths are relative to.
 * @returns {Promise<string[]>} `<lang>/<slug>` for every file written.
 */
async function processPost(post, options, exportDir) {
  const { translate, outputDir, mediaDir, onProgress } = options;
  const createdPosts = [];
  const cleanedContent = cleanContent2(post.content || "");
  const { title: originalTitle, body: originalBody } = extractTitleAndBody(cleanedContent);
  const date = post.date.toISOString().split("T")[0];
  const isTranslating = Boolean(translate && translate.targetLangs.length > 0);
  const languages = [];
  if (isTranslating) {
    const sourceLang = translate.sourceLang || "ru";
    if (translate.keepOriginal) {
      languages.push({
        lang: sourceLang,
        title: originalTitle,
        body: originalBody,
        isOriginal: true
      });
    }
    for (const targetLang of translate.targetLangs) {
      if (targetLang === sourceLang) continue;
      onProgress?.(` Translating to ${targetLang}...`);
      const translateOpts = {
        apiKey: translate.apiKey,
        apiUrl: translate.apiUrl,
        model: translate.model,
        sourceLang,
        targetLang
      };
      try {
        const translatedTitle = await translateTitle(originalTitle, translateOpts);
        const translatedBody = originalBody ? await translateContent(originalBody, translateOpts) : "";
        languages.push({
          lang: targetLang,
          title: translatedTitle,
          body: translatedBody,
          isOriginal: false
        });
        onProgress?.(` \u2192 ${targetLang}: "${translatedTitle.substring(0, 40)}..."`);
      } catch (error) {
        // Best effort: a failed language is reported and left out; the other
        // languages are still written.
        onProgress?.(` Error translating to ${targetLang}: ${error.message}`);
      }
      // Short pause between translation requests.
      await new Promise((r) => setTimeout(r, 300));
    }
  } else {
    languages.push({
      lang: "original",
      title: originalTitle,
      body: originalBody,
      isOriginal: true
    });
  }
  for (const { lang, title, body, isOriginal } of languages) {
    const slug = generateSlug2(title, lang);
    // Per-language subdirectories are used whenever translation is enabled,
    // even if only one language ended up in the list.
    const postDir = isTranslating ? path2.join(outputDir, lang, slug) : path2.join(outputDir, slug);
    // NOTE(review): posts whose titles produce the same slug (e.g. several
    // media-only posts that all become "untitled") collide here and only the
    // first one is written — confirm whether that is intended.
    if (fs2.existsSync(postDir)) {
      onProgress?.(` Skipping existing: ${lang}/${slug}`);
      continue;
    }
    fs2.mkdirSync(postDir, { recursive: true });
    let finalBody = body;
    const mediaFiles = [];
    if (post.mediaFiles && post.mediaFiles.length > 0) {
      const paddedId = String(post.msgId).padStart(6, "0");
      const targetMediaDir = mediaDir ? path2.join(mediaDir, paddedId) : path2.join(outputDir, "..", "media", paddedId);
      fs2.mkdirSync(targetMediaDir, { recursive: true });
      for (let i = 0; i < post.mediaFiles.length; i++) {
        const sourcePath = path2.join(exportDir, post.mediaFiles[i]);
        const ext = path2.extname(sourcePath).toLowerCase() || ".jpg";
        const newName = `image${i + 1}${ext}`;
        const targetPath = path2.join(targetMediaDir, newName);
        if (!fs2.existsSync(targetPath)) {
          if (!fs2.existsSync(sourcePath)) continue;
          fs2.copyFileSync(sourcePath, targetPath);
        }
        // The media directory is shared by all languages of a post, so a file
        // copied for an earlier language must still be referenced by this one.
        mediaFiles.push(newName);
      }
      if (mediaFiles.length > 0) {
        const isVideo = (f) => /\.(mp4|mov|webm|m4v)$/i.test(f);
        const imageMarkdown = mediaFiles.filter((f) => !isVideo(f)).map((f) => `![](/media/${paddedId}/${f})`).join("\n\n");
        const videoMarkdown = mediaFiles.filter(isVideo).map((f) => `<video src="/media/${paddedId}/${f}" controls></video>`).join("\n\n");
        const mediaMarkdown = [imageMarkdown, videoMarkdown].filter(Boolean).join("\n\n");
        if (mediaMarkdown) {
          finalBody = finalBody + "\n\n" + mediaMarkdown;
        }
      }
    }
    // Inside a double-quoted YAML scalar both `\` and `"` must be escaped;
    // backslashes go first so the ones added for quotes are not doubled.
    const yamlTitle = title.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
    const frontMatter = [
      "---",
      `title: "${yamlTitle}"`,
      `date: ${date}`,
      `lang: ${lang}`,
      `original_link: "${post.link || ""}"`
    ];
    if (!isOriginal) {
      frontMatter.push(`translated_from: "${translate?.sourceLang || "ru"}"`);
    }
    frontMatter.push("---");
    const markdown = `${frontMatter.join("\n")}\n\n${finalBody}\n`;
    fs2.writeFileSync(path2.join(postDir, "index.md"), markdown);
    createdPosts.push(`${lang}/${slug}`);
    onProgress?.(` Created: ${lang}/${slug}`);
  }
  return createdPosts;
}
754
/**
 * Export a Telegram channel and turn every new post into markdown.
 *
 * Step 1 runs exportTelegramChannel into `<outputDir>/../.telegram-export`.
 * Step 2 hands each exported post to processPost, skipping posts already in
 * `processedLog` and posts with neither content nor media. Every post that
 * produced at least one file is recorded in `processedLog` (updated in place)
 * and reported through `onProcessedLog`.
 *
 * @param {object} options - See ExportAndTranslateOptions.
 * @returns {Promise<object>} Counts of exported/processed/skipped posts plus
 *   the channel metadata and session from the export.
 */
async function exportAndTranslate(options) {
  const { apiId, apiHash, session, channel, outputDir, limit, since } = options;
  const { onProgress, onSession, onProcessedLog } = options;
  const { downloadMedia = true, processedLog = {} } = options;
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const exportDir = path2.join(outputDir, "..", ".telegram-export");
  for (const dir of [exportDir, outputDir]) {
    fs2.mkdirSync(dir, { recursive: true });
  }

  onProgress?.("Step 1: Exporting from Telegram...");
  const exportResult = await exportTelegramChannel({
    apiId,
    apiHash,
    session,
    target: channel,
    outputDir: exportDir,
    limit,
    since,
    downloadMedia,
    onProgress: (current, total, msg) => {
      onProgress?.(` ${msg} (${current}/${total})`);
    },
    onSession
  });
  onProgress?.(` Exported ${exportResult.posts.length} posts from "${exportResult.channelMeta.title}"`);

  onProgress?.("\nStep 2: Processing posts...");
  const counts = { processed: 0, skipped: 0 };
  for (const post of exportResult.posts) {
    const postId = `${post.channelUsername}-${post.msgId}`;
    // Already handled in an earlier run, or nothing to publish.
    if (processedLog[postId] || (!post.content && !post.hasMedia)) {
      counts.skipped += 1;
      continue;
    }
    onProgress?.(`\nProcessing: ${postId}`);
    try {
      const created = await processPost(post, options, exportDir);
      if (created.length === 0) {
        counts.skipped += 1;
      } else {
        counts.processed += 1;
        processedLog[postId] = {
          posts: created,
          date: post.date.toISOString(),
          processedAt: (/* @__PURE__ */ new Date()).toISOString()
        };
        onProcessedLog?.(processedLog);
      }
    } catch (error) {
      // A failing post is reported and counted as skipped; the run continues.
      onProgress?.(` Error: ${error.message}`);
      counts.skipped += 1;
    }
    await pause(200);
  }

  return {
    exported: exportResult.posts.length,
    processed: counts.processed,
    skipped: counts.skipped,
    channelMeta: exportResult.channelMeta,
    session: exportResult.session
  };
}
589
830
  // Annotate the CommonJS export names for ESM import in node:
590
831
  0 && (module.exports = {
591
832
  categorizePost,
592
833
  cleanContent,
593
834
  configureAnalytics,
594
835
  deduplicatePosts,
836
+ exportAndTranslate,
595
837
  exportTelegramChannel,
596
838
  extractAttachments,
597
839
  extractExcerpt,
package/dist/index.mjs CHANGED
@@ -530,11 +530,252 @@ async function resumeExport(options) {
530
530
  }
531
531
  return exportTelegramChannel(options);
532
532
  }
533
+
534
+ // src/exporter.ts
535
+ import * as fs2 from "fs";
536
+ import * as path2 from "path";
537
// Lowercase Cyrillic -> Latin transliteration table. The hard sign (\u044A)
// and soft sign (\u044C) deliberately map to the empty string: they are dropped.
var translitMap2 = {
  "\u0430": "a",
  "\u0431": "b",
  "\u0432": "v",
  "\u0433": "g",
  "\u0434": "d",
  "\u0435": "e",
  "\u0451": "yo",
  "\u0436": "zh",
  "\u0437": "z",
  "\u0438": "i",
  "\u0439": "y",
  "\u043A": "k",
  "\u043B": "l",
  "\u043C": "m",
  "\u043D": "n",
  "\u043E": "o",
  "\u043F": "p",
  "\u0440": "r",
  "\u0441": "s",
  "\u0442": "t",
  "\u0443": "u",
  "\u0444": "f",
  "\u0445": "h",
  "\u0446": "ts",
  "\u0447": "ch",
  "\u0448": "sh",
  "\u0449": "sch",
  "\u044A": "",
  "\u044B": "y",
  "\u044C": "",
  "\u044D": "e",
  "\u044E": "yu",
  "\u044F": "ya"
};

/**
 * Lowercase `text` and replace every Cyrillic letter listed in `translitMap2`
 * with its Latin equivalent. Characters not in the table pass through unchanged.
 *
 * @param {string} text - Text to transliterate.
 * @returns {string} Lowercased, transliterated text.
 */
function transliterate(text) {
  return text
    .toLowerCase()
    .split("")
    // `??` instead of `||`: the empty-string mappings for the hard/soft signs
    // are real entries and must not fall back to the original character.
    .map((char) => translitMap2[char] ?? char)
    .join("");
}
575
/**
 * Build a URL-friendly slug from a title.
 *
 * Text containing Cyrillic letters is transliterated first. The result is
 * lowercased, stripped of everything except `a-z`, `0-9`, whitespace and
 * hyphens, whitespace is turned into hyphens, hyphen runs are collapsed and
 * the slug is limited to 60 characters.
 *
 * @param {string} text - Title to convert.
 * @param {string} [lang] - Language code; accepted for call-site symmetry but not used.
 * @returns {string} The slug, or "untitled" when nothing usable remains.
 */
function generateSlug2(text, lang) {
  const latin = /[а-яё]/i.test(text) ? transliterate(text) : text;
  const slug = latin
    .toLowerCase()
    .replace(/[^a-z0-9\s-]/g, "")
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-|-$/g, "")
    .substring(0, 60)
    // Truncation can cut right after a word separator; strip the hyphen it
    // would otherwise leave dangling at the end of the slug.
    .replace(/-$/, "");
  return slug || "untitled";
}
582
/**
 * Strip export artifacts from a post's text before it is turned into markdown.
 *
 * Removes, in order: a "## Attachments" heading together with the list items
 * directly below it, any remaining "- media/<digits>/..." list entries, and
 * "#tag@name" tokens. The result is trimmed.
 *
 * @param {string} text - Raw post content.
 * @returns {string} Cleaned content.
 */
function cleanContent2(text) {
  return text
    .replace(/##\s*Attachments\n(?:- [^\n]+\n?)*/g, "")
    .replace(/- media\/\d+\/[^\n]+/g, "")
    // Unicode-aware tag match: `\w` is ASCII-only and would leave tags written
    // in Cyrillic (or any other non-Latin script) in the text.
    .replace(/#[\p{L}\p{N}_]+@\w+/gu, "")
    .trim();
}
585
/**
 * Split post content into a title and a body.
 *
 * The title is the first non-blank line with the characters `# @ [ ] *`
 * removed; the body is everything after that line, trimmed. Blank lines
 * inside the body are kept so markdown paragraph breaks survive.
 *
 * @param {string} content - Cleaned post content.
 * @returns {{ title: string, body: string }} Title ("Untitled" when there is
 *   no usable first line) and body ("" when there is nothing after the title).
 */
function extractTitleAndBody(content) {
  const lines = content.split("\n");
  const titleIndex = lines.findIndex((line) => line.trim());
  if (titleIndex === -1) {
    return { title: "Untitled", body: "" };
  }
  const title = lines[titleIndex].replace(/[#@[\]*]/g, "").trim() || "Untitled";
  const body = lines.slice(titleIndex + 1).join("\n").trim();
  return { title, body };
}
591
/**
 * Write one exported post to disk as markdown, once per output language.
 *
 * With translation enabled (`options.translate.targetLangs` non-empty) the
 * post is translated into every target language that differs from the source
 * language, and optionally kept in the source language too; files go to
 * `<outputDir>/<lang>/<slug>/index.md`. Without translation a single file is
 * written to `<outputDir>/<slug>/index.md`. A language whose post directory
 * already exists is skipped. Media files are copied into
 * `<mediaDir or outputDir/../media>/<zero-padded msgId>/` and referenced at
 * the end of the body.
 *
 * @param {object} post - Exported post; reads `content`, `date`, `msgId`,
 *   `link` and `mediaFiles`.
 * @param {object} options - The options passed to exportAndTranslate; reads
 *   `translate`, `outputDir`, `mediaDir` and `onProgress`.
 * @param {string} exportDir - Directory the `post.mediaFiles` paths are relative to.
 * @returns {Promise<string[]>} `<lang>/<slug>` for every file written.
 */
async function processPost(post, options, exportDir) {
  const { translate, outputDir, mediaDir, onProgress } = options;
  const createdPosts = [];
  const cleanedContent = cleanContent2(post.content || "");
  const { title: originalTitle, body: originalBody } = extractTitleAndBody(cleanedContent);
  const date = post.date.toISOString().split("T")[0];
  const isTranslating = Boolean(translate && translate.targetLangs.length > 0);
  const languages = [];
  if (isTranslating) {
    const sourceLang = translate.sourceLang || "ru";
    if (translate.keepOriginal) {
      languages.push({
        lang: sourceLang,
        title: originalTitle,
        body: originalBody,
        isOriginal: true
      });
    }
    for (const targetLang of translate.targetLangs) {
      if (targetLang === sourceLang) continue;
      onProgress?.(` Translating to ${targetLang}...`);
      const translateOpts = {
        apiKey: translate.apiKey,
        apiUrl: translate.apiUrl,
        model: translate.model,
        sourceLang,
        targetLang
      };
      try {
        const translatedTitle = await translateTitle(originalTitle, translateOpts);
        const translatedBody = originalBody ? await translateContent(originalBody, translateOpts) : "";
        languages.push({
          lang: targetLang,
          title: translatedTitle,
          body: translatedBody,
          isOriginal: false
        });
        onProgress?.(` \u2192 ${targetLang}: "${translatedTitle.substring(0, 40)}..."`);
      } catch (error) {
        // Best effort: a failed language is reported and left out; the other
        // languages are still written.
        onProgress?.(` Error translating to ${targetLang}: ${error.message}`);
      }
      // Short pause between translation requests.
      await new Promise((r) => setTimeout(r, 300));
    }
  } else {
    languages.push({
      lang: "original",
      title: originalTitle,
      body: originalBody,
      isOriginal: true
    });
  }
  for (const { lang, title, body, isOriginal } of languages) {
    const slug = generateSlug2(title, lang);
    // Per-language subdirectories are used whenever translation is enabled,
    // even if only one language ended up in the list.
    const postDir = isTranslating ? path2.join(outputDir, lang, slug) : path2.join(outputDir, slug);
    // NOTE(review): posts whose titles produce the same slug (e.g. several
    // media-only posts that all become "untitled") collide here and only the
    // first one is written — confirm whether that is intended.
    if (fs2.existsSync(postDir)) {
      onProgress?.(` Skipping existing: ${lang}/${slug}`);
      continue;
    }
    fs2.mkdirSync(postDir, { recursive: true });
    let finalBody = body;
    const mediaFiles = [];
    if (post.mediaFiles && post.mediaFiles.length > 0) {
      const paddedId = String(post.msgId).padStart(6, "0");
      const targetMediaDir = mediaDir ? path2.join(mediaDir, paddedId) : path2.join(outputDir, "..", "media", paddedId);
      fs2.mkdirSync(targetMediaDir, { recursive: true });
      for (let i = 0; i < post.mediaFiles.length; i++) {
        const sourcePath = path2.join(exportDir, post.mediaFiles[i]);
        const ext = path2.extname(sourcePath).toLowerCase() || ".jpg";
        const newName = `image${i + 1}${ext}`;
        const targetPath = path2.join(targetMediaDir, newName);
        if (!fs2.existsSync(targetPath)) {
          if (!fs2.existsSync(sourcePath)) continue;
          fs2.copyFileSync(sourcePath, targetPath);
        }
        // The media directory is shared by all languages of a post, so a file
        // copied for an earlier language must still be referenced by this one.
        mediaFiles.push(newName);
      }
      if (mediaFiles.length > 0) {
        const isVideo = (f) => /\.(mp4|mov|webm|m4v)$/i.test(f);
        const imageMarkdown = mediaFiles.filter((f) => !isVideo(f)).map((f) => `![](/media/${paddedId}/${f})`).join("\n\n");
        const videoMarkdown = mediaFiles.filter(isVideo).map((f) => `<video src="/media/${paddedId}/${f}" controls></video>`).join("\n\n");
        const mediaMarkdown = [imageMarkdown, videoMarkdown].filter(Boolean).join("\n\n");
        if (mediaMarkdown) {
          finalBody = finalBody + "\n\n" + mediaMarkdown;
        }
      }
    }
    // Inside a double-quoted YAML scalar both `\` and `"` must be escaped;
    // backslashes go first so the ones added for quotes are not doubled.
    const yamlTitle = title.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
    const frontMatter = [
      "---",
      `title: "${yamlTitle}"`,
      `date: ${date}`,
      `lang: ${lang}`,
      `original_link: "${post.link || ""}"`
    ];
    if (!isOriginal) {
      frontMatter.push(`translated_from: "${translate?.sourceLang || "ru"}"`);
    }
    frontMatter.push("---");
    const markdown = `${frontMatter.join("\n")}\n\n${finalBody}\n`;
    fs2.writeFileSync(path2.join(postDir, "index.md"), markdown);
    createdPosts.push(`${lang}/${slug}`);
    onProgress?.(` Created: ${lang}/${slug}`);
  }
  return createdPosts;
}
697
/**
 * Export a Telegram channel and turn every new post into markdown.
 *
 * Step 1 runs exportTelegramChannel into `<outputDir>/../.telegram-export`.
 * Step 2 hands each exported post to processPost, skipping posts already in
 * `processedLog` and posts with neither content nor media. Every post that
 * produced at least one file is recorded in `processedLog` (updated in place)
 * and reported through `onProcessedLog`.
 *
 * @param {object} options - See ExportAndTranslateOptions.
 * @returns {Promise<object>} Counts of exported/processed/skipped posts plus
 *   the channel metadata and session from the export.
 */
async function exportAndTranslate(options) {
  const { apiId, apiHash, session, channel, outputDir, limit, since } = options;
  const { onProgress, onSession, onProcessedLog } = options;
  const { downloadMedia = true, processedLog = {} } = options;
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const exportDir = path2.join(outputDir, "..", ".telegram-export");
  for (const dir of [exportDir, outputDir]) {
    fs2.mkdirSync(dir, { recursive: true });
  }

  onProgress?.("Step 1: Exporting from Telegram...");
  const exportResult = await exportTelegramChannel({
    apiId,
    apiHash,
    session,
    target: channel,
    outputDir: exportDir,
    limit,
    since,
    downloadMedia,
    onProgress: (current, total, msg) => {
      onProgress?.(` ${msg} (${current}/${total})`);
    },
    onSession
  });
  onProgress?.(` Exported ${exportResult.posts.length} posts from "${exportResult.channelMeta.title}"`);

  onProgress?.("\nStep 2: Processing posts...");
  const counts = { processed: 0, skipped: 0 };
  for (const post of exportResult.posts) {
    const postId = `${post.channelUsername}-${post.msgId}`;
    // Already handled in an earlier run, or nothing to publish.
    if (processedLog[postId] || (!post.content && !post.hasMedia)) {
      counts.skipped += 1;
      continue;
    }
    onProgress?.(`\nProcessing: ${postId}`);
    try {
      const created = await processPost(post, options, exportDir);
      if (created.length === 0) {
        counts.skipped += 1;
      } else {
        counts.processed += 1;
        processedLog[postId] = {
          posts: created,
          date: post.date.toISOString(),
          processedAt: (/* @__PURE__ */ new Date()).toISOString()
        };
        onProcessedLog?.(processedLog);
      }
    } catch (error) {
      // A failing post is reported and counted as skipped; the run continues.
      onProgress?.(` Error: ${error.message}`);
      counts.skipped += 1;
    }
    await pause(200);
  }

  return {
    exported: exportResult.posts.length,
    processed: counts.processed,
    skipped: counts.skipped,
    channelMeta: exportResult.channelMeta,
    session: exportResult.session
  };
}
533
773
  export {
534
774
  categorizePost,
535
775
  cleanContent,
536
776
  configureAnalytics,
537
777
  deduplicatePosts,
778
+ exportAndTranslate,
538
779
  exportTelegramChannel,
539
780
  extractAttachments,
540
781
  extractExcerpt,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "koztv-blog-tools",
3
- "version": "1.1.1",
3
+ "version": "1.2.1",
4
4
  "description": "Shared utilities for Telegram-based blog sites",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",