koztv-blog-tools 1.0.6 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/export-telegram.js +121 -0
- package/dist/index.d.mts +75 -1
- package/dist/index.d.ts +75 -1
- package/dist/index.js +238 -0
- package/dist/index.mjs +235 -0
- package/package.json +17 -5
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
#!/usr/bin/env node
/**
 * CLI for exporting Telegram channel posts
 *
 * Usage:
 *   npx koztv-blog-tools export --channel @channelname --out ./export
 *
 * Environment variables:
 *   TELEGRAM_API_ID   - API ID from https://my.telegram.org
 *   TELEGRAM_API_HASH - API Hash from https://my.telegram.org
 *   TELEGRAM_SESSION  - Session string (for re-authentication)
 *   TELEGRAM_CHANNEL  - Default channel (optional)
 */

const { exportTelegramChannel } = require('../dist/index.js');
const fs = require('fs');
const path = require('path');

// Parse arguments (everything after `node <script>`)
const args = process.argv.slice(2);

/**
 * Return the value that follows `--<name>` on the command line.
 *
 * Returns null when the flag is absent, when it is the last argument, or when
 * the next argument is itself a `--flag` (so `--since --no-media` does not
 * treat `--no-media` as the date).
 */
const getArg = (name) => {
  const idx = args.indexOf(`--${name}`);
  if (idx === -1) return null;
  const value = args[idx + 1];
  return value !== undefined && !value.startsWith('--') ? value : null;
};

/** Print an error message and terminate with exit code 1. */
const fail = (message) => {
  console.error(`Error: ${message}`);
  process.exit(1);
};

/** Parse `--limit`; a missing value means 0 (no limit), anything non-numeric is rejected. */
const parseLimit = (raw) => {
  if (raw === null) return 0;
  if (!/^\d+$/.test(raw)) fail(`--limit must be a non-negative integer, got "${raw}"`);
  return parseInt(raw, 10);
};

/** Reject date options that `new Date()` cannot parse instead of passing an Invalid Date on. */
const parseDate = (name, raw) => {
  if (!raw) return undefined;
  const date = new Date(raw);
  if (Number.isNaN(date.getTime())) fail(`--${name} must be a valid date (YYYY-MM-DD), got "${raw}"`);
  return date;
};

const CHANNEL = getArg('channel') || process.env.TELEGRAM_CHANNEL;
const OUTPUT_DIR = getArg('out') || './export';
const LIMIT = parseLimit(getArg('limit'));
const SINCE = getArg('since');
const UNTIL = getArg('until');
const SINCE_DATE = parseDate('since', SINCE);
const UNTIL_DATE = parseDate('until', UNTIL);
const NO_MEDIA = args.includes('--no-media');

const API_ID = parseInt(process.env.TELEGRAM_API_ID || '0', 10);
const API_HASH = process.env.TELEGRAM_API_HASH || '';
const SESSION = process.env.TELEGRAM_SESSION || '';
const SESSION_FILE = path.join(OUTPUT_DIR, '.telegram-session');

if (!CHANNEL) {
  console.error('Error: Channel is required');
  console.error('');
  console.error('Usage:');
  console.error(' npx koztv-blog-tools export --channel @channelname --out ./export');
  console.error('');
  console.error('Environment variables:');
  console.error(' TELEGRAM_API_ID - API ID from https://my.telegram.org');
  console.error(' TELEGRAM_API_HASH - API Hash');
  console.error(' TELEGRAM_SESSION - Session string (optional, for re-auth)');
  console.error(' TELEGRAM_CHANNEL - Default channel (optional)');
  console.error('');
  console.error('Options:');
  console.error(' --channel @name Target channel');
  console.error(' --out ./path Output directory (default: ./export)');
  console.error(' --limit N Max posts to export');
  console.error(' --since YYYY-MM-DD Export posts since date');
  console.error(' --until YYYY-MM-DD Export posts until date');
  console.error(' --no-media Skip media download');
  process.exit(1);
}

if (!API_ID || !API_HASH) {
  console.error('Error: TELEGRAM_API_ID and TELEGRAM_API_HASH are required');
  console.error('Get them from https://my.telegram.org');
  process.exit(1);
}

// Read saved session if exists (the environment variable takes precedence)
let savedSession = SESSION;
if (!savedSession && fs.existsSync(SESSION_FILE)) {
  savedSession = fs.readFileSync(SESSION_FILE, 'utf-8').trim();
}

/**
 * Run the export with the parsed options, print a summary, and exit with
 * code 1 if the export throws.
 */
async function main() {
  console.log('Telegram Channel Exporter');
  console.log('=========================');
  console.log(`Channel: ${CHANNEL}`);
  console.log(`Output: ${OUTPUT_DIR}`);
  if (LIMIT) console.log(`Limit: ${LIMIT}`);
  if (SINCE) console.log(`Since: ${SINCE}`);
  if (UNTIL) console.log(`Until: ${UNTIL}`);
  console.log(`Media: ${NO_MEDIA ? 'skip' : 'download'}`);
  console.log('');

  try {
    const result = await exportTelegramChannel({
      apiId: API_ID,
      apiHash: API_HASH,
      session: savedSession,
      target: CHANNEL,
      outputDir: OUTPUT_DIR,
      limit: LIMIT,
      since: SINCE_DATE,
      until: UNTIL_DATE,
      downloadMedia: !NO_MEDIA,
      onProgress: (current, total, msg) => {
        // `\r` rewrites the same terminal line on every update.
        process.stdout.write(`\r${msg} (${current}/${total})`);
      },
      onSession: (session) => {
        // Save session for future use. The session string is a login
        // credential, so the file is restricted to the current user.
        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
        fs.writeFileSync(SESSION_FILE, session, { mode: 0o600 });
        try {
          // `mode` only applies when the file is created; tighten existing files too.
          fs.chmodSync(SESSION_FILE, 0o600);
        } catch (chmodError) {
          // Best effort: some file systems do not support changing permissions.
        }
        console.log('\nSession saved to', SESSION_FILE);
      },
    });

    console.log('\n');
    console.log('=========================');
    console.log(`Exported ${result.posts.length} posts from "${result.channelMeta.title}"`);
    console.log(`Output: ${OUTPUT_DIR}`);

    // Output session string if new
    if (result.session && result.session !== savedSession) {
      console.log('\nNew session string (save to TELEGRAM_SESSION):');
      console.log(result.session);
    }
  } catch (error) {
    // Thrown values are not guaranteed to be Error instances.
    console.error('\nError:', error instanceof Error ? error.message : error);
    process.exit(1);
  }
}

main();
|
package/dist/index.d.mts
CHANGED
|
@@ -142,4 +142,78 @@ declare function translateTitle(title: string, options: TranslateOptions): Promi
|
|
|
142
142
|
*/
|
|
143
143
|
declare function generateEnglishSlug(title: string): string;
|
|
144
144
|
|
|
145
|
-
|
|
145
|
+
/**
|
|
146
|
+
* Telegram channel export utilities using gramjs (MTProto)
|
|
147
|
+
*/
|
|
148
|
+
|
|
149
|
+
interface TelegramExportOptions {
|
|
150
|
+
/** Telegram API ID from https://my.telegram.org */
|
|
151
|
+
apiId: number;
|
|
152
|
+
/** Telegram API Hash from https://my.telegram.org */
|
|
153
|
+
apiHash: string;
|
|
154
|
+
/** Session string (for re-authentication). If empty, will prompt for login */
|
|
155
|
+
session?: string;
|
|
156
|
+
/** Target channel username, link or ID */
|
|
157
|
+
target: string;
|
|
158
|
+
/** Output directory for exported data */
|
|
159
|
+
outputDir: string;
|
|
160
|
+
/** Maximum number of posts to export (0 = all) */
|
|
161
|
+
limit?: number;
|
|
162
|
+
/** Only export posts since this date */
|
|
163
|
+
since?: Date;
|
|
164
|
+
/** Only export posts until this date */
|
|
165
|
+
until?: Date;
|
|
166
|
+
/** Download media files */
|
|
167
|
+
downloadMedia?: boolean;
|
|
168
|
+
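/** Number of concurrent media downloads. NOTE(review): currently has no effect; the shipped exportTelegramChannel implementation never reads this value and downloads media one message at a time. */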
|
|
169
|
+
mediaWorkers?: number;
|
|
170
|
+
/** Callback for progress updates */
|
|
171
|
+
onProgress?: (current: number, total: number, message: string) => void;
|
|
172
|
+
/** Callback to get phone number for login */
|
|
173
|
+
onPhoneNumber?: () => Promise<string>;
|
|
174
|
+
/** Callback to get verification code */
|
|
175
|
+
onCode?: () => Promise<string>;
|
|
176
|
+
/** Callback to get 2FA password */
|
|
177
|
+
onPassword?: () => Promise<string>;
|
|
178
|
+
/** Callback when session string is generated (save this for future use) */
|
|
179
|
+
onSession?: (session: string) => void;
|
|
180
|
+
}
|
|
181
|
+
interface ExportedPost {
|
|
182
|
+
msgId: number;
|
|
183
|
+
date: Date;
|
|
184
|
+
content: string;
|
|
185
|
+
hasMedia: boolean;
|
|
186
|
+
mediaFiles: string[];
|
|
187
|
+
views?: number;
|
|
188
|
+
forwards?: number;
|
|
189
|
+
link: string;
|
|
190
|
+
channelUsername: string;
|
|
191
|
+
channelTitle: string;
|
|
192
|
+
}
|
|
193
|
+
interface ExportResult {
|
|
194
|
+
channelMeta: {
|
|
195
|
+
id: number;
|
|
196
|
+
username: string;
|
|
197
|
+
title: string;
|
|
198
|
+
description?: string;
|
|
199
|
+
participantsCount?: number;
|
|
200
|
+
};
|
|
201
|
+
posts: ExportedPost[];
|
|
202
|
+
session: string;
|
|
203
|
+
}
|
|
204
|
+
/**
|
|
205
|
+
* Export messages from a Telegram channel
|
|
206
|
+
*/
|
|
207
|
+
declare function exportTelegramChannel(options: TelegramExportOptions): Promise<ExportResult>;
|
|
208
|
+
/**
|
|
209
|
+
* Format a post as markdown with YAML frontmatter
|
|
210
|
+
*/
|
|
211
|
+
declare function formatPostMarkdown(post: ExportedPost): string;
|
|
212
|
+
/**
|
|
213
|
+
* Resume export from a saved session
|
|
214
|
+
*/
|
|
215
|
+
declare function resumeExport(options: Omit<TelegramExportOptions, 'onPhoneNumber' | 'onCode' | 'onPassword'> & {
|
|
216
|
+
session: string;
|
|
217
|
+
}): Promise<ExportResult>;
|
|
218
|
+
|
|
219
|
+
export { type AnalyticsConfig, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type TelegramExportOptions, type TranslateOptions, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
|
package/dist/index.d.ts
CHANGED
|
@@ -142,4 +142,78 @@ declare function translateTitle(title: string, options: TranslateOptions): Promi
|
|
|
142
142
|
*/
|
|
143
143
|
declare function generateEnglishSlug(title: string): string;
|
|
144
144
|
|
|
145
|
-
|
|
145
|
+
/**
|
|
146
|
+
* Telegram channel export utilities using gramjs (MTProto)
|
|
147
|
+
*/
|
|
148
|
+
|
|
149
|
+
interface TelegramExportOptions {
|
|
150
|
+
/** Telegram API ID from https://my.telegram.org */
|
|
151
|
+
apiId: number;
|
|
152
|
+
/** Telegram API Hash from https://my.telegram.org */
|
|
153
|
+
apiHash: string;
|
|
154
|
+
/** Session string (for re-authentication). If empty, will prompt for login */
|
|
155
|
+
session?: string;
|
|
156
|
+
/** Target channel username, link or ID */
|
|
157
|
+
target: string;
|
|
158
|
+
/** Output directory for exported data */
|
|
159
|
+
outputDir: string;
|
|
160
|
+
/** Maximum number of posts to export (0 = all) */
|
|
161
|
+
limit?: number;
|
|
162
|
+
/** Only export posts since this date */
|
|
163
|
+
since?: Date;
|
|
164
|
+
/** Only export posts until this date */
|
|
165
|
+
until?: Date;
|
|
166
|
+
/** Download media files */
|
|
167
|
+
downloadMedia?: boolean;
|
|
168
|
+
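/** Number of concurrent media downloads. NOTE(review): currently has no effect; the shipped exportTelegramChannel implementation never reads this value and downloads media one message at a time. */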
|
|
169
|
+
mediaWorkers?: number;
|
|
170
|
+
/** Callback for progress updates */
|
|
171
|
+
onProgress?: (current: number, total: number, message: string) => void;
|
|
172
|
+
/** Callback to get phone number for login */
|
|
173
|
+
onPhoneNumber?: () => Promise<string>;
|
|
174
|
+
/** Callback to get verification code */
|
|
175
|
+
onCode?: () => Promise<string>;
|
|
176
|
+
/** Callback to get 2FA password */
|
|
177
|
+
onPassword?: () => Promise<string>;
|
|
178
|
+
/** Callback when session string is generated (save this for future use) */
|
|
179
|
+
onSession?: (session: string) => void;
|
|
180
|
+
}
|
|
181
|
+
interface ExportedPost {
|
|
182
|
+
msgId: number;
|
|
183
|
+
date: Date;
|
|
184
|
+
content: string;
|
|
185
|
+
hasMedia: boolean;
|
|
186
|
+
mediaFiles: string[];
|
|
187
|
+
views?: number;
|
|
188
|
+
forwards?: number;
|
|
189
|
+
link: string;
|
|
190
|
+
channelUsername: string;
|
|
191
|
+
channelTitle: string;
|
|
192
|
+
}
|
|
193
|
+
interface ExportResult {
|
|
194
|
+
channelMeta: {
|
|
195
|
+
id: number;
|
|
196
|
+
username: string;
|
|
197
|
+
title: string;
|
|
198
|
+
description?: string;
|
|
199
|
+
participantsCount?: number;
|
|
200
|
+
};
|
|
201
|
+
posts: ExportedPost[];
|
|
202
|
+
session: string;
|
|
203
|
+
}
|
|
204
|
+
/**
|
|
205
|
+
* Export messages from a Telegram channel
|
|
206
|
+
*/
|
|
207
|
+
declare function exportTelegramChannel(options: TelegramExportOptions): Promise<ExportResult>;
|
|
208
|
+
/**
|
|
209
|
+
* Format a post as markdown with YAML frontmatter
|
|
210
|
+
*/
|
|
211
|
+
declare function formatPostMarkdown(post: ExportedPost): string;
|
|
212
|
+
/**
|
|
213
|
+
* Resume export from a saved session
|
|
214
|
+
*/
|
|
215
|
+
declare function resumeExport(options: Omit<TelegramExportOptions, 'onPhoneNumber' | 'onCode' | 'onPassword'> & {
|
|
216
|
+
session: string;
|
|
217
|
+
}): Promise<ExportResult>;
|
|
218
|
+
|
|
219
|
+
export { type AnalyticsConfig, type ExportResult, type ExportedPost, type GoalName, type GoalParams, type GroupedPost, type ParsePostOptions, type Post, type TelegramExportOptions, type TranslateOptions, categorizePost, cleanContent, configureAnalytics, deduplicatePosts, exportTelegramChannel, extractAttachments, extractExcerpt, extractTitle, formatPostMarkdown, generateEnglishSlug, generateSlug, groupPosts, parsePost, resumeExport, trackBookAppointment, trackGoal, trackLearnMore, trackServiceClick, trackTelegramClick, translateContent, translateTitle };
|
package/dist/index.js
CHANGED
|
@@ -34,13 +34,16 @@ __export(index_exports, {
|
|
|
34
34
|
cleanContent: () => cleanContent,
|
|
35
35
|
configureAnalytics: () => configureAnalytics,
|
|
36
36
|
deduplicatePosts: () => deduplicatePosts,
|
|
37
|
+
exportTelegramChannel: () => exportTelegramChannel,
|
|
37
38
|
extractAttachments: () => extractAttachments,
|
|
38
39
|
extractExcerpt: () => extractExcerpt,
|
|
39
40
|
extractTitle: () => extractTitle,
|
|
41
|
+
formatPostMarkdown: () => formatPostMarkdown,
|
|
40
42
|
generateEnglishSlug: () => generateEnglishSlug,
|
|
41
43
|
generateSlug: () => generateSlug,
|
|
42
44
|
groupPosts: () => groupPosts,
|
|
43
45
|
parsePost: () => parsePost,
|
|
46
|
+
resumeExport: () => resumeExport,
|
|
44
47
|
trackBookAppointment: () => trackBookAppointment,
|
|
45
48
|
trackGoal: () => trackGoal,
|
|
46
49
|
trackLearnMore: () => trackLearnMore,
|
|
@@ -351,19 +354,254 @@ async function translateTitle(title, options) {
|
|
|
351
354
|
/**
 * Build a URL slug from an English title.
 *
 * Lower-cases the title, drops every character other than a-z, 0-9,
 * whitespace and hyphens, turns whitespace runs into single hyphens, and
 * limits the result to 60 characters.
 *
 * @param title Title to convert.
 * @returns The slug, without leading or trailing hyphens (may be empty).
 */
function generateEnglishSlug(title) {
  const maxLength = 60;
  return title
    .toLowerCase()
    .replace(/[^a-z0-9\s-]/g, "")
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-|-$/g, "")
    .substring(0, maxLength)
    // Truncation can cut right after a hyphen; trim it again so the slug
    // never ends in "-".
    .replace(/-$/, "");
}
|
|
357
|
+
|
|
358
|
+
// src/telegram.ts
|
|
359
|
+
var import_telegram = require("telegram");
|
|
360
|
+
var import_sessions = require("telegram/sessions");
|
|
361
|
+
var fs = __toESM(require("fs"));
|
|
362
|
+
var path = __toESM(require("path"));
|
|
363
|
+
var readline = __toESM(require("readline"));
|
|
364
|
+
/**
 * Ask a single question on the terminal and resolve with the line typed.
 *
 * A readline interface on process.stdin / process.stdout is opened for the
 * question and closed once the answer has been received.
 *
 * @param prompt Text shown before the cursor.
 * @returns Promise resolving to the answer passed to the question callback.
 */
async function defaultReadline(prompt) {
  const terminal = readline.createInterface({ input: process.stdin, output: process.stdout });
  const ask = (resolve) => {
    terminal.question(prompt, (reply) => {
      terminal.close();
      resolve(reply);
    });
  };
  return new Promise(ask);
}
|
|
376
|
+
/**
 * Export messages from a Telegram channel into `outputDir`.
 *
 * Starts a gramjs client (prompting on stdin unless login callbacks are
 * given), then writes `channel_meta.json`, one markdown file per message under
 * `posts/`, downloaded media under `media/<paddedId>/`, and finally
 * `posts.ndjson` containing every exported post.
 *
 * @param options Export settings. `apiId`, `apiHash`, `target` and `outputDir`
 *   are used as given; `limit` defaults to 0 (no limit) and `downloadMedia` to
 *   true. `mediaWorkers` is accepted but has no effect: media is downloaded
 *   sequentially, one message at a time.
 * @returns `{ channelMeta, posts, session }`, where `session` is the string
 *   returned by `client.session.save()` after login.
 * @throws Error when `target` does not resolve to an `Api.Channel`; errors
 *   from login, entity lookup, message iteration and file writes propagate.
 */
async function exportTelegramChannel(options) {
  const {
    apiId,
    apiHash,
    session = "",
    target,
    outputDir,
    limit = 0,
    since,
    until,
    downloadMedia = true,
    onProgress,
    onPhoneNumber = () => defaultReadline("Phone number: "),
    onCode = () => defaultReadline("Verification code: "),
    onPassword = () => defaultReadline("2FA Password: "),
    onSession
  } = options;
  const postsDir = path.join(outputDir, "posts");
  const mediaDir = path.join(outputDir, "media");
  fs.mkdirSync(postsDir, { recursive: true });
  fs.mkdirSync(mediaDir, { recursive: true });
  const stringSession = new import_sessions.StringSession(session);
  const client = new import_telegram.TelegramClient(stringSession, apiId, apiHash, {
    connectionRetries: 5
  });
  try {
    await client.start({
      phoneNumber: onPhoneNumber,
      phoneCode: onCode,
      password: onPassword,
      onError: (err) => console.error("Auth error:", err)
    });
    const newSession = client.session.save();
    if (onSession) {
      onSession(newSession);
    }
    const entity = await client.getEntity(target);
    if (!(entity instanceof import_telegram.Api.Channel)) {
      throw new Error(`Target "${target}" is not a channel`);
    }
    const channelMeta = {
      id: entity.id.toJSNumber(),
      username: entity.username || "",
      title: entity.title,
      description: void 0,
      participantsCount: void 0
    };
    try {
      const fullChannel = await client.invoke(
        new import_telegram.Api.channels.GetFullChannel({ channel: entity })
      );
      if (fullChannel.fullChat instanceof import_telegram.Api.ChannelFull) {
        channelMeta.description = fullChannel.fullChat.about;
        channelMeta.participantsCount = fullChannel.fullChat.participantsCount;
      }
    } catch (e) {
      // Best effort: description and participant count are optional extras,
      // so the export continues without them.
    }
    fs.writeFileSync(
      path.join(outputDir, "channel_meta.json"),
      JSON.stringify(channelMeta, null, 2)
    );
    const posts = [];
    let processedCount = 0;
    const iterParams = {
      reverse: true
      // Oldest first
    };
    if (limit > 0) {
      iterParams.limit = limit;
    }
    if (since) {
      iterParams.offsetDate = Math.floor(since.getTime() / 1e3);
    }
    // Total used only for progress reporting; stays 0 when it cannot be fetched.
    let totalMessages = limit || 0;
    if (!limit) {
      try {
        const history = await client.invoke(
          new import_telegram.Api.messages.GetHistory({
            peer: entity,
            limit: 1,
            offsetId: 0,
            offsetDate: 0,
            addOffset: 0,
            maxId: 0,
            minId: 0,
            hash: 0n
          })
        );
        if ("count" in history) {
          totalMessages = history.count;
        }
      } catch (e) {
        // Best effort: the count only feeds onProgress.
      }
    }
    for await (const message of client.iterMessages(entity, iterParams)) {
      // Messages arrive oldest first (reverse: true). Anything older than
      // `since` is skipped, and the first message newer than `until` ends the
      // export because every later message is newer still.
      if (since && message.date && message.date * 1e3 < since.getTime()) {
        continue;
      }
      if (until && message.date && message.date * 1e3 > until.getTime()) {
        break;
      }
      processedCount++;
      if (onProgress) {
        onProgress(processedCount, totalMessages, `Processing message ${message.id}`);
      }
      const msgId = message.id;
      const paddedId = String(msgId).padStart(6, "0");
      const postMediaDir = path.join(mediaDir, paddedId);
      const mediaFiles = [];
      if (downloadMedia && message.media) {
        try {
          const buffer = await client.downloadMedia(message.media, {});
          if (buffer) {
            // Pick a file extension: photos are saved as .jpg, documents use
            // their MIME subtype, a video attribute forces .mp4, and an
            // explicit file name attribute takes its own extension.
            let ext = ".bin";
            if (message.media instanceof import_telegram.Api.MessageMediaPhoto) {
              ext = ".jpg";
            } else if (message.media instanceof import_telegram.Api.MessageMediaDocument) {
              const doc = message.media.document;
              if (doc instanceof import_telegram.Api.Document) {
                const mimeExt = doc.mimeType?.split("/")[1];
                if (mimeExt) {
                  ext = "." + mimeExt.replace("jpeg", "jpg");
                }
                for (const attr of doc.attributes) {
                  if (attr instanceof import_telegram.Api.DocumentAttributeVideo) {
                    ext = ".mp4";
                  }
                  if (attr instanceof import_telegram.Api.DocumentAttributeFilename) {
                    ext = path.extname(attr.fileName) || ext;
                  }
                }
              }
            }
            const mediaFileName = `media${ext}`;
            const mediaPath = path.join(postMediaDir, mediaFileName);
            // Create the per-post directory only once there is data to store,
            // so failed or empty downloads leave no empty directories behind.
            fs.mkdirSync(postMediaDir, { recursive: true });
            fs.writeFileSync(mediaPath, buffer);
            mediaFiles.push(`media/${paddedId}/${mediaFileName}`);
          }
        } catch (e) {
          // A failed download must not abort the export; the post is still
          // written with hasMedia = true and no media files.
          console.error(`Error downloading media for message ${msgId}:`, e);
        }
      }
      const content = message.message || "";
      // Public links exist only for channels that have a username.
      const link = channelMeta.username ? `https://t.me/${channelMeta.username}/${msgId}` : "";
      const post = {
        msgId,
        date: new Date(message.date * 1e3),
        content,
        hasMedia: mediaFiles.length > 0 || !!message.media,
        mediaFiles,
        views: message.views,
        forwards: message.forwards,
        link,
        channelUsername: channelMeta.username,
        channelTitle: channelMeta.title
      };
      posts.push(post);
      const markdown = formatPostMarkdown(post);
      fs.writeFileSync(path.join(postsDir, `${paddedId}.md`), markdown);
    }
    const ndjsonPath = path.join(outputDir, "posts.ndjson");
    const ndjsonContent = posts.map((p) => JSON.stringify(p)).join("\n");
    fs.writeFileSync(ndjsonPath, ndjsonContent);
    return {
      channelMeta,
      posts,
      session: newSession
    };
  } finally {
    // Always release the connection, including when the export fails part
    // way through. A disconnect failure is logged rather than thrown so it
    // cannot hide the original error.
    try {
      await client.disconnect();
    } catch (disconnectError) {
      console.error("Error disconnecting Telegram client:", disconnectError);
    }
  }
}
|
|
548
|
+
/**
 * Format a post as markdown with YAML frontmatter.
 *
 * The frontmatter carries msg_id, date (ISO 8601), channel_username,
 * channel_title, link and has_media, plus views / forwards when they are
 * defined. The body is the post content followed, when media files exist, by
 * an "## Attachments" section: images are embedded, other files are listed.
 *
 * @param post Exported post; `post.date` must be a valid Date.
 * @returns The complete markdown document as a string.
 */
function formatPostMarkdown(post) {
  // Emit a YAML double-quoted scalar. Backslashes must be escaped before
  // quotes, and line breaks are escaped so a value cannot end the
  // frontmatter early.
  const yamlQuote = (value) => {
    const escaped = String(value)
      .replace(/\\/g, "\\\\")
      .replace(/"/g, '\\"')
      .replace(/\r/g, "\\r")
      .replace(/\n/g, "\\n");
    return `"${escaped}"`;
  };
  const imageExtensions = [".jpg", ".jpeg", ".png", ".gif", ".webp"];
  const frontmatterLines = [
    "---",
    `msg_id: ${post.msgId}`,
    `date: ${post.date.toISOString()}`,
    `channel_username: ${yamlQuote(post.channelUsername)}`,
    `channel_title: ${yamlQuote(post.channelTitle)}`,
    `link: ${yamlQuote(post.link)}`,
    `has_media: ${post.hasMedia}`
  ];
  if (post.views !== void 0) {
    frontmatterLines.push(`views: ${post.views}`);
  }
  if (post.forwards !== void 0) {
    frontmatterLines.push(`forwards: ${post.forwards}`);
  }
  const frontmatter = frontmatterLines.join("\n") + "\n---\n\n";
  let body = post.content || "";
  if (post.mediaFiles.length > 0) {
    body += "\n\n## Attachments\n\n";
    for (const file of post.mediaFiles) {
      const ext = path.extname(file).toLowerCase();
      // Images are embedded inline; every other file type becomes a list item.
      body += imageExtensions.includes(ext) ? `![](${file})\n` : `- ${file}\n`;
    }
  }
  return frontmatter + body;
}
|
|
583
|
+
/**
 * Run an export that requires an existing session string.
 *
 * @param options Same options as exportTelegramChannel; `session` must be non-empty.
 * @returns The result of exportTelegramChannel(options).
 * @throws Error when `options.session` is missing or empty.
 */
async function resumeExport(options) {
  const hasSession = Boolean(options.session);
  if (hasSession) {
    return exportTelegramChannel(options);
  }
  throw new Error("Session string is required for resumeExport");
}
|
|
354
589
|
// Annotate the CommonJS export names for ESM import in node:
|
|
355
590
|
0 && (module.exports = {
|
|
356
591
|
categorizePost,
|
|
357
592
|
cleanContent,
|
|
358
593
|
configureAnalytics,
|
|
359
594
|
deduplicatePosts,
|
|
595
|
+
exportTelegramChannel,
|
|
360
596
|
extractAttachments,
|
|
361
597
|
extractExcerpt,
|
|
362
598
|
extractTitle,
|
|
599
|
+
formatPostMarkdown,
|
|
363
600
|
generateEnglishSlug,
|
|
364
601
|
generateSlug,
|
|
365
602
|
groupPosts,
|
|
366
603
|
parsePost,
|
|
604
|
+
resumeExport,
|
|
367
605
|
trackBookAppointment,
|
|
368
606
|
trackGoal,
|
|
369
607
|
trackLearnMore,
|
package/dist/index.mjs
CHANGED
|
@@ -298,18 +298,253 @@ async function translateTitle(title, options) {
|
|
|
298
298
|
/**
 * Build a URL slug from an English title.
 *
 * Lower-cases the title, drops every character other than a-z, 0-9,
 * whitespace and hyphens, turns whitespace runs into single hyphens, and
 * limits the result to 60 characters.
 *
 * @param title Title to convert.
 * @returns The slug, without leading or trailing hyphens (may be empty).
 */
function generateEnglishSlug(title) {
  const maxLength = 60;
  return title
    .toLowerCase()
    .replace(/[^a-z0-9\s-]/g, "")
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-|-$/g, "")
    .substring(0, maxLength)
    // Truncation can cut right after a hyphen; trim it again so the slug
    // never ends in "-".
    .replace(/-$/, "");
}
|
|
301
|
+
|
|
302
|
+
// src/telegram.ts
|
|
303
|
+
import { TelegramClient, Api } from "telegram";
|
|
304
|
+
import { StringSession } from "telegram/sessions";
|
|
305
|
+
import * as fs from "fs";
|
|
306
|
+
import * as path from "path";
|
|
307
|
+
import * as readline from "readline";
|
|
308
|
+
/**
 * Ask a single question on the terminal and resolve with the line typed.
 *
 * A readline interface on process.stdin / process.stdout is opened for the
 * question and closed once the answer has been received.
 *
 * @param prompt Text shown before the cursor.
 * @returns Promise resolving to the answer passed to the question callback.
 */
async function defaultReadline(prompt) {
  const terminal = readline.createInterface({ input: process.stdin, output: process.stdout });
  const ask = (resolve) => {
    terminal.question(prompt, (reply) => {
      terminal.close();
      resolve(reply);
    });
  };
  return new Promise(ask);
}
|
|
320
|
+
/**
 * Export messages from a Telegram channel into `outputDir`.
 *
 * Starts a gramjs client (prompting on stdin unless login callbacks are
 * given), then writes `channel_meta.json`, one markdown file per message under
 * `posts/`, downloaded media under `media/<paddedId>/`, and finally
 * `posts.ndjson` containing every exported post.
 *
 * @param options Export settings. `apiId`, `apiHash`, `target` and `outputDir`
 *   are used as given; `limit` defaults to 0 (no limit) and `downloadMedia` to
 *   true. `mediaWorkers` is accepted but has no effect: media is downloaded
 *   sequentially, one message at a time.
 * @returns `{ channelMeta, posts, session }`, where `session` is the string
 *   returned by `client.session.save()` after login.
 * @throws Error when `target` does not resolve to an `Api.Channel`; errors
 *   from login, entity lookup, message iteration and file writes propagate.
 */
async function exportTelegramChannel(options) {
  const {
    apiId,
    apiHash,
    session = "",
    target,
    outputDir,
    limit = 0,
    since,
    until,
    downloadMedia = true,
    onProgress,
    onPhoneNumber = () => defaultReadline("Phone number: "),
    onCode = () => defaultReadline("Verification code: "),
    onPassword = () => defaultReadline("2FA Password: "),
    onSession
  } = options;
  const postsDir = path.join(outputDir, "posts");
  const mediaDir = path.join(outputDir, "media");
  fs.mkdirSync(postsDir, { recursive: true });
  fs.mkdirSync(mediaDir, { recursive: true });
  const stringSession = new StringSession(session);
  const client = new TelegramClient(stringSession, apiId, apiHash, {
    connectionRetries: 5
  });
  try {
    await client.start({
      phoneNumber: onPhoneNumber,
      phoneCode: onCode,
      password: onPassword,
      onError: (err) => console.error("Auth error:", err)
    });
    const newSession = client.session.save();
    if (onSession) {
      onSession(newSession);
    }
    const entity = await client.getEntity(target);
    if (!(entity instanceof Api.Channel)) {
      throw new Error(`Target "${target}" is not a channel`);
    }
    const channelMeta = {
      id: entity.id.toJSNumber(),
      username: entity.username || "",
      title: entity.title,
      description: void 0,
      participantsCount: void 0
    };
    try {
      const fullChannel = await client.invoke(
        new Api.channels.GetFullChannel({ channel: entity })
      );
      if (fullChannel.fullChat instanceof Api.ChannelFull) {
        channelMeta.description = fullChannel.fullChat.about;
        channelMeta.participantsCount = fullChannel.fullChat.participantsCount;
      }
    } catch (e) {
      // Best effort: description and participant count are optional extras,
      // so the export continues without them.
    }
    fs.writeFileSync(
      path.join(outputDir, "channel_meta.json"),
      JSON.stringify(channelMeta, null, 2)
    );
    const posts = [];
    let processedCount = 0;
    const iterParams = {
      reverse: true
      // Oldest first
    };
    if (limit > 0) {
      iterParams.limit = limit;
    }
    if (since) {
      iterParams.offsetDate = Math.floor(since.getTime() / 1e3);
    }
    // Total used only for progress reporting; stays 0 when it cannot be fetched.
    let totalMessages = limit || 0;
    if (!limit) {
      try {
        const history = await client.invoke(
          new Api.messages.GetHistory({
            peer: entity,
            limit: 1,
            offsetId: 0,
            offsetDate: 0,
            addOffset: 0,
            maxId: 0,
            minId: 0,
            hash: 0n
          })
        );
        if ("count" in history) {
          totalMessages = history.count;
        }
      } catch (e) {
        // Best effort: the count only feeds onProgress.
      }
    }
    for await (const message of client.iterMessages(entity, iterParams)) {
      // Messages arrive oldest first (reverse: true). Anything older than
      // `since` is skipped, and the first message newer than `until` ends the
      // export because every later message is newer still.
      if (since && message.date && message.date * 1e3 < since.getTime()) {
        continue;
      }
      if (until && message.date && message.date * 1e3 > until.getTime()) {
        break;
      }
      processedCount++;
      if (onProgress) {
        onProgress(processedCount, totalMessages, `Processing message ${message.id}`);
      }
      const msgId = message.id;
      const paddedId = String(msgId).padStart(6, "0");
      const postMediaDir = path.join(mediaDir, paddedId);
      const mediaFiles = [];
      if (downloadMedia && message.media) {
        try {
          const buffer = await client.downloadMedia(message.media, {});
          if (buffer) {
            // Pick a file extension: photos are saved as .jpg, documents use
            // their MIME subtype, a video attribute forces .mp4, and an
            // explicit file name attribute takes its own extension.
            let ext = ".bin";
            if (message.media instanceof Api.MessageMediaPhoto) {
              ext = ".jpg";
            } else if (message.media instanceof Api.MessageMediaDocument) {
              const doc = message.media.document;
              if (doc instanceof Api.Document) {
                const mimeExt = doc.mimeType?.split("/")[1];
                if (mimeExt) {
                  ext = "." + mimeExt.replace("jpeg", "jpg");
                }
                for (const attr of doc.attributes) {
                  if (attr instanceof Api.DocumentAttributeVideo) {
                    ext = ".mp4";
                  }
                  if (attr instanceof Api.DocumentAttributeFilename) {
                    ext = path.extname(attr.fileName) || ext;
                  }
                }
              }
            }
            const mediaFileName = `media${ext}`;
            const mediaPath = path.join(postMediaDir, mediaFileName);
            // Create the per-post directory only once there is data to store,
            // so failed or empty downloads leave no empty directories behind.
            fs.mkdirSync(postMediaDir, { recursive: true });
            fs.writeFileSync(mediaPath, buffer);
            mediaFiles.push(`media/${paddedId}/${mediaFileName}`);
          }
        } catch (e) {
          // A failed download must not abort the export; the post is still
          // written with hasMedia = true and no media files.
          console.error(`Error downloading media for message ${msgId}:`, e);
        }
      }
      const content = message.message || "";
      // Public links exist only for channels that have a username.
      const link = channelMeta.username ? `https://t.me/${channelMeta.username}/${msgId}` : "";
      const post = {
        msgId,
        date: new Date(message.date * 1e3),
        content,
        hasMedia: mediaFiles.length > 0 || !!message.media,
        mediaFiles,
        views: message.views,
        forwards: message.forwards,
        link,
        channelUsername: channelMeta.username,
        channelTitle: channelMeta.title
      };
      posts.push(post);
      const markdown = formatPostMarkdown(post);
      fs.writeFileSync(path.join(postsDir, `${paddedId}.md`), markdown);
    }
    const ndjsonPath = path.join(outputDir, "posts.ndjson");
    const ndjsonContent = posts.map((p) => JSON.stringify(p)).join("\n");
    fs.writeFileSync(ndjsonPath, ndjsonContent);
    return {
      channelMeta,
      posts,
      session: newSession
    };
  } finally {
    // Always release the connection, including when the export fails part
    // way through. A disconnect failure is logged rather than thrown so it
    // cannot hide the original error.
    try {
      await client.disconnect();
    } catch (disconnectError) {
      console.error("Error disconnecting Telegram client:", disconnectError);
    }
  }
}
|
|
492
|
+
/**
 * Format a post as markdown with YAML frontmatter.
 *
 * The frontmatter carries msg_id, date (ISO 8601), channel_username,
 * channel_title, link and has_media, plus views / forwards when they are
 * defined. The body is the post content followed, when media files exist, by
 * an "## Attachments" section: images are embedded, other files are listed.
 *
 * @param post Exported post; `post.date` must be a valid Date.
 * @returns The complete markdown document as a string.
 */
function formatPostMarkdown(post) {
  // Emit a YAML double-quoted scalar. Backslashes must be escaped before
  // quotes, and line breaks are escaped so a value cannot end the
  // frontmatter early.
  const yamlQuote = (value) => {
    const escaped = String(value)
      .replace(/\\/g, "\\\\")
      .replace(/"/g, '\\"')
      .replace(/\r/g, "\\r")
      .replace(/\n/g, "\\n");
    return `"${escaped}"`;
  };
  const imageExtensions = [".jpg", ".jpeg", ".png", ".gif", ".webp"];
  const frontmatterLines = [
    "---",
    `msg_id: ${post.msgId}`,
    `date: ${post.date.toISOString()}`,
    `channel_username: ${yamlQuote(post.channelUsername)}`,
    `channel_title: ${yamlQuote(post.channelTitle)}`,
    `link: ${yamlQuote(post.link)}`,
    `has_media: ${post.hasMedia}`
  ];
  if (post.views !== void 0) {
    frontmatterLines.push(`views: ${post.views}`);
  }
  if (post.forwards !== void 0) {
    frontmatterLines.push(`forwards: ${post.forwards}`);
  }
  const frontmatter = frontmatterLines.join("\n") + "\n---\n\n";
  let body = post.content || "";
  if (post.mediaFiles.length > 0) {
    body += "\n\n## Attachments\n\n";
    for (const file of post.mediaFiles) {
      const ext = path.extname(file).toLowerCase();
      // Images are embedded inline; every other file type becomes a list item.
      body += imageExtensions.includes(ext) ? `![](${file})\n` : `- ${file}\n`;
    }
  }
  return frontmatter + body;
}
|
|
527
|
+
/**
 * Run an export that requires an existing session string.
 *
 * @param options Same options as exportTelegramChannel; `session` must be non-empty.
 * @returns The result of exportTelegramChannel(options).
 * @throws Error when `options.session` is missing or empty.
 */
async function resumeExport(options) {
  const hasSession = Boolean(options.session);
  if (hasSession) {
    return exportTelegramChannel(options);
  }
  throw new Error("Session string is required for resumeExport");
}
|
|
301
533
|
export {
|
|
302
534
|
categorizePost,
|
|
303
535
|
cleanContent,
|
|
304
536
|
configureAnalytics,
|
|
305
537
|
deduplicatePosts,
|
|
538
|
+
exportTelegramChannel,
|
|
306
539
|
extractAttachments,
|
|
307
540
|
extractExcerpt,
|
|
308
541
|
extractTitle,
|
|
542
|
+
formatPostMarkdown,
|
|
309
543
|
generateEnglishSlug,
|
|
310
544
|
generateSlug,
|
|
311
545
|
groupPosts,
|
|
312
546
|
parsePost,
|
|
547
|
+
resumeExport,
|
|
313
548
|
trackBookAppointment,
|
|
314
549
|
trackGoal,
|
|
315
550
|
trackLearnMore,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "koztv-blog-tools",
|
|
3
|
-
"version": "1.0.6",
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"description": "Shared utilities for Telegram-based blog sites",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
@@ -12,8 +12,12 @@
|
|
|
12
12
|
"require": "./dist/index.js"
|
|
13
13
|
}
|
|
14
14
|
},
|
|
15
|
+
"bin": {
|
|
16
|
+
"tg-export": "./bin/export-telegram.js"
|
|
17
|
+
},
|
|
15
18
|
"files": [
|
|
16
|
-
"dist"
|
|
19
|
+
"dist",
|
|
20
|
+
"bin"
|
|
17
21
|
],
|
|
18
22
|
"scripts": {
|
|
19
23
|
"build": "tsup src/index.ts --format cjs,esm --dts --clean",
|
|
@@ -23,23 +27,31 @@
|
|
|
23
27
|
"blog",
|
|
24
28
|
"telegram",
|
|
25
29
|
"markdown",
|
|
26
|
-
"static-site"
|
|
30
|
+
"static-site",
|
|
31
|
+
"telegram-export",
|
|
32
|
+
"mtproto"
|
|
27
33
|
],
|
|
28
34
|
"author": "Koz TV",
|
|
29
35
|
"license": "MIT",
|
|
30
36
|
"devDependencies": {
|
|
37
|
+
"@types/node": "^20.0.0",
|
|
31
38
|
"tsup": "^8.0.0",
|
|
32
39
|
"typescript": "^5.0.0"
|
|
33
40
|
},
|
|
34
41
|
"dependencies": {
|
|
35
|
-
"gray-matter": "^4.0.3"
|
|
42
|
+
"gray-matter": "^4.0.3",
|
|
43
|
+
"telegram": "^2.26.22"
|
|
36
44
|
},
|
|
37
45
|
"peerDependencies": {
|
|
38
|
-
"gray-matter": "^4.0.0"
|
|
46
|
+
"gray-matter": "^4.0.0",
|
|
47
|
+
"telegram": "^2.0.0"
|
|
39
48
|
},
|
|
40
49
|
"peerDependenciesMeta": {
|
|
41
50
|
"gray-matter": {
|
|
42
51
|
"optional": true
|
|
52
|
+
},
|
|
53
|
+
"telegram": {
|
|
54
|
+
"optional": true
|
|
43
55
|
}
|
|
44
56
|
}
|
|
45
57
|
}
|