tokwise 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +185 -0
- package/dist/ask.js +58 -0
- package/dist/browser-cookies.js +160 -0
- package/dist/classify.js +118 -0
- package/dist/cli.js +894 -0
- package/dist/jsonl.js +51 -0
- package/dist/library.js +138 -0
- package/dist/markdown.js +211 -0
- package/dist/media.js +117 -0
- package/dist/paths.js +87 -0
- package/dist/process.js +68 -0
- package/dist/progress.js +56 -0
- package/dist/render.js +114 -0
- package/dist/search.js +226 -0
- package/dist/skill.js +57 -0
- package/dist/store.js +158 -0
- package/dist/tiktok.js +445 -0
- package/dist/transcribe.js +162 -0
- package/dist/types.js +1 -0
- package/package.json +57 -0
package/dist/tiktok.js
ADDED
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
import { stableHash, uniqueStrings } from "./store.js";
|
|
2
|
+
/**
 * Lazily imports the `@tobyg74/tiktok-api-dl` package.
 *
 * @returns {Promise<object>} The module's default export when it has one,
 *   otherwise the module namespace object itself.
 */
async function loadApi() {
    const imported = await import("@tobyg74/tiktok-api-dl");
    return imported.default ?? imported;
}
|
|
6
|
+
/**
 * Fetches the videos of a collection identified by an id, URL or slug.
 *
 * When `options.cookie` is set the request goes through
 * `fetchCollectionWithCookie`; otherwise the library's `Collection` function
 * is used.
 *
 * @param {string} idOrUrl - Collection id, full URL, or `@user/collection/slug`.
 * @param {object} options - Fetch options (`username`, `cookie`, paging fields).
 * @returns {Promise<object[]>} Normalized videos.
 * @throws {Error} When neither a collection id nor a URL can be derived.
 */
export async function fetchCollection(idOrUrl, options) {
    const { collectionId, collectionUrl } = normalizeCollectionInput(idOrUrl, options.username);
    if (!collectionId && !collectionUrl) {
        throw new Error(`Cannot resolve collection "${idOrUrl}". Use a full URL, @user/collection/slug, or run \`tw auth set-username <handle>\`.`);
    }
    const target = collectionUrl ?? collectionId ?? idOrUrl;
    const context = { source: "collection", collectionId, collectionUrl };
    // Only load the third-party library when the cookie path is not taken.
    const fetchPage = options.cookie
        ? fetchCollectionWithCookie
        : requireFunction(await loadApi(), "Collection");
    return fetchPaged(fetchPage, target, options, context);
}
|
|
24
|
+
/**
 * Fetches the videos of a playlist through the library's `Playlist` function.
 *
 * @param {string} idOrUrl - Playlist id or URL.
 * @param {object} options - Fetch options forwarded to `fetchPaged`.
 * @returns {Promise<object[]>} Normalized videos.
 */
export async function fetchPlaylist(idOrUrl, options) {
    const playlistFn = requireFunction(await loadApi(), "Playlist");
    const context = {
        source: "playlist",
        collectionId: inferTrailingId(idOrUrl),
        collectionUrl: looksLikeUrl(idOrUrl) ? idOrUrl : undefined,
    };
    return fetchPaged(playlistFn, idOrUrl, options, context);
}
|
|
33
|
+
/**
 * Fetches a user's liked videos through the library's `GetUserLiked` function.
 *
 * @param {string} username - Account handle whose likes are fetched.
 * @param {object} options - Fetch options forwarded to `fetchPaged`.
 * @returns {Promise<object[]>} Normalized videos.
 */
export async function fetchLiked(username, options) {
    const likedFn = requireFunction(await loadApi(), "GetUserLiked");
    const context = {
        source: "liked",
        collectionName: `${username} liked videos`,
        collectionId: username,
    };
    return fetchPaged(likedFn, username, options, context);
}
|
|
42
|
+
/**
 * Fetches a user's posts through the library's `GetUserPosts` function.
 *
 * @param {string} username - Account handle whose posts are fetched.
 * @param {object} options - Fetch options forwarded to `fetchPaged`.
 * @returns {Promise<object[]>} Normalized videos.
 */
export async function fetchUserPosts(username, options) {
    const postsFn = requireFunction(await loadApi(), "GetUserPosts");
    const context = {
        source: "user",
        collectionName: `${username} posts`,
        collectionId: username,
    };
    return fetchPaged(postsFn, username, options, context);
}
|
|
51
|
+
/**
 * Runs a single-page video search through the library's `Search` function.
 *
 * @param {string} query - Search text.
 * @param {object} options - `page`, `cookie`, `proxy` and `limit`.
 * @returns {Promise<object[]>} At most `options.limit` normalized videos
 *   (all extracted items when `limit` is undefined).
 */
export async function fetchVideoSearch(query, options) {
    const searchFn = requireFunction(await loadApi(), "Search");
    const request = {
        type: "video",
        page: options.page ?? 1,
        cookie: options.cookie,
        proxy: options.proxy,
    };
    const response = await searchFn(query, request);
    const context = {
        source: "search",
        collectionName: `Search: ${query}`,
        collectionId: query,
    };
    const hits = extractItems(response).slice(0, options.limit);
    return hits.map((item) => normalizeVideo(item, context));
}
|
|
68
|
+
/**
 * Resolves one video URL through the library's `Downloader` function.
 *
 * The first extracted item is used; failing that, the response's `result`
 * record, then the response record, then an empty object. The requested
 * `url` always overrides any `url` field on the chosen item.
 *
 * @param {string} url - Video URL to look up.
 * @param {object} options - Only `proxy` is read.
 * @returns {Promise<object>} One normalized video.
 */
export async function fetchSingleUrl(url, options) {
    const downloader = requireFunction(await loadApi(), "Downloader");
    const response = await downloader(url, {
        version: "v1",
        proxy: options.proxy,
        showOriginalResponse: true,
    });
    const [firstItem] = extractItems(response);
    const root = asRecord(response);
    const item = firstItem ?? asRecord(root?.result) ?? root ?? {};
    return normalizeVideo({ ...item, url }, { source: "url" });
}
|
|
81
|
+
/**
 * Builds minimal video records from bare URLs without any network access.
 *
 * @param {string[]} urls - Video URLs.
 * @returns {object[]} One normalized video per URL; the id is inferred from
 *   the URL, or is a stable hash of the URL when none can be inferred.
 */
export function videosFromUrls(urls) {
    const toVideo = (url) => {
        const id = inferVideoId(url) ?? stableHash(url);
        return normalizeVideo({ id, url }, { source: "url" });
    };
    return urls.map((url) => toVideo(url));
}
|
|
84
|
+
/**
 * Converts an imported JSON value into normalized videos.
 *
 * Accepted shapes, checked in order: an array, a record with an `items`
 * array, a record with a `result` array, or a single record. Non-record
 * list entries are wrapped as `{ value: entry }`.
 *
 * @param {unknown} value - Parsed import payload.
 * @returns {object[]} Normalized videos (empty when the shape is unsupported).
 */
export function videosFromImport(value) {
    const importEntry = (entry) => normalizeVideo(asRecord(entry) ?? { value: entry }, { source: "import" });
    let list;
    if (Array.isArray(value)) {
        list = value;
    }
    else if (isRecord(value) && Array.isArray(value.items)) {
        list = value.items;
    }
    else if (isRecord(value) && Array.isArray(value.result)) {
        list = value.result;
    }
    if (list) {
        return list.map((entry) => importEntry(entry));
    }
    const single = asRecord(value);
    return single ? [normalizeVideo(single, { source: "import" })] : [];
}
|
|
96
|
+
/**
 * Repeatedly calls a page-based fetcher until `limit` videos are collected,
 * the page budget is spent, or the source reports no more items.
 *
 * @param {Function} fn - Fetcher called as `fn(idOrUrl, { page, count, postLimit, cookie, proxy })`.
 * @param {string} idOrUrl - Identifier forwarded unchanged to `fn`.
 * @param {object} options - `limit` (default 30), `page` (default 1), `pages`
 *   (default `ceil(limit / 30)`), `cookie`, `proxy`.
 * @param {object} context - Source/collection metadata passed to `normalizeVideo`.
 * @returns {Promise<object[]>} De-duplicated videos, at most `limit` of them.
 * @throws {Error} When a response is flagged as failed by
 *   `extractItemsFromSuccessfulResponse`.
 */
async function fetchPaged(fn, idOrUrl, options, context) {
    const limit = options.limit ?? 30;
    const pageStart = options.page ?? 1;
    const maxPages = options.pages ?? Math.ceil(limit / 30);
    // The page size must stay constant across iterations: page-based fetchers
    // (see fetchCollectionWithCookie) derive their cursor as (page - 1) * count,
    // so shrinking `count` on later pages would re-request already seen items
    // and skip the tail. Any surplus is trimmed by the final slice.
    const pageSize = Math.min(30, limit);
    const videos = [];
    let page = pageStart;
    for (let i = 0; i < maxPages && videos.length < limit; i += 1) {
        const response = await fn(idOrUrl, {
            page,
            count: pageSize,
            // Sent as the remaining number of videos still wanted.
            postLimit: Math.min(30, limit - videos.length),
            cookie: options.cookie,
            proxy: options.proxy,
        });
        const items = extractItemsFromSuccessfulResponse(response, context.source);
        videos.push(...items.map((item) => normalizeVideo(item, context)));
        if (!hasMore(response) || items.length === 0) {
            break;
        }
        page += 1;
    }
    return dedupeById(videos).slice(0, limit);
}
|
|
118
|
+
/**
 * Looks up a named function on the loaded API object.
 *
 * @param {object} api - Object returned by `loadApi`.
 * @param {string} name - Property name expected to hold a function.
 * @returns {Function} The function stored under `name`.
 * @throws {Error} When `api[name]` is not a function.
 */
function requireFunction(api, name) {
    const candidate = api[name];
    if (typeof candidate === "function") {
        return candidate;
    }
    throw new Error(`@tobyg74/tiktok-api-dl does not expose ${name}().`);
}
|
|
125
|
+
/**
 * Fetches one page of a collection directly from the web endpoint, sending
 * the caller's cookie.
 *
 * Failures are reported as `{ status: "error", message }` objects rather
 * than thrown, so they flow through the same response checks as library
 * responses.
 *
 * @param {string} idOrUrl - Collection id or URL.
 * @param {object} options - `page` (default 1), `count` (default 30), `cookie`.
 * @returns {Promise<object>} Parsed JSON body, or an error-shaped object.
 */
async function fetchCollectionWithCookie(idOrUrl, options) {
    const collectionId = inferCollectionId(idOrUrl);
    if (!collectionId) {
        return { status: "error", message: "Invalid collection ID or URL format" };
    }
    const inputIsUrl = looksLikeUrl(idOrUrl);
    const pageNumber = numberValue(options.page) ?? 1;
    const pageSize = numberValue(options.count) ?? 30;
    // Pages are 1-based; the endpoint takes an item offset.
    const offset = Math.max(0, pageNumber - 1) * pageSize;
    const query = new URLSearchParams({
        WebIdLastTime: String(Date.now()),
        aid: "1988",
        app_language: "en",
        app_name: "tiktok_web",
        browser_language: "en-US",
        browser_name: "Mozilla",
        browser_online: "true",
        browser_platform: "MacIntel",
        browser_version: "5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        channel: "tiktok_web",
        collectionId,
        cookie_enabled: "true",
        count: String(pageSize),
        cursor: String(offset),
        device_platform: "web_pc",
        focus_state: "true",
        from_page: "user",
        history_len: "3",
        is_fullscreen: "false",
        is_page_visible: "true",
        language: "en",
        os: "mac",
        referer: inputIsUrl ? idOrUrl : "",
        sourceType: "113",
        tz_name: Intl.DateTimeFormat().resolvedOptions().timeZone || "UTC",
        user_is_login: "true",
        webcast_language: "en",
    });
    const endpoint = `https://www.tiktok.com/api/collection/item_list/?${query.toString()}`;
    const headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        accept: "application/json, text/plain, */*",
        "accept-language": "en-US,en;q=0.9",
        cookie: String(options.cookie ?? ""),
        referer: inputIsUrl ? idOrUrl : "https://www.tiktok.com/",
    };
    const response = await fetch(endpoint, { method: "GET", headers });
    if (response.ok) {
        return response.json();
    }
    return {
        status: "error",
        message: `Source returned HTTP ${response.status} ${response.statusText}`,
    };
}
|
|
183
|
+
/**
 * Pulls the logged-in user's `uniqueId` out of the rehydration JSON that is
 * embedded in a page's HTML.
 *
 * @param {string} html - Page markup.
 * @returns {string|undefined} The `uniqueId` found under
 *   `__DEFAULT_SCOPE__["webapp.app-context"].user`, or `undefined` when the
 *   script payload is missing, is not valid JSON, or lacks that field.
 */
export function extractUsernameFromRehydrationHtml(html) {
    const payload = html.match(/__UNIVERSAL_DATA_FOR_REHYDRATION__"[^>]*>(.*?)<\/script>/s)?.[1];
    if (!payload) {
        return undefined;
    }
    try {
        const root = asRecord(JSON.parse(payload));
        const defaultScope = asRecord(root?.["__DEFAULT_SCOPE__"]);
        const appContext = asRecord(defaultScope?.["webapp.app-context"]);
        return stringValue(asRecord(appContext?.user)?.uniqueId);
    }
    catch {
        // Best-effort extraction: any failure is reported as "not found".
        return undefined;
    }
}
|
|
198
|
+
/**
 * Requests the site's home page with the given cookie and tries to read the
 * logged-in username from the returned HTML.
 *
 * @param {string} cookie - Cookie header value; a falsy value short-circuits.
 * @param {string} [proxy] - Accepted for signature compatibility; not read here.
 * @returns {Promise<string|undefined>} The username, or `undefined` when the
 *   cookie is missing, the request fails, or no username can be extracted.
 */
export async function detectLoggedInUsername(cookie, proxy) {
    if (!cookie) {
        return undefined;
    }
    const headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "accept-language": "en-US,en;q=0.9",
        cookie,
    };
    try {
        const response = await fetch("https://www.tiktok.com/", { method: "GET", headers });
        return response.ok
            ? extractUsernameFromRehydrationHtml(await response.text())
            : undefined;
    }
    catch {
        // Detection is best-effort; network errors simply mean "unknown".
        return undefined;
    }
}
|
|
219
|
+
/**
 * Validates a source response and returns only the entries that look like
 * videos.
 *
 * @param {unknown} response - Raw response from a fetcher.
 * @param {string} source - Source label used in the error message.
 * @returns {object[]} Items accepted by `isLikelyVideoItem`.
 * @throws {Error} When `assertSuccessfulResponse` rejects the response.
 */
export function extractItemsFromSuccessfulResponse(response, source) {
    assertSuccessfulResponse(response, source);
    const items = extractItems(response);
    return items.filter((item) => isLikelyVideoItem(item));
}
|
|
223
|
+
/**
 * Throws when a response record is flagged as failed.
 *
 * A response fails when `status` is `"error"` or when
 * `statusCode` / `status_code` is present and non-zero. Non-record responses
 * are accepted silently.
 *
 * @param {unknown} response - Raw response from a fetcher.
 * @param {string} source - Source label used in the error message.
 * @throws {Error} With the response's message, or "unknown error".
 */
function assertSuccessfulResponse(response, source) {
    const root = asRecord(response);
    if (!root) {
        return;
    }
    const statusCode = numberValue(root.statusCode) ?? numberValue(root.status_code);
    const failedByStatus = stringValue(root.status) === "error";
    const failedByCode = statusCode != null && statusCode !== 0;
    if (!failedByStatus && !failedByCode) {
        return;
    }
    const message = stringValue(root.message) ??
        stringValue(root.statusMsg) ??
        stringValue(root.status_msg) ??
        "unknown error";
    throw new Error(`Source ${source} fetch failed: ${message}`);
}
|
|
234
|
+
/**
 * Finds the item list inside a response, whatever key it is stored under.
 *
 * The known list keys are probed on the response first, then `result`
 * itself, then the same keys on the `result` record. The first array found
 * wins and its non-record entries are dropped. With no array, the `result`
 * record (or else the response record) is returned as a single item.
 *
 * @param {unknown} response - Raw response from a fetcher.
 * @returns {object[]} Record items; empty when the response is not a record.
 */
function extractItems(response) {
    const root = asRecord(response);
    if (!root) {
        return [];
    }
    const nested = asRecord(root.result);
    const listKeys = ["itemList", "collectionItemList", "item_list", "aweme_list", "items", "videos"];
    const candidates = [
        ...listKeys.map((key) => root[key]),
        root.result,
        ...listKeys.map((key) => nested?.[key]),
    ];
    const list = candidates.find((candidate) => Array.isArray(candidate));
    if (list) {
        return list.filter((entry) => asRecord(entry));
    }
    return [nested ?? root];
}
|
|
262
|
+
/**
 * Heuristic check that a record describes a video.
 *
 * @param {object} item - Candidate record.
 * @returns {boolean} True when `id` / `awemeId` / `aweme_id` is a run of at
 *   least eight digits, or when a video id can be inferred from `url` /
 *   `shareUrl`.
 */
function isLikelyVideoItem(item) {
    const id = stringValue(item.id) ?? stringValue(item.awemeId) ?? stringValue(item.aweme_id);
    // An absent id is tested as "" and therefore never matches.
    if (/^\d{8,}$/.test(id ?? "")) {
        return true;
    }
    const link = stringValue(item.url) ?? stringValue(item.shareUrl);
    return Boolean(inferVideoId(link));
}
|
|
269
|
+
/**
 * Reports whether a response advertises further pages.
 *
 * @param {unknown} response - Raw response from a fetcher.
 * @returns {boolean} True when `result.hasMore` (preferred) or `hasMore` is
 *   the boolean `true` or the string `"true"`.
 */
function hasMore(response) {
    const root = asRecord(response);
    const flag = asRecord(root?.result)?.hasMore ?? root?.hasMore;
    return [true, "true"].includes(flag);
}
|
|
275
|
+
/**
 * Maps a raw source item onto the internal video record shape.
 *
 * Field lookups tolerate several naming variants. The id falls back to one
 * inferred from `item.url`, then to a stable hash of the serialized item.
 *
 * @param {object} item - Raw item record from a source or an import.
 * @param {object} context - `source`, plus optional `collectionId`,
 *   `collectionName` and `collectionUrl`.
 * @returns {object} Normalized video; `raw` holds a JSON round-tripped copy of `item`.
 */
export function normalizeVideo(item, context) {
    // Keeps only plain-record entries of a possibly missing list.
    const recordsIn = (list) => (Array.isArray(list) ? list.filter((entry) => asRecord(entry)) : []);
    // Media fields may hold either a string or a list of strings.
    const firstOrString = (value) => firstString(value) ?? stringValue(value);

    const itemUrl = stringValue(item.url);
    const id = stringValue(item.id) ??
        stringValue(item.awemeId) ??
        stringValue(item.aweme_id) ??
        inferVideoId(itemUrl) ??
        stableHash(JSON.stringify(item));
    const author = asRecord(item.author) ?? asRecord(item.owner);
    const stats = asRecord(item.stats) ?? asRecord(item.statistics);
    const video = asRecord(item.video);
    const music = asRecord(item.music);
    const hashtags = uniqueStrings([
        ...arrayOfStrings(item.hashtag),
        ...recordsIn(item.textExtra).map((entry) => stringValue(entry.hashtagName)),
        ...recordsIn(item.challenges).map((entry) => stringValue(entry.title)),
    ]);
    const username = stringValue(author?.username) ??
        stringValue(author?.uniqueId) ??
        stringValue(item.username) ??
        inferUsername(itemUrl);
    // Author-qualified URL; only available when a username is known.
    const profileVideoUrl = username ? `https://www.tiktok.com/@${username}/video/${id}` : undefined;
    const url = itemUrl ??
        stringValue(item.shareUrl) ??
        profileVideoUrl ??
        `https://www.tiktok.com/video/${id}`;
    const createdAt = toIsoTime(item.createTime);
    const now = new Date().toISOString();
    const raw = JSON.parse(JSON.stringify(item));
    return {
        id,
        url,
        canonicalUrl: profileVideoUrl ?? url,
        description: stringValue(item.desc) ?? stringValue(item.description) ?? stringValue(item.title),
        createdAt,
        savedAt: now,
        syncedAt: now,
        source: context.source,
        collection: {
            source: context.source,
            id: context.collectionId,
            name: context.collectionName,
            url: context.collectionUrl,
        },
        author: {
            id: stringValue(author?.id) ?? stringValue(author?.uid),
            username,
            displayName: stringValue(author?.nickname) ?? stringValue(author?.displayName),
            signature: stringValue(author?.signature),
            verified: booleanValue(author?.verified),
            avatarUrl: firstString(author?.avatarThumb) ?? firstString(author?.avatarMedium) ?? stringValue(author?.avatar),
        },
        hashtags,
        stats: {
            plays: numberValue(stats?.playCount),
            likes: numberValue(stats?.likeCount) ?? numberValue(stats?.diggCount),
            comments: numberValue(stats?.commentCount),
            shares: numberValue(stats?.shareCount),
            saves: numberValue(stats?.collectCount),
            reposts: numberValue(stats?.repostCount),
        },
        music: {
            id: stringValue(music?.id),
            title: stringValue(music?.title),
            author: stringValue(music?.authorName) ?? stringValue(music?.author),
            durationSeconds: numberValue(music?.duration),
            url: firstOrString(music?.playUrl),
        },
        media: {
            videoUrl: firstOrString(video?.playAddr),
            downloadUrl: firstOrString(video?.downloadAddr),
            coverUrl: firstOrString(video?.cover) ?? stringValue(item.cover),
            dynamicCoverUrl: firstOrString(video?.dynamicCover),
            durationSeconds: numberValue(video?.duration),
            width: numberValue(video?.width),
            height: numberValue(video?.height),
        },
        raw,
    };
}
|
|
347
|
+
/**
 * Removes videos with duplicate ids.
 *
 * @param {object[]} videos - Videos, each with an `id`.
 * @returns {object[]} One video per id, ordered by the id's first appearance;
 *   when an id repeats, the last occurrence's record is the one kept.
 */
function dedupeById(videos) {
    const byId = new Map();
    for (const video of videos) {
        byId.set(video.id, video);
    }
    return Array.from(byId.values());
}
|
|
350
|
+
/**
 * Tests whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True for objects that are neither `null` nor arrays.
 */
function isRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
|
353
|
+
/**
 * Narrows a value to a record.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {object|undefined} `value` itself when `isRecord` accepts it,
 *   otherwise `undefined`.
 */
function asRecord(value) {
    if (isRecord(value)) {
        return value;
    }
    return undefined;
}
|
|
356
|
+
/**
 * Coerces a value to a trimmed, non-empty string.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {string|undefined} The trimmed string, the decimal form of a
 *   finite number, or `undefined` for anything else (including blank strings).
 */
function stringValue(value) {
    switch (typeof value) {
        case "string": {
            const trimmed = value.trim();
            return trimmed ? trimmed : undefined;
        }
        case "number":
            return Number.isFinite(value) ? String(value) : undefined;
        default:
            return undefined;
    }
}
|
|
363
|
+
/**
 * Coerces a value to a finite number.
 *
 * Strings are parsed after removing thousands separators (`,`) and
 * surrounding whitespace. Empty or whitespace-only strings yield `undefined`:
 * `Number("")` is 0, which would otherwise turn a missing field into a real
 * zero (for example an epoch timestamp).
 *
 * @param {unknown} value - Value to inspect.
 * @returns {number|undefined} The finite number, or `undefined` when the
 *   value is not numeric.
 */
function numberValue(value) {
    if (typeof value === "number" && Number.isFinite(value)) {
        return value;
    }
    if (typeof value === "string") {
        const normalized = value.replace(/,/g, "").trim();
        if (normalized === "") {
            return undefined;
        }
        const parsed = Number(normalized);
        return Number.isFinite(parsed) ? parsed : undefined;
    }
    return undefined;
}
|
|
373
|
+
/**
 * Coerces a value to a boolean.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean|undefined} The boolean itself, `true`/`false` for the
 *   exact strings `"true"`/`"false"`, otherwise `undefined`.
 */
function booleanValue(value) {
    if (typeof value === "boolean") {
        return value;
    }
    switch (value) {
        case "true":
            return true;
        case "false":
            return false;
        default:
            return undefined;
    }
}
|
|
382
|
+
/**
 * Picks a usable string from a value that may be a string or a list.
 *
 * @param {unknown} value - A scalar or an array.
 * @returns {string|undefined} For arrays, the first entry that is a
 *   non-blank string (returned untrimmed); otherwise `stringValue(value)`.
 */
function firstString(value) {
    if (!Array.isArray(value)) {
        return stringValue(value);
    }
    const isNonBlankString = (entry) => typeof entry === "string" && entry.trim().length > 0;
    return value.find(isNonBlankString);
}
|
|
387
|
+
/**
 * Collects the string-like entries of a list.
 *
 * @param {unknown} value - Expected to be an array.
 * @returns {string[]} `stringValue` of every entry that yields a string;
 *   empty when `value` is not an array.
 */
function arrayOfStrings(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    const texts = [];
    for (const entry of value) {
        const text = stringValue(entry);
        if (text) {
            texts.push(text);
        }
    }
    return texts;
}
|
|
395
|
+
/**
 * Converts a numeric timestamp to an ISO-8601 string.
 *
 * Values above 10,000,000,000 are taken as milliseconds; anything else is
 * multiplied by 1000 (treated as seconds).
 *
 * @param {unknown} value - Timestamp as a number or numeric string.
 * @returns {string|undefined} ISO string, or `undefined` when the value is
 *   not numeric or produces an invalid date.
 */
function toIsoTime(value) {
    const numeric = numberValue(value);
    if (numeric == null) {
        return undefined;
    }
    const millis = numeric > 10_000_000_000 ? numeric : numeric * 1000;
    const stamp = new Date(millis);
    if (Number.isNaN(stamp.getTime())) {
        return undefined;
    }
    return stamp.toISOString();
}
|
|
403
|
+
/**
 * Extracts the last run of eight or more digits in a string.
 *
 * Only non-digit characters may follow the run.
 *
 * @param {string} input - Text such as an id or URL.
 * @returns {string|undefined} The digit run, or `undefined` when absent.
 */
function inferTrailingId(input) {
    const [, digits] = input.match(/(\d{8,})(?:\D*)$/) ?? [];
    return digits;
}
|
|
407
|
+
/**
 * Derives a numeric collection id from an id, slug or URL.
 *
 * Resolution order:
 *  1. the trimmed input, when it consists only of digits;
 *  2. the digit run that ends the `collection/<slug>` path segment, so a
 *     slug such as `Top-10-Recipes-<id>` resolves to `<id>` and not to "10";
 *  3. the earlier, looser pattern (digits after the first hyphen-free part
 *     of the slug), kept for inputs where the id is not at the segment end;
 *  4. the trailing run of eight or more digits anywhere in the input.
 *
 * @param {string} input - Collection id, slug or URL.
 * @returns {string|undefined} The collection id, or `undefined` when none is found.
 */
function inferCollectionId(input) {
    const trimmed = input.trim();
    if (/^\d+$/.test(trimmed)) {
        return trimmed;
    }
    // Lazy slug match: the digits must be followed by the end of the path segment.
    const segmentEndId = trimmed.match(/collection\/[^/?#]*?(\d+)(?:[/?#]|$)/i)?.[1];
    return segmentEndId ??
        input.match(/collection\/[^/\-]*-?(\d+)/i)?.[1] ??
        inferTrailingId(input);
}
|
|
412
|
+
/**
 * Turns user input for a collection into an id, a URL and a username.
 *
 * URL resolution, first match wins: an http(s) URL is used as is; a bare
 * `tiktok.com/@…` host path gets an `https://` prefix; `@user/collection/…`
 * is prefixed with the site origin; any other non-numeric value is treated
 * as a slug under `fallbackUsername`, when one is given.
 *
 * @param {string} input - Raw user input.
 * @param {string} [fallbackUsername] - Handle used when the input has none.
 * @returns {{collectionId: (string|undefined), collectionUrl: (string|undefined), username: (string|undefined)}}
 */
export function normalizeCollectionInput(input, fallbackUsername) {
    const value = input.trim();
    const resolveUrl = () => {
        if (/^https?:\/\//i.test(value)) {
            return value;
        }
        if (/^(www\.)?tiktok\.com\/@/i.test(value)) {
            return `https://${value}`;
        }
        if (/^@[^/]+\/collection\//i.test(value)) {
            return `https://www.tiktok.com/${value}`;
        }
        if (fallbackUsername && !/^\d+$/.test(value)) {
            return `https://www.tiktok.com/@${fallbackUsername}/collection/${value}`;
        }
        return undefined;
    };
    const collectionUrl = resolveUrl();
    return {
        collectionId: inferCollectionId(collectionUrl ?? value),
        collectionUrl,
        username: inferUsername(collectionUrl) ?? fallbackUsername,
    };
}
|
|
426
|
+
/**
 * Derives a video id from a URL or id-like string.
 *
 * @param {string|undefined} input - URL or text; falsy input yields `undefined`.
 * @returns {string|undefined} The digits after `/video/`, else the trailing
 *   run of eight or more digits, else `undefined`.
 */
function inferVideoId(input) {
    if (!input) {
        return undefined;
    }
    const fromPath = input.match(/\/video\/(\d+)/);
    return fromPath?.[1] ?? inferTrailingId(input);
}
|
|
431
|
+
/**
 * Extracts the `@handle` from a tiktok.com URL.
 *
 * @param {string|undefined} input - URL; falsy input yields `undefined`.
 * @returns {string|undefined} The handle without the `@`, up to the next
 *   `/`, `?` or `#`; `undefined` when the URL has none.
 */
function inferUsername(input) {
    if (!input) {
        return undefined;
    }
    const [, handle] = input.match(/tiktok\.com\/@([^/?#]+)/) ?? [];
    return handle;
}
|
|
436
|
+
/**
 * Tests whether the input starts with an `http://` or `https://` scheme
 * (case-insensitive).
 *
 * @param {string} input - Text to inspect.
 * @returns {boolean} True when the input begins with an http(s) scheme.
 */
function looksLikeUrl(input) {
    const httpScheme = /^https?:\/\//i;
    return httpScheme.test(input);
}
|
|
439
|
+
/**
 * Produces a JSON-safe deep copy of an arbitrary value via a
 * stringify/parse round trip.
 *
 * `JSON.stringify` returns `undefined` for inputs with no JSON form
 * (`undefined`, functions, symbols). Passing that to `JSON.parse` throws a
 * SyntaxError, so such inputs are reported as `undefined` instead.
 *
 * @param {unknown} value - Any value.
 * @returns {unknown} The JSON round-tripped value, or `undefined` when the
 *   input has no JSON representation.
 * @throws {TypeError} When the value cannot be serialized (circular
 *   references, BigInt).
 */
export function jsonValueFromUnknown(value) {
    const serialized = JSON.stringify(value);
    return serialized === undefined ? undefined : JSON.parse(serialized);
}
|
|
442
|
+
/**
 * JSON round-trips a value and returns it only when the result is a plain
 * (non-null, non-array) object.
 *
 * @param {unknown} value - Any value.
 * @returns {object|undefined} The round-tripped object, or `undefined` for
 *   arrays, `null` and primitives.
 */
export function jsonObjectFromUnknown(value) {
    const json = jsonValueFromUnknown(value);
    if (json === null || Array.isArray(json)) {
        return undefined;
    }
    return typeof json === "object" ? json : undefined;
}
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { ensureDataDirs, transcriptDir } from "./paths.js";
|
|
4
|
+
import { quoteShell, runProcess, runShell, templateCommand } from "./process.js";
|
|
5
|
+
import { sanitizeFilePart } from "./store.js";
|
|
6
|
+
/**
 * Transcribes a video's downloaded audio (preferred) or video file and
 * writes `<id>.json` and `<id>.txt` into the transcript directory.
 *
 * @param {object} video - Video record; reads `id`, `transcript`,
 *   `media.audioPath` and `media.videoPath`.
 * @param {object} options - `engine` ("whisper", "whisper-cpp", anything else
 *   is custom), `force`, `command`, `model`, `language`.
 * @returns {Promise<{id: string, changed: boolean, transcript: object}>}
 *   `changed` is false when an existing transcript was reused.
 * @throws {Error} When the video has no downloaded media, or the engine fails.
 */
export async function transcribeVideo(video, options) {
    ensureDataDirs();
    if (!options.force && video.transcript?.text) {
        return { id: video.id, changed: false, transcript: video.transcript };
    }
    const input = video.media?.audioPath ?? video.media?.videoPath;
    if (!input) {
        throw new Error(`${video.id} has no downloaded audio or video. Run tokwise fetch-media --audio first.`);
    }
    const safeId = sanitizeFilePart(video.id);
    const inTranscriptDir = (fileName) => path.join(transcriptDir(), fileName);
    const outJson = inTranscriptDir(`${safeId}.json`);
    const outText = inTranscriptDir(`${safeId}.txt`);
    let raw;
    let stdout = "";
    switch (options.engine) {
        case "whisper":
            raw = await runWhisper(input, options);
            break;
        case "whisper-cpp":
            raw = await runWhisperCpp(input, safeId, options);
            break;
        default:
            // Every other engine value is handled as a custom command.
            ({ raw, stdout } = await runCustom(input, outJson, options));
    }
    const transcript = await transcriptFromRaw(raw, stdout, {
        id: video.id,
        input,
        outJson,
        outText,
        engine: options.engine,
        model: options.model,
        language: options.language,
    });
    // Persist the engine's raw output when there is one, else the parsed transcript.
    await fs.writeFile(outJson, `${JSON.stringify(raw ?? transcript, null, 2)}\n`, "utf8");
    await fs.writeFile(outText, `${transcript.text.trim()}\n`, "utf8");
    return { id: video.id, changed: true, transcript };
}
|
|
43
|
+
/**
 * Runs the `whisper` CLI on a media file and loads the JSON it writes.
 *
 * @param {string} input - Path of the audio/video file.
 * @param {object} options - `command` (default "whisper"), `model`, `language`.
 * @returns {Promise<object>} The parsed `<input basename>.json` from the
 *   transcript directory, or `{ stdout }` when that file is missing or unreadable.
 * @throws {Error} When the process exits with a non-zero code.
 */
async function runWhisper(input, options) {
    const command = options.command ?? "whisper";
    const args = [input, "--output_dir", transcriptDir(), "--output_format", "json"];
    if (options.model) {
        args.push("--model", options.model);
    }
    if (options.language) {
        args.push("--language", options.language);
    }
    const result = await runProcess(command, args);
    if (result.code !== 0) {
        throw new Error(`whisper failed: ${result.stderr || result.stdout}`);
    }
    const expected = path.join(transcriptDir(), `${path.basename(input, path.extname(input))}.json`);
    // The read must be awaited before `??`: a pending Promise is never
    // nullish, so the stdout fallback would otherwise be unreachable.
    return (await readJsonIfExists(expected)) ?? { stdout: result.stdout };
}
|
|
56
|
+
/**
 * Runs the whisper.cpp CLI on a media file and loads the JSON it writes.
 *
 * @param {string} input - Path of the audio/video file.
 * @param {string} safeId - File-system-safe video id used as the output prefix.
 * @param {object} options - `command` (default "whisper-cli"), `model`, `language`.
 * @returns {Promise<object>} The parsed `<safeId>.json` from the transcript
 *   directory, or `{ stdout }` when that file is missing or unreadable.
 * @throws {Error} When the process exits with a non-zero code.
 */
async function runWhisperCpp(input, safeId, options) {
    const command = options.command ?? "whisper-cli";
    const prefix = path.join(transcriptDir(), safeId);
    const args = ["-f", input, "-oj", "-of", prefix];
    if (options.model) {
        args.unshift("-m", options.model);
    }
    if (options.language) {
        args.push("-l", options.language);
    }
    const result = await runProcess(command, args);
    if (result.code !== 0) {
        throw new Error(`whisper.cpp failed: ${result.stderr || result.stdout}`);
    }
    // The read must be awaited before `??`: a pending Promise is never
    // nullish, so the stdout fallback would otherwise be unreachable.
    return (await readJsonIfExists(`${prefix}.json`)) ?? { stdout: result.stdout };
}
|
|
69
|
+
/**
 * Runs a user-supplied speech-to-text shell command.
 *
 * The command template is filled in by `templateCommand` with `input`,
 * `output`, `language` and `model`.
 *
 * @param {string} input - Path of the audio/video file.
 * @param {string} output - Path the command is expected to write to.
 * @param {object} options - `command` (required), `language`, `model`.
 * @returns {Promise<{raw: unknown, stdout: string}>} `raw` is the output file
 *   parsed as JSON, else its text, else `{ stdout }`.
 * @throws {Error} When `options.command` is missing or the command exits non-zero.
 */
async function runCustom(input, output, options) {
    if (!options.command) {
        throw new Error("--command is required for --engine custom.");
    }
    const { language, model } = options;
    const shellCommand = templateCommand(options.command, { input, output, language, model });
    const result = await runShell(shellCommand);
    if (result.code !== 0) {
        throw new Error(`custom STT failed: ${result.stderr || result.stdout}`);
    }
    const { stdout } = result;
    const raw = (await readJsonIfExists(output)) ?? (await readTextIfExists(output)) ?? { stdout };
    return { raw, stdout };
}
|
|
84
|
+
/**
 * Builds a transcript record from an engine's raw output.
 *
 * The text is the first available of `raw.text`, `raw.transcript`,
 * `raw.stdout`, `raw` itself when it is a string, and finally `stdout`.
 *
 * @param {unknown} raw - Parsed engine output (object, string or undefined).
 * @param {string} stdout - Captured standard output, used as a last resort.
 * @param {object} context - `input`, `outJson`, `outText`, `engine`, `model`, `language`.
 * @returns {Promise<object>} Transcript with trimmed `text`, metadata, and
 *   `segments` when the raw output provides any.
 */
async function transcriptFromRaw(raw, stdout, context) {
    const record = raw !== null && typeof raw === "object" ? raw : undefined;
    const candidates = [
        stringValue(record?.text),
        stringValue(record?.transcript),
        stringValue(record?.stdout),
        typeof raw === "string" ? raw : undefined,
    ];
    // The first non-nullish candidate wins; an empty raw string still counts.
    const text = candidates.find((candidate) => candidate != null) ?? stdout;
    const language = stringValue(record?.language) ?? context.language;
    return {
        text: text.trim(),
        language,
        engine: context.engine,
        model: context.model,
        sourcePath: context.input,
        jsonPath: context.outJson,
        textPath: context.outText,
        generatedAt: new Date().toISOString(),
        segments: parseSegments(record?.segments),
    };
}
|
|
104
|
+
/**
 * Normalizes an engine's segment list.
 *
 * @param {unknown} value - Expected to be an array of segment objects.
 * @returns {Array<{start: (number|undefined), end: (number|undefined), text: string}>|undefined}
 *   Segments that carry text, or `undefined` when `value` is not an array or
 *   no segment qualifies.
 */
function parseSegments(value) {
    if (!Array.isArray(value)) {
        return undefined;
    }
    const segments = [];
    for (const segment of value) {
        if (segment === null || typeof segment !== "object") {
            continue;
        }
        const text = stringValue(segment.text);
        if (!text) {
            continue;
        }
        segments.push({
            start: numberValue(segment.start),
            end: numberValue(segment.end),
            text,
        });
    }
    return segments.length === 0 ? undefined : segments;
}
|
|
124
|
+
/**
 * Reads and parses a JSON file, treating every failure as "absent".
 *
 * @param {string} filePath - File to read as UTF-8.
 * @returns {Promise<unknown>} The parsed value, or `undefined` when the file
 *   cannot be read or does not contain valid JSON.
 */
async function readJsonIfExists(filePath) {
    let contents;
    try {
        contents = await fs.readFile(filePath, "utf8");
    }
    catch {
        return undefined;
    }
    try {
        return JSON.parse(contents);
    }
    catch {
        return undefined;
    }
}
|
|
132
|
+
/**
 * Reads a text file, treating every read failure as "absent".
 *
 * @param {string} filePath - File to read as UTF-8.
 * @returns {Promise<string|undefined>} The file contents, or `undefined`
 *   when the file cannot be read.
 */
async function readTextIfExists(filePath) {
    try {
        // `await` is required here: returning the bare promise would let a
        // rejection (e.g. ENOENT) bypass this try/catch and reach the caller.
        return await fs.readFile(filePath, "utf8");
    }
    catch {
        return undefined;
    }
}
|
|
140
|
+
/**
 * Accepts only non-blank strings.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {string|undefined} The string unchanged (not trimmed) when it has
 *   non-whitespace content, otherwise `undefined`.
 */
function stringValue(value) {
    const isNonBlankString = typeof value === "string" && value.trim() !== "";
    return isNonBlankString ? value : undefined;
}
|
|
145
|
+
/**
 * Coerces a value to a finite number.
 *
 * Empty or whitespace-only strings yield `undefined`: `Number("")` is 0,
 * which would otherwise turn a missing segment timestamp into a real zero.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {number|undefined} The finite number, or `undefined` when the
 *   value is not numeric.
 */
function numberValue(value) {
    if (typeof value === "number" && Number.isFinite(value)) {
        return value;
    }
    if (typeof value === "string") {
        if (value.trim() === "") {
            return undefined;
        }
        const parsed = Number(value);
        return Number.isFinite(parsed) ? parsed : undefined;
    }
    return undefined;
}
|
|
154
|
+
/**
 * Renders the help text listing the placeholders available to a custom
 * speech-to-text command template.
 *
 * @returns {string} Newline-joined help lines; the example paths are
 *   shell-quoted with `quoteShell`.
 */
export function renderCustomTemplateHelp() {
    const exampleInput = quoteShell("/path/to/audio.m4a");
    const exampleOutput = quoteShell("/path/to/transcript.json");
    const lines = [
        "Custom STT command placeholders:",
        ` {input} ${exampleInput}`,
        ` {output} ${exampleOutput}`,
        " {language} requested language",
        " {model} requested model",
    ];
    return lines.join("\n");
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "tokwise",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Local-first CLI for syncing, downloading, transcribing, searching, and analyzing saved short-form videos.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"author": "Sebastian Crossa",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "git+https://github.com/sebastiancrossa/tokwise-cli.git"
|
|
10
|
+
},
|
|
11
|
+
"homepage": "https://github.com/sebastiancrossa/tokwise-cli#readme",
|
|
12
|
+
"bugs": {
|
|
13
|
+
"url": "https://github.com/sebastiancrossa/tokwise-cli/issues"
|
|
14
|
+
},
|
|
15
|
+
"bin": {
|
|
16
|
+
"tokwise": "dist/cli.js",
|
|
17
|
+
"tw": "dist/cli.js"
|
|
18
|
+
},
|
|
19
|
+
"scripts": {
|
|
20
|
+
"build": "tsc -p tsconfig.json",
|
|
21
|
+
"dev": "tsx src/cli.ts",
|
|
22
|
+
"test": "node --import tsx --test tests/**/*.test.ts",
|
|
23
|
+
"prepublishOnly": "npm run build"
|
|
24
|
+
},
|
|
25
|
+
"files": [
|
|
26
|
+
"dist/",
|
|
27
|
+
"README.md",
|
|
28
|
+
"LICENSE"
|
|
29
|
+
],
|
|
30
|
+
"engines": {
|
|
31
|
+
"node": ">=20"
|
|
32
|
+
},
|
|
33
|
+
"license": "MIT",
|
|
34
|
+
"keywords": [
|
|
35
|
+
"tokwise",
|
|
36
|
+
"short-form-video",
|
|
37
|
+
"cli",
|
|
38
|
+
"transcription",
|
|
39
|
+
"local-first",
|
|
40
|
+
"knowledge-base",
|
|
41
|
+
"agent-tools",
|
|
42
|
+
"search",
|
|
43
|
+
"bm25",
|
|
44
|
+
"yt-dlp",
|
|
45
|
+
"whisper"
|
|
46
|
+
],
|
|
47
|
+
"dependencies": {
|
|
48
|
+
"@tobyg74/tiktok-api-dl": "^1.3.7",
|
|
49
|
+
"commander": "^15.0.0",
|
|
50
|
+
"picocolors": "^1.1.1"
|
|
51
|
+
},
|
|
52
|
+
"devDependencies": {
|
|
53
|
+
"@types/node": "^24.0.0",
|
|
54
|
+
"tsx": "^4.22.4",
|
|
55
|
+
"typescript": "^6.0.3"
|
|
56
|
+
}
|
|
57
|
+
}
|