tokwise 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +185 -0
- package/dist/ask.js +58 -0
- package/dist/browser-cookies.js +160 -0
- package/dist/classify.js +118 -0
- package/dist/cli.js +894 -0
- package/dist/jsonl.js +51 -0
- package/dist/library.js +138 -0
- package/dist/markdown.js +211 -0
- package/dist/media.js +117 -0
- package/dist/paths.js +87 -0
- package/dist/process.js +68 -0
- package/dist/progress.js +56 -0
- package/dist/render.js +114 -0
- package/dist/search.js +226 -0
- package/dist/skill.js +57 -0
- package/dist/store.js +158 -0
- package/dist/tiktok.js +445 -0
- package/dist/transcribe.js +162 -0
- package/dist/types.js +1 -0
- package/package.json +57 -0
package/dist/jsonl.js
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
/**
 * Read a JSON Lines file into an array of parsed rows.
 *
 * Whitespace-only lines are skipped, and a missing file is treated as an
 * empty dataset instead of an error.
 *
 * @param {string} filePath - Path of the JSONL file to read.
 * @returns {Promise} Resolves to the parsed rows in file order; `[]` when the
 *   file does not exist.
 * @throws {Error} When a line is not valid JSON. The message carries the
 *   1-based line number and `cause` carries the original parse error. Read
 *   failures other than ENOENT are rethrown unchanged.
 */
export async function readJsonl(filePath) {
    let text;
    try {
        text = await fs.readFile(filePath, "utf8");
    }
    catch (error) {
        if (error.code === "ENOENT")
            return [];
        throw error;
    }
    const rows = [];
    for (const [idx, line] of text.split(/\r?\n/).entries()) {
        const trimmed = line.trim();
        if (!trimmed)
            continue;
        try {
            rows.push(JSON.parse(trimmed));
        }
        catch (error) {
            // Keep the original SyntaxError reachable for callers that inspect it.
            throw new Error(`Invalid JSONL at ${filePath}:${idx + 1}: ${error.message}`, { cause: error });
        }
    }
    return rows;
}
|
|
27
|
+
/**
 * Write rows to a JSON Lines file, replacing any existing file.
 *
 * The content is written to a sibling temp file (`<filePath>.tmp-<pid>`) and
 * then renamed over the destination. The parent directory is created if needed.
 *
 * @param {string} filePath - Destination path.
 * @param {Array} rows - Values to serialise, one JSON document per line.
 * @returns {Promise} Resolves once the destination has been replaced.
 * @throws Any error from serialising, writing or renaming; the temp file is
 *   removed before the error is rethrown.
 */
export async function writeJsonl(filePath, rows) {
    await fs.mkdir(path.dirname(filePath), { recursive: true });
    const tmpPath = `${filePath}.tmp-${process.pid}`;
    const body = rows.map((row) => JSON.stringify(row)).join("\n");
    try {
        // An empty row list produces an empty file, not a lone newline.
        await fs.writeFile(tmpPath, body.length > 0 ? `${body}\n` : "", "utf8");
        await fs.rename(tmpPath, filePath);
    }
    catch (error) {
        // Best-effort cleanup so a failed write does not leave a stray temp file.
        await fs.unlink(tmpPath).catch(() => { });
        throw error;
    }
}
|
|
34
|
+
/**
 * Read and parse a JSON file, returning a fallback when the file is absent.
 *
 * @param {string} filePath - Path of the JSON file.
 * @param {*} fallback - Value returned when the file does not exist (ENOENT).
 * @returns {Promise} Resolves to the parsed JSON value or `fallback`.
 * @throws Read errors other than ENOENT, and JSON parse errors.
 */
export async function readJsonFile(filePath, fallback) {
    let raw;
    try {
        raw = await fs.readFile(filePath, "utf8");
    }
    catch (readError) {
        if (readError.code !== "ENOENT")
            throw readError;
        return fallback;
    }
    // Parse errors carry no ENOENT code, so they always propagate.
    return JSON.parse(raw);
}
|
|
44
|
+
/**
 * Write a value as pretty-printed JSON (2-space indent, trailing newline),
 * replacing any existing file.
 *
 * The content goes to a sibling temp file (`<filePath>.tmp-<pid>`) which is
 * then renamed over the destination. The parent directory is created if needed.
 *
 * @param {string} filePath - Destination path.
 * @param {*} value - Value to serialise.
 * @param {number} [mode] - Optional permission bits. When given they are used
 *   for the temp file and applied again with chmod after the rename.
 * @returns {Promise} Resolves once the file is in place.
 * @throws Any error from serialising, writing, renaming or chmod; the temp
 *   file is removed before a write/rename error is rethrown.
 */
export async function writeJsonFile(filePath, value, mode) {
    await fs.mkdir(path.dirname(filePath), { recursive: true });
    const tmpPath = `${filePath}.tmp-${process.pid}`;
    try {
        await fs.writeFile(tmpPath, `${JSON.stringify(value, null, 2)}\n`, mode == null ? undefined : { mode });
        await fs.rename(tmpPath, filePath);
    }
    catch (error) {
        // Best-effort cleanup so a failed write does not leave a stray temp file.
        await fs.unlink(tmpPath).catch(() => { });
        throw error;
    }
    if (mode != null)
        await fs.chmod(filePath, mode);
}
|
package/dist/library.js
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import crypto from "node:crypto";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { commandsDir, ensureDataDirs, libraryDir } from "./paths.js";
|
|
5
|
+
import { readTextInput, resolveMaybeRelative } from "./store.js";
|
|
6
|
+
import { tokenize } from "./search.js";
|
|
7
|
+
/**
 * Rank Markdown pages in the library by how many of their tokens match the query.
 *
 * A page's score is the number of tokens in its text that appear in the set of
 * query tokens (repeated occurrences count each time). Pages scoring zero are
 * omitted.
 *
 * @param {string} query - Free-text query, tokenised with `tokenize`.
 * @param {number} [limit=20] - Maximum number of results to return.
 * @returns {Promise} Resolves to `{ path, score, preview }` objects sorted by
 *   descending score; `path` is relative to the library directory and
 *   `preview` is the first 220 characters with whitespace collapsed.
 */
export async function searchLibrary(query, limit = 20) {
    ensureDataDirs();
    const wanted = new Set(tokenize(query));
    const pages = await listMarkdownFiles(libraryDir());
    const matches = [];
    for (const pagePath of pages) {
        const contents = await fs.readFile(pagePath, "utf8");
        let hits = 0;
        for (const token of tokenize(contents)) {
            if (wanted.has(token))
                hits += 1;
        }
        if (hits === 0)
            continue;
        matches.push({
            path: path.relative(libraryDir(), pagePath),
            score: hits,
            preview: contents.replace(/\s+/g, " ").slice(0, 220),
        });
    }
    matches.sort((left, right) => right.score - left.score);
    return matches.slice(0, limit);
}
|
|
26
|
+
/**
 * Load a library page together with a content hash.
 *
 * @param {string} pagePath - Page path, resolved inside the library directory.
 * @returns {Promise} Resolves to `{ path, sha256, body }` where `path` is
 *   relative to the library directory and `sha256` is the hex digest of `body`.
 * @throws {Error} When `pagePath` resolves outside the library directory or the
 *   file cannot be read.
 */
export async function showLibraryPage(pagePath) {
    const target = resolveUnder(libraryDir(), pagePath);
    const contents = await fs.readFile(target, "utf8");
    const relativePath = path.relative(libraryDir(), target);
    const digest = sha256(contents);
    return { path: relativePath, sha256: digest, body: contents };
}
|
|
31
|
+
/**
 * Create a new library page from the text supplied via `readTextInput`.
 *
 * @param {string} pagePath - Page path, resolved inside the library directory.
 * @param {string} inputPath - Source handed to `readTextInput` to obtain the body.
 * @returns {Promise} Resolves to the absolute path of the created page.
 * @throws {Error} When the path escapes the library, or when the page already
 *   exists (the file is opened with the exclusive `wx` flag).
 */
export async function createLibraryPage(pagePath, inputPath) {
    const target = resolveUnder(libraryDir(), pagePath);
    const contents = await readTextInput(inputPath);
    const parentDir = path.dirname(target);
    await fs.mkdir(parentDir, { recursive: true });
    // "wx" makes the write fail instead of overwriting an existing page.
    const writeOptions = { encoding: "utf8", flag: "wx" };
    await fs.writeFile(target, contents, writeOptions);
    return target;
}
|
|
38
|
+
/**
 * Overwrite an existing library page, optionally guarding against concurrent edits.
 *
 * @param {string} pagePath - Page path, resolved inside the library directory.
 * @param {string} inputPath - Source handed to `readTextInput` to obtain the new body.
 * @param {string} [expectedSha256] - When truthy, the hex SHA-256 the current
 *   page content must have; a mismatch aborts the update.
 * @returns {Promise} Resolves to the absolute path of the updated page.
 * @throws {Error} When the path escapes the library, the page cannot be read,
 *   or the content hash does not match `expectedSha256`.
 */
export async function updateLibraryPage(pagePath, inputPath, expectedSha256) {
    const target = resolveUnder(libraryDir(), pagePath);
    // Reading first also ensures the page already exists.
    const current = await fs.readFile(target, "utf8");
    if (expectedSha256) {
        const currentDigest = sha256(current);
        if (currentDigest !== expectedSha256) {
            throw new Error("Library page changed since it was read. Re-run show and pass the new sha256.");
        }
    }
    const replacement = await readTextInput(inputPath);
    await fs.writeFile(target, replacement, "utf8");
    return target;
}
|
|
48
|
+
/**
 * Soft-delete a library page by moving it into the library's `.trash` folder.
 *
 * The trashed file is named `<Date.now()>-<basename of pagePath>`.
 *
 * @param {string} pagePath - Page path, resolved inside the library directory.
 * @returns {Promise} Resolves to the path of the file inside `.trash`.
 * @throws {Error} When the path escapes the library or the rename fails.
 */
export async function deleteLibraryPage(pagePath) {
    const source = resolveUnder(libraryDir(), pagePath);
    const trashFolder = path.join(libraryDir(), ".trash");
    await fs.mkdir(trashFolder, { recursive: true });
    const trashedName = `${Date.now()}-${path.basename(pagePath)}`;
    const destination = path.join(trashFolder, trashedName);
    await fs.rename(source, destination);
    return destination;
}
|
|
56
|
+
/**
 * List the Markdown command files in the commands directory.
 *
 * @returns {Promise} Resolves to the sorted `.md` file names; `[]` when the
 *   directory cannot be read.
 */
export async function listCommands() {
    ensureDataDirs();
    let names;
    try {
        names = await fs.readdir(commandsDir());
    }
    catch {
        // Any read failure is reported as "no commands".
        return [];
    }
    const markdownNames = names.filter((name) => name.endsWith(".md"));
    return markdownNames.sort();
}
|
|
65
|
+
/**
 * Scaffold a new command file from a fixed Markdown template.
 *
 * @param {string} name - Command name; `.md` is appended when missing.
 * @returns {Promise} Resolves to the absolute path of the created file.
 * @throws {Error} When the name resolves outside the commands directory or the
 *   file already exists (exclusive `wx` write).
 */
export async function createCommand(name) {
    ensureDataDirs();
    let fileName = name;
    if (!name.endsWith(".md"))
        fileName = `${name}.md`;
    const target = resolveUnder(commandsDir(), fileName);
    const heading = `# ${path.basename(fileName, ".md")}`;
    const purposeSection = "## Purpose\n\nDescribe the reusable workflow this command should run.\n";
    const stepsSection = "## Steps\n\n" +
        "1. Search the local Tokwise archive when relevant.\n" +
        "2. Ground claims in video ids or Markdown pages.\n" +
        "3. Report uncertainty clearly.\n";
    // Sections are separated by one blank line; the file ends with a newline.
    const template = `${heading}\n\n${purposeSection}\n${stepsSection}`;
    await fs.writeFile(target, template, { encoding: "utf8", flag: "wx" });
    return target;
}
|
|
86
|
+
/**
 * Check command files for the minimal expected Markdown structure.
 *
 * A file passes when it starts with `# ` and contains `##` somewhere.
 *
 * @param {string} [name] - Single command to validate (`.md` appended when
 *   missing). When falsy, every command from `listCommands()` is validated.
 * @returns {Promise} Resolves to `{ ok, issues }`: names that passed and
 *   human-readable problem strings (including read errors).
 * @throws {Error} When a name resolves outside the commands directory.
 */
export async function validateCommands(name) {
    let names;
    if (name) {
        names = [name.endsWith(".md") ? name : `${name}.md`];
    }
    else {
        names = await listCommands();
    }
    const ok = [];
    const issues = [];
    for (const commandName of names) {
        const commandPath = resolveUnder(commandsDir(), commandName);
        let contents;
        try {
            contents = await fs.readFile(commandPath, "utf8");
        }
        catch (readError) {
            issues.push(`${commandName}: ${readError.message}`);
            continue;
        }
        if (!contents.startsWith("# ")) {
            issues.push(`${commandName}: missing title`);
            continue;
        }
        if (!contents.includes("##")) {
            issues.push(`${commandName}: add at least one section heading`);
            continue;
        }
        ok.push(commandName);
    }
    return { ok, issues };
}
|
|
107
|
+
/**
 * Resolve `requested` against `root` and refuse results outside `root`.
 *
 * Containment is decided from the relative path between the two resolved
 * locations, so it also works when `root` is the filesystem root and for
 * entries whose names merely begin with two dots (for example `..notes.md`).
 *
 * @param {string} root - Directory the result must stay inside.
 * @param {string} requested - Relative or absolute path supplied by the caller.
 * @returns {string} The absolute resolved path (may equal the resolved root).
 * @throws {Error} When the resolved path is not inside `root`.
 */
function resolveUnder(root, requested) {
    const normalizedRoot = path.resolve(root);
    const resolved = path.resolve(normalizedRoot, requested);
    const relative = path.relative(normalizedRoot, resolved);
    // An absolute "relative" path happens on Windows when the drives differ.
    const escapes = relative === ".." ||
        relative.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relative);
    if (escapes) {
        throw new Error(`Path escapes ${root}: ${requested}`);
    }
    return resolved;
}
|
|
115
|
+
/**
 * Recursively collect `.md` files below a directory, skipping `.trash` folders.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise} Resolves to the full paths of the Markdown files found;
 *   `[]` when the directory cannot be read.
 */
async function listMarkdownFiles(dir) {
    let entries;
    try {
        entries = await fs.readdir(dir, { withFileTypes: true });
    }
    catch {
        return [];
    }
    const found = [];
    for (const entry of entries) {
        const entryPath = path.join(dir, entry.name);
        if (entry.isDirectory()) {
            // Trashed pages are deliberately excluded from listings.
            if (entry.name === ".trash")
                continue;
            const nested = await listMarkdownFiles(entryPath);
            found.push(...nested);
            continue;
        }
        if (entry.isFile() && entry.name.endsWith(".md")) {
            found.push(entryPath);
        }
    }
    return found;
}
|
|
133
|
+
/**
 * Compute the SHA-256 digest of a value.
 *
 * @param {string|Buffer} body - Data to hash.
 * @returns {string} Lower-case hexadecimal digest.
 */
function sha256(body) {
    const hasher = crypto.createHash("sha256");
    hasher.update(body);
    return hasher.digest("hex");
}
|
|
136
|
+
/**
 * Resolve a path that lives outside the library.
 *
 * Thin wrapper that delegates to `resolveMaybeRelative` from `./store.js` and
 * returns its result unchanged.
 *
 * @param {string} filePath - Path to resolve.
 * @returns {*} Whatever `resolveMaybeRelative(filePath)` returns.
 */
export function resolveExternalPath(filePath) {
    return resolveMaybeRelative(filePath);
}
|
package/dist/markdown.js
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import crypto from "node:crypto";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { ensureDataDirs, libraryDir, markdownCategoriesDir, markdownDomainsDir, markdownVideosDir, } from "./paths.js";
|
|
5
|
+
import { sanitizeFilePart } from "./store.js";
|
|
6
|
+
/**
 * Render each video to its Markdown page on disk.
 *
 * @param {Array} videos - Video records to export.
 * @param {object} [options]
 * @param {boolean} [options.changedOnly] - When truthy, pages whose on-disk
 *   content already equals the rendered body are skipped.
 * @returns {Promise} Resolves to `{ written, skipped, files }` where `files`
 *   lists the paths written and `written` is their count.
 */
export async function exportMarkdown(videos, options) {
    ensureDataDirs();
    const files = [];
    let skipped = 0;
    for (const video of videos) {
        const pagePath = videoMarkdownPath(video);
        const rendered = renderVideoMarkdown(video);
        const unchanged = Boolean(options?.changedOnly) && (await sameContent(pagePath, rendered));
        if (unchanged) {
            skipped += 1;
            continue;
        }
        await fs.mkdir(path.dirname(pagePath), { recursive: true });
        await fs.writeFile(pagePath, rendered, "utf8");
        files.push(pagePath);
    }
    // Every written page is recorded in `files`, so its length is the count.
    return { written: files.length, skipped, files };
}
|
|
25
|
+
/**
 * Build the full Markdown wiki: video pages, one page per category and per
 * domain, and the library index.
 *
 * Videos without a classification are grouped under "uncategorized" (category)
 * and "general" (domain).
 *
 * @param {Array} videos - Video records to compile.
 * @returns {Promise} Resolves to `{ written, skipped, files }`, where `files`
 *   holds every path written and `skipped` comes from the video export.
 */
export async function compileWiki(videos) {
    const exported = await exportMarkdown(videos);
    await fs.mkdir(markdownCategoriesDir(), { recursive: true });
    await fs.mkdir(markdownDomainsDir(), { recursive: true });
    const byCategory = groupBy(videos, (video) => video.classification?.category ?? "uncategorized");
    const byDomain = groupBy(videos, (video) => video.classification?.domain ?? "general");
    const files = exported.files.slice();
    // Categories are written before domains; both use the same page layout.
    const sections = [
        { kind: "Category", dirOf: markdownCategoriesDir, groups: byCategory },
        { kind: "Domain", dirOf: markdownDomainsDir, groups: byDomain },
    ];
    for (const { kind, dirOf, groups } of sections) {
        for (const [groupName, members] of groups) {
            const pagePath = path.join(dirOf(), `${sanitizeFilePart(groupName)}.md`);
            await fs.writeFile(pagePath, renderGroupPage(kind, groupName, members), "utf8");
            files.push(pagePath);
        }
    }
    const indexPath = path.join(libraryDir(), "index.md");
    await fs.writeFile(indexPath, renderIndex(videos, byCategory, byDomain), "utf8");
    files.push(indexPath);
    return { written: files.length, skipped: exported.skipped, files };
}
|
|
47
|
+
/**
 * Path of the Markdown page for a video.
 *
 * @param {object} video - Video record; only `video.id` is read.
 * @returns {string} `<markdownVideosDir()>/<sanitizeFilePart(video.id)>.md`.
 */
export function videoMarkdownPath(video) {
    return path.join(markdownVideosDir(), `${sanitizeFilePart(video.id)}.md`);
}
|
|
50
|
+
/**
 * Render a video record as a Markdown page with YAML-style front matter.
 *
 * The title is the first line of the description (at most 80 characters) or
 * `Clip <id>`. Missing classification falls back to "uncategorized" /
 * "general"; missing hashtags are treated as an empty list.
 *
 * @param {object} video - Video record. Reads `id`, `url`, `canonicalUrl`,
 *   `description`, `author`, `createdAt`, `classification`, `transcript`,
 *   `hashtags`, `music` and `stats`.
 * @returns {string} The page content.
 */
export function renderVideoMarkdown(video) {
    const title = video.description?.split(/\r?\n/)[0]?.slice(0, 80) || `Clip ${video.id}`;
    const category = video.classification?.category ?? "uncategorized";
    const domain = video.classification?.domain ?? "general";
    const topics = video.classification?.topics ?? [];
    // Guarded like every other optional field so a record without hashtags
    // renders "none" instead of throwing.
    const hashtags = video.hashtags ?? [];
    return [
        "---",
        `id: ${yamlString(video.id)}`,
        `url: ${yamlString(video.canonicalUrl ?? video.url)}`,
        `author: ${yamlString(video.author?.username ?? "")}`,
        `created_at: ${yamlString(video.createdAt ?? "")}`,
        `category: ${yamlString(category)}`,
        `domain: ${yamlString(domain)}`,
        `topics: [${topics.map(yamlString).join(", ")}]`,
        "---",
        "",
        `# ${title}`,
        "",
        `Source: ${video.canonicalUrl ?? video.url}`,
        video.author?.username ? `Author: @${video.author.username}` : "",
        `Category: [[categories/${sanitizeFilePart(category)}|${category}]]`,
        `Domain: [[domains/${sanitizeFilePart(domain)}|${domain}]]`,
        "",
        "## Summary",
        "",
        video.classification?.summary ?? "No summary yet.",
        "",
        "## Description",
        "",
        video.description ?? "",
        "",
        "## Transcript",
        "",
        video.transcript?.text ?? "_No transcript yet._",
        "",
        "## Metadata",
        "",
        `- Hashtags: ${hashtags.map((tag) => `#${tag}`).join(" ") || "none"}`,
        `- Music: ${[video.music?.title, video.music?.author].filter(Boolean).join(" - ") || "unknown"}`,
        `- Plays: ${video.stats?.plays ?? "unknown"}`,
        `- Likes: ${video.stats?.likes ?? "unknown"}`,
        "",
    ]
        // NOTE(review): this drops EVERY empty entry, including the blank
        // separator lines above, so the output has no blank lines and no
        // trailing newline. Kept as is so exported files stay byte-identical;
        // confirm whether only the empty "Author" line was meant to be removed.
        .filter((line) => line !== "")
        .join("\n");
}
|
|
96
|
+
/**
 * Find wiki links (`[[target]]` / `[[target|alias]]`) that point at pages
 * which do not exist in the library, and optionally create stub pages.
 *
 * Links whose target starts with `http://` or `https://` are ignored. Targets
 * are compared with any `.md` suffix removed, relative to the library root.
 *
 * @param {object} [options]
 * @param {boolean} [options.fix] - When truthy, create a `# <name>` stub page
 *   for each missing target that does not already exist on disk.
 * @returns {Promise} Resolves to `{ broken, fixed }`: `broken` lists
 *   `"<file> -> <target>"` strings and `fixed` counts the stubs created.
 */
export async function lintWiki(options) {
    const root = libraryDir();
    const files = await listMarkdownFiles(root);
    const existing = new Set(files.map((file) => stripMd(path.relative(root, file))));
    const broken = [];
    // Targets are tracked separately instead of being re-parsed out of the
    // report strings, which breaks when a path or target contains " -> ".
    const missingTargets = [];
    for (const file of files) {
        const text = await fs.readFile(file, "utf8");
        for (const link of extractWikiLinks(text)) {
            const target = stripAlias(link);
            if (/^https?:\/\//.test(target))
                continue;
            if (existing.has(stripMd(target)))
                continue;
            broken.push(`${path.relative(root, file)} -> ${target}`);
            missingTargets.push(target);
        }
    }
    let fixed = 0;
    if (options?.fix) {
        for (const target of missingTargets) {
            if (!target)
                continue;
            const filePath = path.join(root, `${stripMd(target)}.md`);
            // Never create stubs outside the library (e.g. "[[../../x]]").
            const relative = path.relative(root, filePath);
            if (relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative))
                continue;
            await fs.mkdir(path.dirname(filePath), { recursive: true });
            try {
                await fs.access(filePath);
            }
            catch {
                await fs.writeFile(filePath, `# ${path.basename(target)}\n\n`, "utf8");
                fixed += 1;
            }
        }
    }
    return { broken, fixed };
}
|
|
129
|
+
/**
 * Render the library index page: totals, categories and domains (largest
 * group first), and links to the first 50 videos.
 *
 * @param {Array} videos - All video records, in the order they should be listed.
 * @param {Map} byCategory - Category name to videos.
 * @param {Map} byDomain - Domain name to videos.
 * @returns {string} Markdown for `index.md`, ending with a newline.
 */
function renderIndex(videos, byCategory, byDomain) {
    const transcriptCount = videos.filter((video) => video.transcript?.text).length;
    // One bullet per group, ordered by descending group size.
    const groupLinks = (groups, folder) => [...groups.entries()]
        .sort((a, b) => b[1].length - a[1].length)
        .map(([name, members]) => `- [[${folder}/${sanitizeFilePart(name)}|${name}]] (${members.length})`);
    const lines = [
        "# Tokwise Library",
        "",
        `${videos.length} videos. ${transcriptCount} transcripts.`,
        "",
        "## Categories",
        "",
    ];
    lines.push(...groupLinks(byCategory, "categories"));
    lines.push("", "## Domains", "");
    lines.push(...groupLinks(byDomain, "domains"));
    lines.push("", "## Recent Videos", "");
    for (const video of videos.slice(0, 50)) {
        lines.push(`- [[videos/${sanitizeFilePart(video.id)}|${videoTitle(video)}]]`);
    }
    lines.push("");
    return lines.join("\n");
}
|
|
153
|
+
/**
 * Render the page for one category or domain: heading, count and video links.
 *
 * @param {string} kind - Label used in the heading (e.g. "Category").
 * @param {string} name - Group name.
 * @param {Array} videos - Videos belonging to the group.
 * @returns {string} Markdown for the group page, ending with a newline.
 */
function renderGroupPage(kind, name, videos) {
    const lines = [`# ${kind}: ${name}`, "", `${videos.length} videos.`, ""];
    for (const video of videos) {
        lines.push(`- [[videos/${sanitizeFilePart(video.id)}|${videoTitle(video)}]]`);
    }
    lines.push("");
    return lines.join("\n");
}
|
|
163
|
+
/**
 * Short display title for a video: its description with whitespace runs
 * collapsed to single spaces and cut to 80 characters, or the id when that
 * yields nothing.
 *
 * @param {object} video - Video record; reads `description` and `id`.
 * @returns {string} The display title.
 */
function videoTitle(video) {
    const { description, id } = video;
    if (description == null)
        return id;
    const collapsed = description.replace(/\s+/g, " ").slice(0, 80);
    return collapsed ? collapsed : id;
}
|
|
166
|
+
/**
 * Group items by a computed key, preserving first-seen key order and item order.
 *
 * @param {Iterable} items - Items to group.
 * @param {Function} keyFn - Returns the grouping key for an item.
 * @returns {Map} Key to array of items sharing that key.
 */
function groupBy(items, keyFn) {
    const groups = new Map();
    for (const item of items) {
        const key = keyFn(item);
        const bucket = groups.get(key);
        // Append in place; copying the bucket on every insert is quadratic.
        if (bucket) {
            bucket.push(item);
        }
        else {
            groups.set(key, [item]);
        }
    }
    return groups;
}
|
|
174
|
+
/**
 * Serialise a value for the front matter by delegating to `JSON.stringify`.
 *
 * @param {*} value - Value to serialise.
 * @returns {string} The JSON text of `value` (strings come back double-quoted
 *   and escaped).
 */
function yamlString(value) {
    return JSON.stringify(value);
}
|
|
177
|
+
/**
 * Tell whether a file already holds exactly the given text.
 *
 * @param {string} filePath - File to compare against.
 * @param {string} body - Expected UTF-8 content.
 * @returns {Promise} Resolves to `true` when the file exists and its content
 *   equals `body`; `false` otherwise, including when it cannot be read.
 */
async function sameContent(filePath, body) {
    try {
        // Both sides are already in memory, so compare them directly rather
        // than hashing each one first.
        return (await fs.readFile(filePath, "utf8")) === body;
    }
    catch {
        return false;
    }
}
|
|
185
|
+
/**
 * Recursively collect `.md` files below a directory.
 *
 * `.trash` folders are skipped so that pages moved there by a soft delete are
 * neither linted nor treated as valid link targets.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise} Resolves to the full paths of the Markdown files found;
 *   `[]` when the directory cannot be read.
 */
async function listMarkdownFiles(dir) {
    let entries;
    try {
        entries = await fs.readdir(dir, { withFileTypes: true });
    }
    catch {
        return [];
    }
    const files = [];
    for (const entry of entries) {
        const fullPath = path.join(dir, entry.name);
        if (entry.isDirectory()) {
            if (entry.name !== ".trash")
                files.push(...(await listMarkdownFiles(fullPath)));
        }
        else if (entry.isFile() && entry.name.endsWith(".md")) {
            files.push(fullPath);
        }
    }
    return files;
}
|
|
203
|
+
/**
 * Extract the inner text of every `[[...]]` wiki link.
 *
 * @param {string} text - Markdown source.
 * @returns {string[]} Link contents (including any `|alias` part), in order
 *   of appearance.
 */
function extractWikiLinks(text) {
    const links = [];
    for (const match of text.matchAll(/\[\[([^\]]+)\]\]/g)) {
        const inner = match[1];
        if (inner)
            links.push(inner);
    }
    return links;
}
|
|
206
|
+
/**
 * Drop the `|alias` part of a wiki link, keeping only the target.
 *
 * @param {string} link - Link content such as `page|Shown text`.
 * @returns {string} Everything before the first `|`, or the whole link when
 *   there is none.
 */
function stripAlias(link) {
    const barIndex = link.indexOf("|");
    if (barIndex === -1)
        return link;
    return link.slice(0, barIndex);
}
|
|
209
|
+
/**
 * Remove a trailing `.md` extension (any letter case) from a link or path.
 *
 * @param {string} link - Link target or relative path.
 * @returns {string} The input without its `.md` suffix.
 */
function stripMd(link) {
    const hasMdSuffix = /\.md$/i.test(link);
    return hasMdSuffix ? link.slice(0, -3) : link;
}
|
package/dist/media.js
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { audioDir, ensureDataDirs, mediaDir } from "./paths.js";
|
|
4
|
+
import { runProcess } from "./process.js";
|
|
5
|
+
import { sanitizeFilePart } from "./store.js";
|
|
6
|
+
// TikTok advertises aac on every format, but without yt-dlp impersonation its
|
|
7
|
+
// HEVC (bytevc1) "best" formats download video-only. That breaks audio
|
|
8
|
+
// extraction ("unable to obtain file audio codec") and yields silent videos.
|
|
9
|
+
// h264 formats carry a real audio track, so prefer them and only fall back to
|
|
10
|
+
// yt-dlp's default best when no h264 rendition exists.
|
|
11
|
+
const PREFERRED_FORMAT = "b[vcodec^=h264]/b";
|
|
12
|
+
/**
 * Download a video (or only its audio) with yt-dlp and report where it landed.
 *
 * @param {object} video - Video record; reads `id`, `media`, `canonicalUrl`, `url`.
 * @param {object} [options]
 * @param {string} [options.ytDlp] - Command to run; defaults to "yt-dlp".
 * @param {boolean} [options.audioOnly] - Extract audio into the audio
 *   directory instead of saving the video into the media directory.
 * @param {string} [options.audioFormat] - Audio format; defaults to "m4a".
 * @param {boolean} [options.force] - Download even when the recorded file exists.
 * @param {string} [options.proxy] - Passed to `--proxy`.
 * @param {string} [options.cookiesFile] - Passed to `--cookies`.
 * @param {string} [options.cookiesFromBrowser] - Passed to `--cookies-from-browser`.
 * @returns {Promise} Resolves to `{ id, changed, media }`. `changed` is false
 *   when an existing file was reused.
 * @throws {Error} When the source URL is malformed or yt-dlp exits non-zero.
 */
export async function downloadMedia(video, options = {}) {
    ensureDataDirs();
    const ytDlpCommand = options.ytDlp ?? "yt-dlp";
    const targetDir = options.audioOnly ? audioDir() : mediaDir();
    const fileStem = sanitizeFilePart(video.id);
    const recordedPath = options.audioOnly ? video.media?.audioPath : video.media?.videoPath;
    if (!options.force && recordedPath && (await exists(recordedPath))) {
        return { id: video.id, changed: false, media: video.media ?? {} };
    }
    // Snapshot the directory so files created by this run can be told apart.
    const preexisting = new Set(await listFiles(targetDir));
    const args = [
        "--no-playlist",
        "--restrict-filenames",
        // Always redownload: a previous failed audio extraction can leave a
        // video-only intermediate behind that yt-dlp would otherwise reuse
        // and keep failing on.
        "--force-overwrites",
        "-f",
        PREFERRED_FORMAT,
        "--print",
        "after_move:filepath",
        "-o",
        path.join(targetDir, `${fileStem}.%(ext)s`),
    ];
    if (options.proxy) {
        args.push("--proxy", options.proxy);
    }
    if (options.cookiesFile) {
        args.push("--cookies", options.cookiesFile);
    }
    if (options.cookiesFromBrowser) {
        args.push("--cookies-from-browser", options.cookiesFromBrowser);
    }
    if (options.audioOnly) {
        args.push("-x", "--audio-format", options.audioFormat ?? "m4a");
    }
    else {
        args.push("--write-info-json");
    }
    args.push(downloadTargetUrl(video));
    const result = await runProcess(ytDlpCommand, args);
    if (result.code !== 0) {
        throw new Error(`yt-dlp failed for ${video.id}: ${result.stderr || result.stdout}`);
    }
    // The last absolute path printed on stdout is taken as the final file.
    const outputLines = result.stdout
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter(Boolean);
    let printedPath;
    for (let index = outputLines.length - 1; index >= 0; index -= 1) {
        if (path.isAbsolute(outputLines[index])) {
            printedPath = outputLines[index];
            break;
        }
    }
    // NOTE(review): when nothing was printed and no new file is found this
    // stays undefined and is stored as the path; confirm callers handle that.
    const finalPath = printedPath ?? (await newestCreatedFile(targetDir, preexisting, fileStem));
    const nextMedia = { ...(video.media ?? {}), downloadedAt: new Date().toISOString() };
    if (options.audioOnly) {
        nextMedia.audioPath = finalPath;
    }
    else {
        nextMedia.videoPath = finalPath;
        const infoPath = await findInfoJson(targetDir, fileStem);
        if (infoPath) {
            nextMedia.infoJsonPath = infoPath;
        }
    }
    return { id: video.id, changed: true, media: nextMedia };
}
|
|
72
|
+
/**
 * Pick the URL to hand to the downloader for a video.
 *
 * @param {object} video - Video record; `canonicalUrl` is preferred over `url`.
 * @returns {string} The URL to download.
 * @throws {Error} When the URL is recognised as a malformed TikTok video URL.
 */
export function downloadTargetUrl(video) {
    const target = video.canonicalUrl ?? video.url;
    if (!isMalformedTikTokVideoUrl(target)) {
        return target;
    }
    throw new Error(`${video.id} does not have a valid source video URL (${target}). Remove this malformed record or resync the source with the latest CLI.`);
}
|
|
79
|
+
/**
 * Detect tiktok.com URLs that cannot point at a real video.
 *
 * A URL is malformed when it is on `tiktok.com` / `www.tiktok.com` and either
 * contains a `/404` path segment or has a `/video/<id>` part whose id is not
 * a run of at least eight digits. URLs on other hosts are never flagged.
 *
 * @param {string} url - URL to inspect.
 * @returns {boolean} `true` when the URL is a malformed TikTok video URL.
 */
function isMalformedTikTokVideoUrl(url) {
    const onTikTok = /https?:\/\/(?:www\.)?tiktok\.com\//i.test(url);
    if (!onTikTok) {
        return false;
    }
    const isNotFoundPage = /\/404(?:[/?#]|$)/i.test(url);
    if (isNotFoundPage) {
        return true;
    }
    const videoMatch = /\/video\/([^/?#]+)/i.exec(url);
    if (videoMatch === null) {
        // No /video/ segment at all: nothing to validate.
        return false;
    }
    return !/^\d{8,}$/.test(videoMatch[1] ?? "");
}
|
|
87
|
+
/**
 * List the entries of a directory as full paths.
 *
 * @param {string} dir - Directory to read.
 * @returns {Promise} Resolves to `dir` joined with each entry name; `[]` when
 *   the directory cannot be read.
 */
async function listFiles(dir) {
    let names;
    try {
        names = await fs.readdir(dir);
    }
    catch {
        return [];
    }
    return names.map((name) => path.join(dir, name));
}
|
|
95
|
+
/**
 * Check whether a path is accessible.
 *
 * @param {string} filePath - Path to probe with `fs.access`.
 * @returns {Promise} Resolves to `true` when the access check succeeds,
 *   `false` on any failure.
 */
async function exists(filePath) {
    let reachable = true;
    try {
        await fs.access(filePath);
    }
    catch {
        reachable = false;
    }
    return reachable;
}
|
|
104
|
+
/**
 * Find the most recently modified file that appeared in a directory since a
 * snapshot was taken and whose name starts with a given prefix.
 *
 * @param {string} dir - Directory to scan.
 * @param {Set} before - Full paths that existed before the operation.
 * @param {string} prefix - Required start of the file's base name.
 * @returns {Promise} Resolves to the path with the greatest mtime, or
 *   `undefined` when no new matching file exists.
 */
async function newestCreatedFile(dir, before, prefix) {
    const current = await listFiles(dir);
    const fresh = current.filter((candidate) => !before.has(candidate) && path.basename(candidate).startsWith(prefix));
    let bestPath;
    let bestTime;
    for (const candidate of fresh) {
        const { mtimeMs } = await fs.stat(candidate);
        if (bestPath === undefined || mtimeMs > bestTime) {
            bestPath = candidate;
            bestTime = mtimeMs;
        }
    }
    return bestPath;
}
|
|
114
|
+
/**
 * Locate the `.info.json` file belonging to a download.
 *
 * The base name must start with `<prefix>.` (note the dot) and end with
 * `.info.json`, so an id that is merely a prefix of another id (for example
 * `123` versus `1234`) cannot pick up the other download's metadata file.
 *
 * @param {string} dir - Directory to search.
 * @param {string} prefix - Sanitised id used as the file-name stem.
 * @returns {Promise} Resolves to the matching path, or `undefined` when none exists.
 */
async function findInfoJson(dir, prefix) {
    const files = await listFiles(dir);
    const stem = `${prefix}.`;
    return files.find((filePath) => {
        const name = path.basename(filePath);
        return name.startsWith(stem) && name.endsWith(".info.json");
    });
}
|
package/dist/paths.js
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import os from "node:os";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
/**
 * Expand a leading `~` to the current user's home directory.
 *
 * Only the bare `~` and the `~/...` forms are expanded; anything else
 * (including `~user`) is returned unchanged.
 *
 * @param {string} input - Path that may start with `~`.
 * @returns {string} The expanded path.
 */
export function expandHome(input) {
    const isBareTilde = input === "~";
    const isTildePath = input.startsWith("~/");
    if (!isBareTilde && !isTildePath) {
        return input;
    }
    const home = os.homedir();
    return isBareTilde ? home : path.join(home, input.slice(2));
}
|
|
11
|
+
/**
 * Resolve the root data directory.
 *
 * Precedence: `TOKWISE_DATA_DIR`, `TW_DATA_DIR`, then the legacy
 * `TT_DATA_DIR` and `TIKTOK_THEORY_DATA_DIR`. An empty variable counts as
 * unset, so it never hides a later one. Without any variable, `~/.tokwise` is
 * used, unless it does not exist and the legacy `~/.tiktoktheory` does.
 *
 * @returns {string} Absolute path of the data directory.
 */
export function dataDir() {
    const preferred = process.env.TOKWISE_DATA_DIR || process.env.TW_DATA_DIR;
    if (preferred)
        return path.resolve(expandHome(preferred));
    const legacy = process.env.TT_DATA_DIR || process.env.TIKTOK_THEORY_DATA_DIR;
    if (legacy)
        return path.resolve(expandHome(legacy));
    const defaultDir = path.join(os.homedir(), ".tokwise");
    const legacyDir = path.join(os.homedir(), ".tiktoktheory");
    // Keep using an existing legacy directory until the new one is created.
    if (!fs.existsSync(defaultDir) && fs.existsSync(legacyDir))
        return legacyDir;
    return path.resolve(expandHome("~/.tokwise"));
}
|
|
24
|
+
/**
 * @returns {string} The `videos` directory inside `dataDir()`.
 */
export function videosDir() {
    return path.join(dataDir(), "videos");
}
|
|
27
|
+
/**
 * @returns {string} The `media` directory inside `videosDir()`.
 */
export function mediaDir() {
    return path.join(videosDir(), "media");
}
|
|
30
|
+
/**
 * @returns {string} The `audio` directory inside `videosDir()`.
 */
export function audioDir() {
    return path.join(videosDir(), "audio");
}
|
|
33
|
+
/**
 * @returns {string} The `transcripts` directory inside `videosDir()`.
 */
export function transcriptDir() {
    return path.join(videosDir(), "transcripts");
}
|
|
36
|
+
/**
 * @returns {string} Path of `videos.jsonl` inside `videosDir()`.
 */
export function videosJsonlPath() {
    return path.join(videosDir(), "videos.jsonl");
}
|
|
39
|
+
/**
 * @returns {string} Path of `search-index.json` inside `videosDir()`.
 */
export function searchIndexPath() {
    return path.join(videosDir(), "search-index.json");
}
|
|
42
|
+
/**
 * @returns {string} Path of `auth.json` inside `videosDir()`.
 */
export function authPath() {
    return path.join(videosDir(), "auth.json");
}
|
|
45
|
+
/**
 * @returns {string} Path of `preferences.json` directly inside `dataDir()`.
 */
export function preferencesPath() {
    return path.join(dataDir(), "preferences.json");
}
|
|
48
|
+
/**
 * Resolve the Markdown library directory.
 *
 * Precedence: `TOKWISE_LIBRARY_DIR`, `TW_LIBRARY_DIR`, `TT_LIBRARY_DIR`, then
 * `<dataDir()>/library`. An empty variable counts as unset; otherwise
 * `path.resolve("")` would silently point the library at the current working
 * directory.
 *
 * @returns {string} Absolute path of the library directory.
 */
export function libraryDir() {
    const configured = process.env.TOKWISE_LIBRARY_DIR || process.env.TW_LIBRARY_DIR || process.env.TT_LIBRARY_DIR;
    return path.resolve(expandHome(configured || path.join(dataDir(), "library")));
}
|
|
51
|
+
/**
 * @returns {string} The `videos` directory inside `libraryDir()`.
 */
export function markdownVideosDir() {
    return path.join(libraryDir(), "videos");
}
|
|
54
|
+
/**
 * @returns {string} The `categories` directory inside `libraryDir()`.
 */
export function markdownCategoriesDir() {
    return path.join(libraryDir(), "categories");
}
|
|
57
|
+
/**
 * @returns {string} The `domains` directory inside `libraryDir()`.
 */
export function markdownDomainsDir() {
    return path.join(libraryDir(), "domains");
}
|
|
60
|
+
/**
 * Resolve the directory that holds reusable command files.
 *
 * Precedence: `TOKWISE_COMMANDS_DIR`, `TW_COMMANDS_DIR`, `TT_COMMANDS_DIR`,
 * then `<dataDir()>/commands`. An empty variable counts as unset; otherwise
 * `path.resolve("")` would silently point at the current working directory.
 *
 * @returns {string} Absolute path of the commands directory.
 */
export function commandsDir() {
    const configured = process.env.TOKWISE_COMMANDS_DIR || process.env.TW_COMMANDS_DIR || process.env.TT_COMMANDS_DIR;
    return path.resolve(expandHome(configured || path.join(dataDir(), "commands")));
}
|
|
63
|
+
/**
 * Synchronously create a directory, including missing parents.
 *
 * @param {string} dir - Directory to create.
 */
export function ensureDir(dir) {
    fs.mkdirSync(dir, { recursive: true });
}
|
|
66
|
+
/**
 * Synchronously create every directory the tool writes to: the data root, the
 * video/media/audio/transcript folders, the library with its videos,
 * categories and domains folders, and the commands folder.
 */
export function ensureDataDirs() {
    const requiredDirs = [
        dataDir(),
        videosDir(),
        mediaDir(),
        audioDir(),
        transcriptDir(),
        libraryDir(),
        markdownVideosDir(),
        markdownCategoriesDir(),
        markdownDomainsDir(),
        commandsDir(),
    ];
    requiredDirs.forEach((dir) => {
        ensureDir(dir);
    });
}
|
|
82
|
+
/**
 * Abbreviate a path for display by replacing the home directory with `~`.
 *
 * Only the home directory itself and paths inside it are abbreviated; a
 * sibling directory that merely shares the prefix (home `/home/bob`, path
 * `/home/bob2/x`) is returned unchanged.
 *
 * @param {string} [filePath] - Path to abbreviate.
 * @returns {string|undefined} The display path, or `undefined` for a falsy input.
 */
export function toDisplayPath(filePath) {
    if (!filePath)
        return undefined;
    const home = os.homedir();
    if (filePath === home)
        return "~";
    // Compare against the home path without trailing separators and require a
    // separator right after it, so only real descendants match.
    const base = home.replace(/[\\/]+$/, "");
    const next = filePath[base.length];
    if (filePath.startsWith(base) && (next === path.sep || next === "/")) {
        return `~${filePath.slice(base.length)}`;
    }
    return filePath;
}
|
package/dist/process.js
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { exec as execCallback } from "node:child_process";
|
|
2
|
+
import { spawn } from "node:child_process";
|
|
3
|
+
import { promisify } from "node:util";
|
|
4
|
+
const exec = promisify(execCallback);
|
|
5
|
+
/**
 * Run a command without a shell and capture its output.
 *
 * The promise resolves for ordinary failures so callers can inspect `code`:
 * - 127 when the command is not found (ENOENT),
 * - 126 when it is not executable (EACCES),
 * - 1 when the child was terminated by a signal (a note naming the signal is
 *   appended to `stderr`), so a killed process is never reported as success.
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments passed verbatim.
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the child.
 * @returns {Promise} Resolves to `{ code, stdout, stderr }`.
 * @throws Rejects with the spawn error for failures other than ENOENT/EACCES.
 */
export async function runProcess(command, args, options) {
    return new Promise((resolve, reject) => {
        const child = spawn(command, args, { cwd: options?.cwd, stdio: ["ignore", "pipe", "pipe"] });
        // Both "error" and "close" can fire; only the first one settles.
        let settled = false;
        let stdout = "";
        let stderr = "";
        child.stdout.setEncoding("utf8");
        child.stderr.setEncoding("utf8");
        child.stdout.on("data", (chunk) => {
            stdout += chunk;
        });
        child.stderr.on("data", (chunk) => {
            stderr += chunk;
        });
        child.on("error", (error) => {
            if (settled)
                return;
            settled = true;
            if (error.code === "ENOENT") {
                resolve({
                    code: 127,
                    stdout,
                    stderr: `Command not found: ${command}. Install it or pass a custom command path.`,
                });
                return;
            }
            if (error.code === "EACCES") {
                resolve({
                    code: 126,
                    stdout,
                    stderr: `Command is not executable: ${command}. Check permissions or pass a custom command path.`,
                });
                return;
            }
            reject(error);
        });
        child.on("close", (code, signal) => {
            if (settled)
                return;
            settled = true;
            if (code == null) {
                // A null exit code means the child was killed by a signal.
                const note = `Process terminated by signal ${signal ?? "unknown"}`;
                resolve({ code: 1, stdout, stderr: stderr ? `${stderr}\n${note}` : note });
                return;
            }
            resolve({ code, stdout, stderr });
        });
    });
}
|
|
49
|
+
/**
 * Run a command line through the shell and capture its output (up to 20 MiB
 * per stream).
 *
 * Never rejects: failures are reported through a non-zero `code`. The code is
 * always a number; when the failure carries no numeric exit status (for
 * example a string error code, or termination by signal) it is reported as 1.
 *
 * @param {string} command - Shell command line.
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the shell.
 * @returns {Promise} Resolves to `{ code, stdout, stderr }`.
 */
export async function runShell(command, options) {
    try {
        const { stdout, stderr } = await exec(command, { cwd: options?.cwd, maxBuffer: 1024 * 1024 * 20 });
        return { code: 0, stdout, stderr };
    }
    catch (error) {
        const err = error;
        // `err.code` is not always the exit status; it can be a string code.
        const exitCode = typeof err.code === "number" ? err.code : 1;
        return { code: exitCode, stdout: err.stdout ?? "", stderr: err.stderr ?? err.message };
    }
}
|
|
59
|
+
/**
 * Probe whether a command is usable by running it with `--version`.
 *
 * @param {string} command - Executable to probe.
 * @returns {Promise} Resolves to `true` when the probe exits with code 0.
 */
export async function commandExists(command) {
    const { code } = await runProcess(command, ["--version"]);
    return code === 0;
}
|
|
63
|
+
/**
 * Wrap a string in single quotes for use in a shell command line.
 *
 * Each embedded single quote is emitted as `'\''`: close the quoted string,
 * add an escaped quote, reopen the quoted string.
 *
 * @param {string} value - Text to quote.
 * @returns {string} The single-quoted text.
 */
export function quoteShell(value) {
    const escaped = value.split("'").join("'\\''");
    return "'" + escaped + "'";
}
|
|
66
|
+
/**
 * Fill `{name}` placeholders in a command template with shell-quoted values.
 *
 * Placeholder names may contain letters, digits, `_` and `-`. Only own
 * properties of `values` are used, so names such as `{constructor}` do not
 * pick up inherited members. Missing or nullish values become an empty quoted
 * string, and non-string values are converted with `String()` before quoting.
 *
 * @param {string} template - Command line containing `{name}` placeholders.
 * @param {object} values - Placeholder name to value.
 * @returns {string} The command line with every placeholder replaced.
 */
export function templateCommand(template, values) {
    return template.replace(/\{([a-zA-Z0-9_-]+)\}/g, (_match, key) => {
        const supplied = Object.prototype.hasOwnProperty.call(values, key) ? values[key] : undefined;
        return quoteShell(String(supplied ?? ""));
    });
}
|