tokwise 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +185 -0
- package/dist/ask.js +58 -0
- package/dist/browser-cookies.js +160 -0
- package/dist/classify.js +118 -0
- package/dist/cli.js +894 -0
- package/dist/jsonl.js +51 -0
- package/dist/library.js +138 -0
- package/dist/markdown.js +211 -0
- package/dist/media.js +117 -0
- package/dist/paths.js +87 -0
- package/dist/process.js +68 -0
- package/dist/progress.js +56 -0
- package/dist/render.js +114 -0
- package/dist/search.js +226 -0
- package/dist/skill.js +57 -0
- package/dist/store.js +158 -0
- package/dist/tiktok.js +445 -0
- package/dist/transcribe.js +162 -0
- package/dist/types.js +1 -0
- package/package.json +57 -0
package/dist/progress.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { c } from "./render.js";
|
|
2
|
+
const FRAMES = ["\u280b", "\u2819", "\u2839", "\u2838", "\u283c", "\u2834", "\u2826", "\u2827", "\u2807", "\u280f"];
|
|
3
|
+
const CLEAR_LINE = "\r\u001b[2K";
|
|
4
|
+
const FRAME_INTERVAL_MS = 80;
|
|
5
|
+
/**
 * Create a progress reporter that writes to stderr.
 *
 * When stderr is a TTY and `total > 0`, a spinner line is drawn immediately
 * and re-drawn on a fixed interval. When stderr is not a TTY, every
 * tick/fail writes one plain `label count/total: message` line instead.
 *
 * @param {{ total: number, label: string }} options
 * @returns {{ tick(message?: string): void, fail(message?: string): void, done(summary?: string): void }}
 */
export function createProgress(options) {
    const { total, label } = options;
    const out = process.stderr;
    const interactive = Boolean(out.isTTY);
    const state = { completed: 0, frameIndex: 0, message: "", errored: false };
    let ticker;

    // Repaint the single spinner line in place.
    const render = () => {
        const glyph = c.accent(FRAMES[state.frameIndex % FRAMES.length] ?? "");
        const tally = c.muted(`${state.completed}/${total}`);
        const text = state.errored ? c.danger(state.message) : state.message;
        out.write(`${CLEAR_LINE}${glyph} ${label} ${tally} ${text}`.trimEnd());
    };

    // Record one finished item and report it in the mode matching the stream.
    const advance = (message, isFailure) => {
        state.completed += 1;
        state.message = message;
        state.errored = isFailure;
        if (interactive) {
            render();
        }
        else {
            out.write(`${label} ${state.completed}/${total}: ${message}\n`);
        }
    };

    if (interactive && total > 0) {
        render();
        ticker = setInterval(() => {
            state.frameIndex += 1;
            render();
        }, FRAME_INTERVAL_MS);
        // The animation alone must not keep the process alive.
        ticker.unref();
    }

    return {
        tick(message = "") {
            advance(message, false);
        },
        fail(message = "") {
            advance(message, true);
        },
        done(summary) {
            if (ticker) {
                clearInterval(ticker);
                ticker = undefined;
            }
            if (interactive) {
                out.write(CLEAR_LINE);
            }
            if (summary) {
                out.write(`${summary}\n`);
            }
        },
    };
}
|
package/dist/render.js
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import pc from "picocolors";
|
|
2
|
+
const raw = pc.createColors(true);
|
|
3
|
+
let colorEnabled = pc.isColorSupported;
|
|
4
|
+
/**
 * Override the module-wide colour switch consulted by every styled helper.
 *
 * @param {boolean} on - New value of the switch.
 */
export function setColorEnabled(on) {
    colorEnabled = on;
}
|
|
7
|
+
/**
 * Report the current value of the module-wide colour switch.
 *
 * @returns {boolean}
 */
export function isColorEnabled() {
    const enabled = colorEnabled;
    return enabled;
}
|
|
10
|
+
/**
 * Wrap a colouring function so it is applied only while colour is enabled.
 * The switch is read on every call, not when the wrapper is created.
 *
 * @param {(text: string) => string} fn - Colouring function to guard.
 * @returns {(text: string) => string}
 */
function style(fn) {
    return function styled(text) {
        if (colorEnabled) {
            return fn(text);
        }
        return text;
    };
}
|
|
13
|
+
/**
 * Named text styles used by the CLI output. Every entry is guarded by
 * `style`, so each returns its input unchanged when colour is disabled.
 */
export const c = Object.fromEntries([
    ["heading", (text) => raw.bold(raw.cyan(text))],
    ["label", (text) => text],
    ["value", (text) => raw.bold(text)],
    ["muted", (text) => raw.dim(text)],
    ["accent", (text) => raw.cyan(text)],
    ["success", (text) => raw.green(text)],
    ["warn", (text) => raw.yellow(text)],
    ["danger", (text) => raw.red(text)],
].map(([name, paint]) => [name, style(paint)]));
|
|
23
|
+
const PALETTE = [raw.cyan, raw.green, raw.yellow, raw.magenta, raw.blue, raw.red];
|
|
24
|
+
/**
 * Pick a palette colour by position, cycling through PALETTE, and return it
 * as a colour-switch-aware styling function. Falls back to cyan when the
 * computed slot holds no entry.
 *
 * @param {number} index
 * @returns {(text: string) => string}
 */
function paletteColor(index) {
    const slot = index % PALETTE.length;
    const paint = PALETTE[slot] ?? raw.cyan;
    return style(paint);
}
|
|
28
|
+
// Matches SGR escape sequences: ESC [ <digits and semicolons> m.
const ANSI_PATTERN = /\u001b\[[0-9;]*m/g;
/**
 * Remove SGR colour/style escape sequences from a string.
 *
 * @param {string} text
 * @returns {string}
 */
export function stripAnsi(text) {
    const visibleChunks = text.split(ANSI_PATTERN);
    return visibleChunks.join("");
}
|
|
32
|
+
/**
 * Length of a string once SGR escape sequences are removed.
 * Counts UTF-16 code units, like `String.prototype.length`.
 *
 * @param {string} text
 * @returns {number}
 */
export function visibleWidth(text) {
    const plain = stripAnsi(text);
    return plain.length;
}
|
|
35
|
+
/**
 * Right-pad a string with spaces up to `width`, measuring by visible width
 * so escape sequences do not count. Strings already at or beyond `width`
 * are returned unchanged.
 *
 * @param {string} text
 * @param {number} width
 * @returns {string}
 */
export function padEndVisible(text, width) {
    const missing = width - visibleWidth(text);
    if (missing > 0) {
        return text + " ".repeat(missing);
    }
    return text;
}
|
|
39
|
+
/**
 * Shorten a string to at most `max` UTF-16 code units, ending with an
 * ellipsis when something was cut and there is room for it.
 *
 * @param {string} text
 * @param {number} max - Maximum length of the result.
 * @returns {string}
 */
export function truncate(text, max) {
    const fits = text.length <= max;
    if (fits) {
        return text;
    }
    if (max <= 1) {
        // No room for content plus an ellipsis: hard cut (empty for max <= 0).
        const keep = Math.max(0, max);
        return text.slice(0, keep);
    }
    const head = text.slice(0, max - 1);
    return `${head}\u2026`;
}
|
|
46
|
+
/**
 * Build a muted horizontal rule made of box-drawing dashes.
 *
 * @param {number} [width=60] - Number of dash characters.
 * @returns {string}
 */
export function rule(width = 60) {
    const dashes = "\u2500".repeat(width);
    return c.muted(dashes);
}
|
|
49
|
+
/**
 * Draw a rounded box around `lines` with `title` embedded in the top border.
 * The box is as wide as the longest line, and always wide enough for the title.
 *
 * @param {string} title
 * @param {string[]} lines - Body lines; may contain escape sequences.
 * @returns {string} Top border, one row per line, and bottom border joined by newlines.
 */
export function box(title, lines) {
    const horizontal = "\u2500";
    const titleWidth = visibleWidth(title);
    const lineWidths = lines.map((line) => visibleWidth(line));
    const bodyWidth = Math.max(titleWidth + 1, 0, ...lineWidths);
    // One space of padding on each side of the body.
    const interior = bodyWidth + 2;
    // The top border spends titleWidth + 3 cells on "─ ", the title and a space.
    const dashCount = Math.max(0, interior - (titleWidth + 3));

    const rows = [];
    rows.push(c.muted("\u256d\u2500 ") + c.heading(title) + c.muted(` ${horizontal.repeat(dashCount)}\u256e`));
    for (const line of lines) {
        rows.push(`${c.muted("\u2502")} ${padEndVisible(line, bodyWidth)} ${c.muted("\u2502")}`);
    }
    rows.push(c.muted(`\u2570${horizontal.repeat(interior)}\u256f`));
    return rows.join("\n");
}
|
|
59
|
+
// Left-aligned partial block glyphs indexed by eighths of a cell (index 0 is unused).
const EIGHTHS = ["", "\u258f", "\u258e", "\u258d", "\u258c", "\u258b", "\u258a", "\u2589"];
const FULL_BLOCK = "\u2588";
/**
 * Render `value` as a horizontal bar, where `max` fills `width` cells,
 * using eighth-cell resolution.
 *
 * @param {number} value
 * @param {number} max - Value that corresponds to a full-width bar.
 * @param {number} width - Bar width in cells for `value === max`.
 * @returns {string} Empty when `max` or `value` is not positive.
 */
function bar(value, max, width) {
    const nothingToDraw = max <= 0 || value <= 0;
    if (nothingToDraw) {
        return "";
    }
    const units = Math.round((value / max) * width * 8);
    const wholeCells = Math.floor(units / 8);
    const partial = units % 8;
    const drawn = `${FULL_BLOCK.repeat(wholeCells)}${partial > 0 ? EIGHTHS[partial] : ""}`;
    // A positive value that rounds to zero still gets the thinnest sliver.
    return drawn === "" ? (EIGHTHS[1] ?? FULL_BLOCK) : drawn;
}
|
|
74
|
+
/**
 * Render `[label, count]` pairs as an aligned horizontal bar chart, largest
 * count first.
 *
 * @param {Array<[string, number]>} entries
 * @param {{ width?: number, limit?: number, total?: number, color?: boolean }} [options]
 *   `width` is the bar width in cells (default 24); `limit` keeps only the
 *   top N rows; `total` overrides the percentage denominator (default: the
 *   sum over all entries, including rows cut by `limit`); `color: false`
 *   leaves the bars uncoloured.
 * @returns {string} One line per row, or a muted "(none)" when there are no rows.
 */
export function barChart(entries, options = {}) {
    const width = options.width ?? 24;
    const limit = options.limit ?? entries.length;
    const sorted = [...entries].sort((a, b) => b[1] - a[1]).slice(0, limit);
    if (sorted.length === 0) {
        return c.muted("(none)");
    }
    const total = options.total ?? entries.reduce((sum, [, count]) => sum + count, 0);
    // Floor the scale at 1 so all-zero data does not divide by zero.
    const max = Math.max(1, ...sorted.map(([, count]) => count));
    // Measure labels the same way padEndVisible pads them, so labels that
    // carry escape sequences do not widen the column.
    const labelWidth = Math.max(...sorted.map(([label]) => visibleWidth(label)));
    const countWidth = Math.max(...sorted.map(([, count]) => String(count).length));
    return sorted
        .map(([label, count], index) => {
            const barText = bar(count, max, width);
            const coloredBar = options.color === false ? barText : paletteColor(index)(barText);
            const percent = total > 0 ? `${Math.round((count / total) * 100)}%` : "";
            return [
                " ",
                c.label(padEndVisible(label, labelWidth)),
                " ",
                padEndVisible(coloredBar, width),
                " ",
                c.value(String(count).padStart(countWidth)),
                " ",
                c.muted(percent),
            ].join("");
        })
        .join("\n");
}
|
|
102
|
+
/**
 * Format key/value pairs as lines with the keys padded to a common width.
 *
 * @param {Array<[string, string]>} pairs
 * @returns {string} Newline-joined lines, or "" for an empty list.
 */
export function kvList(pairs) {
    if (pairs.length === 0) {
        return "";
    }
    const keyWidth = pairs.reduce((widest, [key]) => Math.max(widest, visibleWidth(key)), -Infinity);
    const rendered = [];
    for (const [key, value] of pairs) {
        rendered.push(`${c.label(padEndVisible(key, keyWidth))} ${value}`);
    }
    return rendered.join("\n");
}
|
|
108
|
+
/**
 * Render a plain-text table with a styled header row. Each column is as
 * wide as its widest cell, measured by visible width.
 *
 * @param {string[]} headers
 * @param {string[][]} rows - Missing cells are measured as empty strings.
 * @returns {string} Header line followed by one line per row.
 */
export function table(headers, rows) {
    const columnWidths = headers.map((header, column) => {
        let widest = visibleWidth(header);
        for (const row of rows) {
            widest = Math.max(widest, visibleWidth(row[column] ?? ""));
        }
        return widest;
    });
    // Cells beyond the header count have no measured width and are not padded.
    const formatRow = (cells) => {
        const padded = cells.map((cell, column) => padEndVisible(cell, columnWidths[column] ?? 0));
        return padded.join(" ");
    };
    const output = [c.heading(formatRow(headers))];
    for (const row of rows) {
        output.push(formatRow(row));
    }
    return output.join("\n");
}
|
package/dist/search.js
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import { searchIndexPath } from "./paths.js";
|
|
3
|
+
import { readJsonFile, writeJsonFile } from "./jsonl.js";
|
|
4
|
+
import { c } from "./render.js";
|
|
5
|
+
// Common English function words excluded from the search index and queries.
const STOP_WORDS = new Set([
    "a",
    "an",
    "and",
    "are",
    "as",
    "at",
    "be",
    "but",
    "by",
    "for",
    "from",
    "how",
    "i",
    "in",
    "into",
    "is",
    "it",
    "of",
    "on",
    "or",
    "that",
    "the",
    "this",
    "to",
    "was",
    "with",
    "you",
    "your",
]);
/**
 * Split free text into lowercase ASCII search tokens.
 *
 * Text is lowercased and NFKD-normalised. Combining marks and apostrophes
 * are removed, the rest is split on runs of non-alphanumerics, and tokens
 * shorter than two characters or listed in STOP_WORDS are dropped.
 *
 * NOTE: an index built before this fix may hold fragment tokens for
 * accented words ("re", "sume"); rebuild it to pick up the merged tokens.
 *
 * @param {string | null | undefined} input
 * @returns {string[]} Tokens in document order; empty for falsy input.
 */
export function tokenize(input) {
    if (!input) {
        return [];
    }
    return input
        .toLowerCase()
        .normalize("NFKD")
        // NFKD turns "é" into "e" + U+0301. Drop the mark so the splitter
        // below does not cut the word in two at the accent.
        .replace(/\p{M}+/gu, "")
        // Remove ASCII and typographic apostrophes so "don't" and "don’t"
        // both become "dont".
        .replace(/['\u2019]/g, "")
        .split(/[^a-z0-9]+/g)
        .filter((token) => token.length >= 2 && !STOP_WORDS.has(token));
}
|
|
45
|
+
/**
 * Concatenate every text field of a video that search looks at, one field
 * per line, skipping empty or missing fields.
 *
 * @param {object} video - Video record; `hashtags` must be an array.
 * @returns {string}
 */
export function searchableText(video) {
    const { classification, author, music } = video;
    const candidates = [
        video.description,
        video.transcript?.text,
        classification?.summary,
        classification?.category,
        classification?.domain,
        ...(classification?.topics ?? []),
        ...video.hashtags,
        author?.username,
        author?.displayName,
        music?.title,
        music?.author,
    ];
    const present = [];
    for (const candidate of candidates) {
        if (candidate) {
            present.push(candidate);
        }
    }
    return present.join("\n");
}
|
|
62
|
+
/**
 * Build the in-memory search index for a list of videos.
 *
 * @param {object[]} videos
 * @returns {{ version: number, builtAt: string, recordCount: number, avgDocLength: number, termDocFreq: Record<string, number>, docs: object[] }}
 *   `termDocFreq` maps each term to the number of documents containing it;
 *   `avgDocLength` is the mean of the documents' `length` (0 when empty).
 */
export function buildSearchIndex(videos) {
    const docs = videos.map((video) => toSearchDocument(video));
    const termDocFreq = {};
    const hasOwn = Object.prototype.hasOwnProperty;
    for (const doc of docs) {
        for (const term of Object.keys(doc.terms)) {
            // Read own entries only: the token "constructor" would otherwise
            // pick up Object.prototype.constructor and turn the count into a string.
            const seen = hasOwn.call(termDocFreq, term) ? termDocFreq[term] : 0;
            termDocFreq[term] = seen + 1;
        }
    }
    const totalLength = docs.reduce((sum, doc) => sum + doc.length, 0);
    return {
        version: 1,
        builtAt: new Date().toISOString(),
        recordCount: docs.length,
        avgDocLength: docs.length === 0 ? 0 : totalLength / docs.length,
        termDocFreq,
        docs,
    };
}
|
|
80
|
+
/**
 * Convert one video into its search-index document: weighted term counts
 * plus a display title and preview.
 *
 * Field weights: category, domain, topics and hashtags 4; description and
 * summary 3; transcript and author names 2; music 1.
 *
 * @param {object} video
 * @returns {{ id: string, length: number, terms: Record<string, number>, title: string, preview: string }}
 *   `length` is the sum of all term weights.
 */
function toSearchDocument(video) {
    const weightedTerms = {};
    addTerms(weightedTerms, tokenize(video.description), 3);
    addTerms(weightedTerms, tokenize(video.transcript?.text), 2);
    addTerms(weightedTerms, tokenize(video.classification?.summary), 3);
    addTerms(weightedTerms, tokenize(video.classification?.category), 4);
    addTerms(weightedTerms, tokenize(video.classification?.domain), 4);
    addTerms(weightedTerms, tokenize((video.classification?.topics ?? []).join(" ")), 4);
    // Tolerate records without a hashtags array instead of aborting the build.
    addTerms(weightedTerms, tokenize((video.hashtags ?? []).join(" ")), 4);
    addTerms(weightedTerms, tokenize([video.author?.username, video.author?.displayName].filter(Boolean).join(" ")), 2);
    addTerms(weightedTerms, tokenize([video.music?.title, video.music?.author].filter(Boolean).join(" ")), 1);
    const length = Object.values(weightedTerms).reduce((sum, count) => sum + count, 0);
    const firstLine = video.description?.split(/\r?\n/)[0]?.slice(0, 100);
    return {
        id: video.id,
        length,
        terms: weightedTerms,
        // `||` rather than `??`: an empty first line must also fall back to the id.
        title: firstLine || video.id,
        preview: makePreview(video),
    };
}
|
|
100
|
+
/**
 * Add `weight` to the running total of each term in `target`, once per
 * occurrence in `terms`.
 *
 * @param {Record<string, number>} target - Accumulator, mutated in place.
 * @param {Iterable<string>} terms
 * @param {number} weight
 */
function addTerms(target, terms, weight) {
    const hasOwn = Object.prototype.hasOwnProperty;
    for (const term of terms) {
        // Read own entries only: on a plain object the token "constructor"
        // would otherwise resolve to Object.prototype.constructor and turn
        // the total into a string.
        const current = hasOwn.call(target, term) ? target[term] : 0;
        target[term] = current + weight;
    }
}
|
|
105
|
+
/**
 * Build a short single-line preview from the transcript, falling back to
 * the description. Whitespace runs are collapsed and the result is capped
 * at 220 characters.
 *
 * @param {object} video
 * @returns {string}
 */
function makePreview(video) {
    const source = video.transcript?.text || video.description || "";
    const singleLine = source.replace(/\s+/g, " ").trim();
    return singleLine.slice(0, 220);
}
|
|
109
|
+
/**
 * Build the search index for `videos`, write it to the search-index path,
 * and return it.
 *
 * @param {object[]} videos
 * @returns {Promise<object>} The index that was written.
 */
export async function saveSearchIndex(videos) {
    const built = buildSearchIndex(videos);
    const destination = searchIndexPath();
    await writeJsonFile(destination, built);
    return built;
}
|
|
114
|
+
/**
 * Load the persisted search index.
 *
 * @returns {Promise<object | undefined>} `undefined` when the index file is
 *   not accessible; otherwise whatever `readJsonFile` yields for it.
 */
export async function loadSearchIndex() {
    let accessible = true;
    try {
        await fs.access(searchIndexPath());
    }
    catch {
        accessible = false;
    }
    if (!accessible) {
        return undefined;
    }
    return readJsonFile(searchIndexPath(), undefined);
}
|
|
123
|
+
/**
 * Keep only the videos that satisfy every filter that is set.
 *
 * - `author`: case-insensitive substring of the username (leading "@" ignored)
 * - `category` / `domain`: case-insensitive exact match on the classification
 * - `collection`: case-insensitive substring of the collection id, name or url
 * - `source`: exact match
 * - `hasTranscript`: whether non-empty transcript text must (not) be present
 * - `after` / `before`: bounds on `createdAt`; videos without `createdAt` pass
 *
 * @param {object[]} videos
 * @param {object} filters
 * @returns {object[]}
 */
export function filterVideos(videos, filters) {
    const wantedAuthor = filters.author?.replace(/^@/, "").toLowerCase();
    const wantedCategory = filters.category?.toLowerCase();
    const wantedDomain = filters.domain?.toLowerCase();
    const wantedCollection = filters.collection?.toLowerCase();
    const earliest = filters.after ? new Date(filters.after) : undefined;
    const latest = filters.before ? new Date(filters.before) : undefined;

    const matches = (video) => {
        if (wantedAuthor) {
            const username = video.author?.username?.toLowerCase();
            if (!username?.includes(wantedAuthor)) {
                return false;
            }
        }
        if (wantedCategory && video.classification?.category?.toLowerCase() !== wantedCategory) {
            return false;
        }
        if (wantedDomain && video.classification?.domain?.toLowerCase() !== wantedDomain) {
            return false;
        }
        if (wantedCollection) {
            const { id, name, url } = video.collection ?? {};
            const haystack = [id, name, url].filter(Boolean).join(" ").toLowerCase();
            if (!haystack.includes(wantedCollection)) {
                return false;
            }
        }
        if (filters.source && video.source !== filters.source) {
            return false;
        }
        if (filters.hasTranscript != null) {
            const hasText = Boolean(video.transcript?.text);
            if (hasText !== filters.hasTranscript) {
                return false;
            }
        }
        const created = video.createdAt ? new Date(video.createdAt) : undefined;
        if (created) {
            if (earliest && created < earliest) {
                return false;
            }
            if (latest && created > latest) {
                return false;
            }
        }
        return true;
    };

    return videos.filter((video) => matches(video));
}
|
|
154
|
+
/**
 * Run a filtered, ranked search over `videos` using a prebuilt index.
 *
 * With no query text, the filtered videos are returned in their existing
 * order with score 0. Otherwise each allowed index document is scored as
 * the sum of per-term BM25 scores plus 1.5 when the raw query appears
 * verbatim (case-insensitive) in the video's searchable text. Only
 * positive scores are kept, highest first.
 *
 * @param {object[]} videos
 * @param {object} index - Index as produced by `buildSearchIndex`.
 * @param {object} filters - Filter fields plus `query`, `limit` (default 20) and `offset` (default 0).
 * @returns {Array<{ video: object, score: number, highlights: string[] }>}
 */
export function searchWithIndex(videos, index, filters) {
    const candidates = filterVideos(videos, filters);
    const allowedIds = new Set(candidates.map((video) => video.id));
    const videoById = new Map(videos.map((video) => [video.id, video]));
    const query = filters.query?.trim() ?? "";
    const pageSize = filters.limit ?? 20;
    const start = filters.offset ?? 0;
    const end = start + pageSize;

    if (!query) {
        return candidates
            .slice(start, end)
            .map((video) => ({ video, score: 0, highlights: highlights(video, []) }));
    }

    const queryTerms = tokenize(query);
    const phrase = query.toLowerCase();
    const hits = [];
    for (const doc of index.docs) {
        // Skip documents that are filtered out or no longer in the library.
        const video = allowedIds.has(doc.id) ? videoById.get(doc.id) : undefined;
        if (!video) {
            continue;
        }
        let score = queryTerms.reduce((sum, term) => sum + bm25(term, doc, index), 0);
        if (searchableText(video).toLowerCase().includes(phrase)) {
            score += 1.5;
        }
        if (score > 0) {
            hits.push({ video, score, highlights: highlights(video, queryTerms) });
        }
    }
    hits.sort((a, b) => b.score - a.score);
    return hits.slice(start, end);
}
|
|
184
|
+
/**
 * BM25 score of one query term for one index document (k1 = 1.4, b = 0.75).
 *
 * @param {string} term
 * @param {{ terms: Record<string, number>, length: number }} doc
 * @param {{ recordCount: number, termDocFreq: Record<string, number>, avgDocLength: number }} index
 * @returns {number} 0 when the document does not contain the term.
 */
function bm25(term, doc, index) {
    // Read own numeric entries only. The token "constructor" would otherwise
    // resolve to Object.prototype.constructor, make the score NaN, and drop
    // the document from the results.
    const ownCount = (table, key) => {
        const value = Object.prototype.hasOwnProperty.call(table, key) ? table[key] : 0;
        return typeof value === "number" && Number.isFinite(value) ? value : 0;
    };
    const tf = ownCount(doc.terms, term);
    if (tf <= 0) {
        return 0;
    }
    const n = index.recordCount;
    const df = ownCount(index.termDocFreq, term);
    const idf = Math.log(1 + (n - df + 0.5) / (df + 0.5));
    const k1 = 1.4;
    const b = 0.75;
    // Avoid dividing by zero when the index holds only empty documents.
    const avg = index.avgDocLength || 1;
    return idf * ((tf * (k1 + 1)) / (tf + k1 * (1 - b + b * (doc.length / avg))));
}
|
|
196
|
+
/**
 * Pick a short snippet to show next to a search hit.
 *
 * With query terms, returns the first sentence (description first, then
 * transcript) containing any term, capped at 240 characters. With no terms,
 * or when nothing matches, returns the start of the first available text,
 * capped at 220 characters.
 *
 * @param {object} video
 * @param {string[]} terms - Tokenised query terms.
 * @returns {string[]} Zero or one snippet.
 */
function highlights(video, terms) {
    const texts = [video.description, video.transcript?.text].filter(Boolean);
    const squash = (text) => text.replace(/\s+/g, " ");
    const leadingSnippet = () => texts.slice(0, 1).map((text) => squash(text).slice(0, 220));

    if (terms.length === 0) {
        return leadingSnippet();
    }
    const wanted = new Set(terms);
    const mentionsTerm = (sentence) => tokenize(sentence).some((token) => wanted.has(token));
    for (const text of texts) {
        // Split after sentence-ending punctuation or at line breaks.
        const hit = text.split(/(?<=[.!?])\s+|\n+/).find(mentionsTerm);
        if (hit) {
            return [squash(hit).slice(0, 240)];
        }
    }
    return leadingSnippet();
}
|
|
209
|
+
/**
 * Format search results for output.
 *
 * @param {Array<{ video: object, score: number, highlights: string[] }>} results
 * @param {{ json?: boolean }} [options] - With `json`, results are returned as pretty-printed JSON.
 * @returns {string} JSON, a muted "No matches." notice, or one block per
 *   result (header line, description, first highlight, URL) separated by blank lines.
 */
export function formatSearchResults(results, options) {
    if (options?.json) {
        return JSON.stringify(results, null, 2);
    }
    if (results.length === 0) {
        return c.muted("No matches.");
    }
    const blocks = results.map((result, idx) => {
        const { video } = result;
        const author = video.author?.username ? c.accent(`@${video.author.username}`) : c.muted("unknown");
        const category = video.classification?.category ? ` ${c.warn(`[${video.classification.category}]`)}` : "";
        const score = result.score > 0 ? ` ${c.muted(`score ${result.score.toFixed(2)}`)}` : "";
        const header = `${c.muted(`${idx + 1}.`)} ${c.value(video.id)} ${author}${category}${score}`;
        const description = video.description ? ` ${video.description.replace(/\s+/g, " ").slice(0, 160)}` : "";
        const firstHighlight = result.highlights[0];
        const hit = firstHighlight ? ` ${c.muted(">")} ${c.success(firstHighlight)}` : "";
        const link = ` ${c.muted(video.canonicalUrl ?? video.url)}`;
        // Empty description/highlight rows are dropped rather than printed blank.
        return [header, description, hit, link].filter(Boolean).join("\n");
    });
    return blocks.join("\n\n");
}
|
package/dist/skill.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import os from "node:os";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
/**
 * Text of the SKILL.md file installed for agent integrations: front matter,
 * a usage note, the command list, and grounding guidance.
 *
 * @returns {string} Markdown ending with a newline.
 */
export function skillContent() {
    const frontMatter = [
        "---",
        "name: tokwise",
        "description: Search and use the user's local Tokwise archive of saved short-form video transcripts.",
        "---",
    ];
    const intro = [
        "# Tokwise",
        "",
        "Use this skill when the user asks about saved short-form videos, life advice clips, transcript patterns, or similarities across saved clips.",
    ];
    const commands = [
        "## Commands",
        "",
        "- `tokwise status` shows whether the archive exists and how many transcripts are available.",
        '- `tokwise search "query" --limit 8` searches descriptions, hashtags, summaries, and transcripts.',
        "- `tokwise show <id>` prints full metadata and transcript for one video.",
        "- `tokwise similar <id>` finds related videos by transcript and metadata overlap.",
        '- `tokwise ask "question"` returns top local evidence; add `--engine ollama --model <model>` only when the user wants local synthesis.',
        "- `tokwise md` and `tokwise wiki` export Markdown pages under the local library.",
        "- `tw` is the short alias for `tokwise`.",
    ];
    const grounding = [
        "## Grounding",
        "",
        "Cite video ids or Markdown page paths when drawing conclusions. Treat transcripts as user-owned local context and do not assume videos are public.",
    ];
    // Sections are separated by one blank line; the trailing "" yields the final newline.
    const lines = [...frontMatter, "", ...intro, "", ...commands, "", ...grounding, ""];
    return lines.join("\n");
}
|
|
31
|
+
/**
 * Write the Tokwise SKILL.md into the skills directory of the selected
 * agent(s) under the user's home directory, creating directories as needed.
 *
 * @param {string} [target="all"] - "codex", "claude" or "all"; any other value selects nothing.
 * @returns {Promise<string[]>} Paths of the files written.
 */
export async function installSkill(target = "all") {
    const agentDirs = [
        ["codex", ".codex"],
        ["claude", ".claude"],
    ];
    const destinations = agentDirs
        .filter(([agent]) => target === agent || target === "all")
        .map(([, dotDir]) => path.join(os.homedir(), dotDir, "skills", "tokwise", "SKILL.md"));
    for (const destination of destinations) {
        await fs.mkdir(path.dirname(destination), { recursive: true });
        await fs.writeFile(destination, skillContent(), "utf8");
    }
    return destinations;
}
|
|
45
|
+
/**
 * Remove the Tokwise skill directory of the selected agent(s) from the
 * user's home directory. Directories that do not exist are ignored.
 *
 * @param {string} [target="all"] - "codex", "claude" or "all"; any other value selects nothing.
 * @returns {Promise<string[]>} Paths of the directories targeted for removal.
 */
export async function uninstallSkill(target = "all") {
    const agentDirs = [
        ["codex", ".codex"],
        ["claude", ".claude"],
    ];
    const destinations = agentDirs
        .filter(([agent]) => target === agent || target === "all")
        .map(([, dotDir]) => path.join(os.homedir(), dotDir, "skills", "tokwise"));
    for (const destination of destinations) {
        await fs.rm(destination, { recursive: true, force: true });
    }
    return destinations;
}
|
package/dist/store.js
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import crypto from "node:crypto";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { authPath, ensureDataDirs, preferencesPath, videosJsonlPath } from "./paths.js";
|
|
5
|
+
import { readJsonFile, readJsonl, writeJsonFile, writeJsonl } from "./jsonl.js";
|
|
6
|
+
/**
 * Deterministic 16-hex-character identifier derived from `input`: the
 * first 16 characters of its SHA-1 digest.
 *
 * @param {string | Buffer} input
 * @returns {string}
 */
export function stableHash(input) {
    const hasher = crypto.createHash("sha1");
    hasher.update(input);
    const hex = hasher.digest("hex");
    return hex.slice(0, 16);
}
|
|
9
|
+
/**
 * Reduce arbitrary text to a safe single path segment.
 *
 * Runs of characters outside `[a-zA-Z0-9._-]` become "-", leading and
 * trailing dashes are trimmed, and the result is capped at 120 characters.
 * Results that are empty or consist only of dots (".", "..") become
 * "untitled", so the return value can never act as a relative-directory
 * reference when joined into a path.
 *
 * @param {string} input
 * @returns {string}
 */
export function sanitizeFilePart(input) {
    const cleaned = input
        .replace(/[^a-zA-Z0-9._-]+/g, "-")
        .replace(/^-+|-+$/g, "")
        .slice(0, 120);
    // Dots are allowed inside names, but a dot-only segment would resolve to
    // the current or parent directory.
    if (cleaned === "" || /^\.+$/.test(cleaned)) {
        return "untitled";
    }
    return cleaned;
}
|
|
15
|
+
/**
 * Read every video record from the videos JSONL file, after calling
 * `ensureDataDirs()`.
 *
 * @returns {Promise<object[]>}
 */
export async function loadVideos() {
    ensureDataDirs();
    const source = videosJsonlPath();
    return readJsonl(source);
}
|
|
19
|
+
/**
 * Persist the full video list to the videos JSONL file, newest first.
 *
 * Records are ordered by `savedAt`, falling back to `createdAt`, then
 * `syncedAt`. A record with none of the three sorts last instead of
 * throwing from the comparator and aborting the save. The input array is
 * not mutated.
 *
 * @param {object[]} videos
 * @returns {Promise<void>}
 */
export async function saveVideos(videos) {
    ensureDataDirs();
    // "" compares before any timestamp, so undated records land at the end
    // of the descending order.
    const sortKey = (video) => video.savedAt ?? video.createdAt ?? video.syncedAt ?? "";
    const sorted = [...videos].sort((a, b) => sortKey(b).localeCompare(sortKey(a)));
    await writeJsonl(videosJsonlPath(), sorted);
}
|
|
28
|
+
/**
 * Merge a freshly fetched record into a stored one.
 *
 * Incoming scalar fields win. `author`, `stats` and `media` are merged
 * shallowly. Local media paths, the download timestamp and an existing
 * transcript are kept in preference to incoming ones. Hashtags and
 * classification topics are unioned.
 *
 * @param {object | undefined} existing - Stored record, if any.
 * @param {object} incoming - New record.
 * @returns {object} `incoming` itself when there is no stored record.
 */
export function mergeVideo(existing, incoming) {
    if (!existing) {
        return incoming;
    }
    const classification = mergeClassification(existing.classification, incoming.classification);
    const oldMedia = existing.media;
    const newMedia = incoming.media;
    // Locally produced artefacts must survive a metadata refresh.
    const media = {
        ...oldMedia,
        ...newMedia,
        videoPath: oldMedia?.videoPath ?? newMedia?.videoPath,
        audioPath: oldMedia?.audioPath ?? newMedia?.audioPath,
        infoJsonPath: oldMedia?.infoJsonPath ?? newMedia?.infoJsonPath,
        downloadedAt: oldMedia?.downloadedAt ?? newMedia?.downloadedAt,
    };
    const author = { ...existing.author, ...incoming.author };
    const stats = { ...existing.stats, ...incoming.stats };
    const hashtags = uniqueStrings([...existing.hashtags, ...incoming.hashtags]);
    return {
        ...existing,
        ...incoming,
        savedAt: incoming.savedAt ?? existing.savedAt,
        syncedAt: incoming.syncedAt,
        author,
        stats,
        media,
        transcript: existing.transcript ?? incoming.transcript,
        classification,
        hashtags,
        raw: incoming.raw ?? existing.raw,
    };
}
|
|
53
|
+
/**
 * Merge two classification objects. Incoming fields win and topics are unioned.
 *
 * @param {object | undefined} existing
 * @param {object | undefined} incoming
 * @returns {object | undefined} The other argument, unchanged, when one side is missing.
 */
function mergeClassification(existing, incoming) {
    if (!existing) {
        return incoming;
    }
    if (!incoming) {
        return existing;
    }
    const topics = uniqueStrings([...(existing.topics ?? []), ...(incoming.topics ?? [])]);
    return { ...existing, ...incoming, topics };
}
|
|
64
|
+
/**
 * Trim each string, drop empty or missing entries, and remove duplicates,
 * keeping first-occurrence order.
 *
 * @param {Array<string | null | undefined>} values
 * @returns {string[]}
 */
export function uniqueStrings(values) {
    const kept = new Set();
    for (const value of values) {
        const trimmed = value?.trim();
        if (trimmed) {
            kept.add(trimmed);
        }
    }
    return Array.from(kept);
}
|
|
67
|
+
/**
 * Merge incoming records into the stored library, save it, and report
 * what changed.
 *
 * @param {object[]} incoming
 * @param {{ rebuild?: boolean }} [options] - With `rebuild`, the stored library is ignored and replaced.
 * @returns {Promise<{ added: number, updated: number, unchanged: number, total: number, ids: string[] }>}
 *   Counts are per incoming record; `total` is the library size after the merge.
 */
export async function mergeVideos(incoming, options) {
    const existing = options?.rebuild ? [] : await loadVideos();
    const library = new Map();
    // Serialised pre-merge state, used afterwards to tell updates from no-ops.
    const snapshots = new Map();
    for (const video of existing) {
        library.set(video.id, video);
        snapshots.set(video.id, JSON.stringify(video));
    }
    for (const video of incoming) {
        library.set(video.id, mergeVideo(library.get(video.id), video));
    }
    const merged = Array.from(library.values());
    await saveVideos(merged);

    const tally = { added: 0, updated: 0, unchanged: 0 };
    for (const video of incoming) {
        const previous = snapshots.get(video.id);
        if (previous == null) {
            tally.added += 1;
            continue;
        }
        const same = previous === JSON.stringify(library.get(video.id));
        tally[same ? "unchanged" : "updated"] += 1;
    }
    return {
        added: tally.added,
        updated: tally.updated,
        unchanged: tally.unchanged,
        total: merged.length,
        ids: incoming.map((video) => video.id),
    };
}
|
|
93
|
+
/**
 * Look up a video by exact id, then by id prefix, then by exact URL or
 * canonical URL.
 *
 * @param {object[]} videos
 * @param {string} query - Id, id prefix, or URL; surrounding whitespace is ignored.
 * @returns {object | undefined} The first match, or `undefined` when
 *   nothing matches or the query is blank.
 */
export function findVideo(videos, query) {
    const normalized = query.trim();
    // Every id starts with "", so a blank query would otherwise return
    // whichever video happens to be first.
    if (normalized === "") {
        return undefined;
    }
    return (videos.find((video) => video.id === normalized) ??
        videos.find((video) => video.id.startsWith(normalized)) ??
        videos.find((video) => video.url === normalized || video.canonicalUrl === normalized));
}
|
|
99
|
+
/**
 * Apply per-video updates to the stored library and save the result.
 *
 * @param {Map<string, object | ((video: object) => object)>} updates
 *   Keyed by video id. A function receives the stored record and returns
 *   its replacement. An object is merged into the record via `mergeVideo`,
 *   keeping the stored `syncedAt` unless the update provides one.
 * @returns {Promise<object[]>} The full library after the updates.
 */
export async function updateVideosById(updates) {
    const stored = await loadVideos();
    const applyUpdate = (video) => {
        const change = updates.get(video.id);
        if (!change) {
            return video;
        }
        if (typeof change === "function") {
            return change(video);
        }
        const patched = { ...video, ...change, syncedAt: change.syncedAt ?? video.syncedAt };
        return mergeVideo(video, patched);
    };
    const next = [];
    for (const video of stored) {
        next.push(applyUpdate(video));
    }
    await saveVideos(next);
    return next;
}
|
|
112
|
+
/**
 * Read UTF-8 text from a file, or from standard input when the path is "-".
 *
 * @param {string} filePath - File to read, or "-" for stdin.
 * @returns {Promise<string>}
 */
export async function readTextInput(filePath) {
    if (filePath !== "-") {
        return fs.readFile(filePath, "utf8");
    }
    const { stdin } = process;
    return new Promise((resolve, reject) => {
        let collected = "";
        stdin.setEncoding("utf8");
        stdin.on("data", (chunk) => {
            collected += chunk;
        });
        stdin.on("end", () => resolve(collected));
        stdin.on("error", reject);
    });
}
|
|
126
|
+
/**
 * Read the preferences file, passing `{}` to `readJsonFile` as the fallback value.
 *
 * @returns {Promise<object>}
 */
export async function loadPreferences() {
    const file = preferencesPath();
    return readJsonFile(file, {});
}
|
|
129
|
+
/**
 * Write the preferences file, passing mode 0o600 to `writeJsonFile`.
 *
 * @param {object} preferences
 * @returns {Promise<void>}
 */
export async function savePreferences(preferences) {
    const file = preferencesPath();
    await writeJsonFile(file, preferences, 0o600);
}
|
|
132
|
+
/**
 * Read the auth file, passing `{}` to `readJsonFile` as the fallback value.
 *
 * @returns {Promise<object>}
 */
export async function loadAuth() {
    const file = authPath();
    return readJsonFile(file, {});
}
|
|
135
|
+
/**
 * Write the auth file, passing mode 0o600 to `writeJsonFile`.
 *
 * @param {object} auth
 * @returns {Promise<void>}
 */
export async function saveAuth(auth) {
    const file = authPath();
    await writeJsonFile(file, auth, 0o600);
}
|
|
138
|
+
/**
 * Delete the auth file. A file that is already absent is not an error;
 * any other failure is rethrown.
 *
 * @returns {Promise<void>}
 */
export async function clearAuth() {
    const ignoreMissing = (error) => {
        if (error.code === "ENOENT") {
            return;
        }
        throw error;
    };
    try {
        await fs.rm(authPath());
    }
    catch (error) {
        ignoreMissing(error);
    }
}
|
|
147
|
+
/**
 * Resolve the cookie to use, in priority order: an inline `cookie` option,
 * the trimmed contents of `cookieFile`, then the `cookie` field of the
 * stored auth data.
 *
 * @param {{ cookie?: string, cookieFile?: string }} [options]
 * @returns {Promise<string | undefined>}
 */
export async function loadCookie(options) {
    const inline = options?.cookie;
    if (inline) {
        return inline;
    }
    const file = options?.cookieFile;
    if (file) {
        const contents = await fs.readFile(file, "utf8");
        return contents.trim();
    }
    const auth = await loadAuth();
    return auth.cookie;
}
|
|
154
|
+
/**
 * Turn a possibly-relative path into an absolute one. Absolute paths are
 * returned untouched; relative ones are resolved against `cwd`.
 *
 * @param {string} filePath
 * @param {string} [cwd=process.cwd()] - Base directory for relative paths.
 * @returns {string}
 */
export function resolveMaybeRelative(filePath, cwd = process.cwd()) {
    return path.isAbsolute(filePath) ? filePath : path.resolve(cwd, filePath);
}
|