@kt3k/tku 1.0.8 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +79 -29
- package/dist/tokenize-worker.js +30 -0
- package/package.json +3 -3
package/dist/main.js
CHANGED
|
@@ -60,7 +60,9 @@ async function listTextFiles(repoPath, options = {}) {
|
|
|
60
60
|
const cwd = resolve(repoPath);
|
|
61
61
|
const files = await listFiles(repoPath, options);
|
|
62
62
|
const results = [];
|
|
63
|
-
for (const file of files) {
|
|
63
|
+
for (let i = 0; i < files.length; i++) {
|
|
64
|
+
const file = files[i];
|
|
65
|
+
options.onProgress?.(file, i + 1, files.length);
|
|
64
66
|
const fullPath = resolve(cwd, file);
|
|
65
67
|
try {
|
|
66
68
|
if (!await isBinary(fullPath)) {
|
|
@@ -73,33 +75,61 @@ async function listTextFiles(repoPath, options = {}) {
|
|
|
73
75
|
}
|
|
74
76
|
|
|
75
77
|
// src/tokenize.ts
|
|
76
|
-
import { readFile } from "node:fs/promises";
|
|
77
78
|
import { resolve as resolve2 } from "node:path";
|
|
79
|
+
import { availableParallelism } from "node:os";
|
|
80
|
+
import { Worker } from "node:worker_threads";
|
|
78
81
|
import { get_encoding } from "tiktoken";
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
path: file,
|
|
90
|
-
tokens
|
|
91
|
-
});
|
|
92
|
-
totalTokens += tokens;
|
|
93
|
-
}
|
|
94
|
-
return {
|
|
95
|
-
encoding,
|
|
96
|
-
files: results,
|
|
97
|
-
totalTokens,
|
|
98
|
-
totalFiles: results.length
|
|
99
|
-
};
|
|
100
|
-
} finally {
|
|
101
|
-
enc.free();
|
|
82
|
+
var workerExt = import.meta.url.endsWith(".ts") ? ".ts" : ".js";
|
|
83
|
+
var workerUrl = new URL(`./tokenize-worker${workerExt}`, import.meta.url);
|
|
84
|
+
async function tokenizeFiles(repoPath, files, encoding, options = {}) {
|
|
85
|
+
const resolvedRepoPath = resolve2(repoPath);
|
|
86
|
+
const numWorkers = Math.min(availableParallelism(), files.length || 1);
|
|
87
|
+
const chunks = Array.from({
|
|
88
|
+
length: numWorkers
|
|
89
|
+
}, () => []);
|
|
90
|
+
for (let i = 0; i < files.length; i++) {
|
|
91
|
+
chunks[i % numWorkers].push(files[i]);
|
|
102
92
|
}
|
|
93
|
+
let progressCount = 0;
|
|
94
|
+
const total = files.length;
|
|
95
|
+
const workerPromises = chunks.map((chunk) => {
|
|
96
|
+
if (chunk.length === 0) return Promise.resolve([]);
|
|
97
|
+
return new Promise((resolvePromise, reject) => {
|
|
98
|
+
const worker = new Worker(workerUrl);
|
|
99
|
+
worker.on("message", (msg) => {
|
|
100
|
+
if (msg.type === "progress") {
|
|
101
|
+
progressCount++;
|
|
102
|
+
options.onProgress?.(msg.file, progressCount, total);
|
|
103
|
+
} else if (msg.type === "done") {
|
|
104
|
+
resolvePromise(msg.results);
|
|
105
|
+
worker.terminate();
|
|
106
|
+
}
|
|
107
|
+
});
|
|
108
|
+
worker.on("error", (err) => {
|
|
109
|
+
reject(err);
|
|
110
|
+
worker.terminate();
|
|
111
|
+
});
|
|
112
|
+
worker.postMessage({
|
|
113
|
+
repoPath: resolvedRepoPath,
|
|
114
|
+
files: chunk,
|
|
115
|
+
encoding
|
|
116
|
+
});
|
|
117
|
+
});
|
|
118
|
+
});
|
|
119
|
+
const chunkResults = await Promise.all(workerPromises);
|
|
120
|
+
const results = chunkResults.flat();
|
|
121
|
+
const orderMap = new Map(files.map((f, i) => [
|
|
122
|
+
f,
|
|
123
|
+
i
|
|
124
|
+
]));
|
|
125
|
+
results.sort((a, b) => orderMap.get(a.path) - orderMap.get(b.path));
|
|
126
|
+
const totalTokens = results.reduce((sum, f) => sum + f.tokens, 0);
|
|
127
|
+
return {
|
|
128
|
+
encoding,
|
|
129
|
+
files: results,
|
|
130
|
+
totalTokens,
|
|
131
|
+
totalFiles: results.length
|
|
132
|
+
};
|
|
103
133
|
}
|
|
104
134
|
|
|
105
135
|
// src/format.ts
|
|
@@ -112,7 +142,7 @@ function formatTokenCount(n) {
|
|
|
112
142
|
}
|
|
113
143
|
return String(n);
|
|
114
144
|
}
|
|
115
|
-
function formatTable(result) {
|
|
145
|
+
function formatTable(result, omitted = 0) {
|
|
116
146
|
const lines = [];
|
|
117
147
|
const formatted = result.files.map((f) => ({
|
|
118
148
|
path: f.path,
|
|
@@ -123,6 +153,9 @@ function formatTable(result) {
|
|
|
123
153
|
for (const f of formatted) {
|
|
124
154
|
lines.push(`${f.display.padStart(maxWidth)} ${f.path}`);
|
|
125
155
|
}
|
|
156
|
+
if (omitted > 0) {
|
|
157
|
+
lines.push(`${"".padStart(maxWidth)} ... ${omitted} more files`);
|
|
158
|
+
}
|
|
126
159
|
lines.push(`${"\u2500".repeat(maxWidth)}\u2500\u2500`);
|
|
127
160
|
const totalDisplay = formatTokenCount(result.totalTokens);
|
|
128
161
|
lines.push(`${totalDisplay.padStart(maxWidth)} total (${result.totalFiles} files)`);
|
|
@@ -139,11 +172,12 @@ function formatResult(result, options = {}) {
|
|
|
139
172
|
sorted.sort((a, b) => b.tokens - a.tokens);
|
|
140
173
|
}
|
|
141
174
|
const filtered = top !== void 0 ? sorted.slice(0, top) : sorted;
|
|
175
|
+
const omitted = sorted.length - filtered.length;
|
|
142
176
|
const adjusted = {
|
|
143
177
|
...result,
|
|
144
178
|
files: filtered
|
|
145
179
|
};
|
|
146
|
-
return json ? JSON.stringify(adjusted, null, 2) : formatTable(adjusted);
|
|
180
|
+
return json ? JSON.stringify(adjusted, null, 2) : formatTable(adjusted, omitted);
|
|
147
181
|
}
|
|
148
182
|
|
|
149
183
|
// src/main.ts
|
|
@@ -203,12 +237,28 @@ async function main() {
|
|
|
203
237
|
const encoding = values.encoding;
|
|
204
238
|
const sort = values.sort;
|
|
205
239
|
const top = values.top !== void 0 ? Number(values.top) : void 0;
|
|
240
|
+
const isTTY = process.stderr.isTTY;
|
|
241
|
+
function status(msg) {
|
|
242
|
+
if (isTTY) {
|
|
243
|
+
process.stderr.write(`\r\x1B[K${msg}`);
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
function clearStatus() {
|
|
247
|
+
if (isTTY) {
|
|
248
|
+
process.stderr.write("\r\x1B[K");
|
|
249
|
+
}
|
|
250
|
+
}
|
|
206
251
|
try {
|
|
207
252
|
const files = await listTextFiles(repoPath, {
|
|
208
253
|
exclude: values.exclude,
|
|
209
|
-
noGitignore: !values.gitignore
|
|
254
|
+
noGitignore: !values.gitignore,
|
|
255
|
+
onProgress: (file, i, total) => status(`Scanning [${i}/${total}] ${file}`)
|
|
256
|
+
});
|
|
257
|
+
clearStatus();
|
|
258
|
+
const result = await tokenizeFiles(repoPath, files, encoding, {
|
|
259
|
+
onProgress: (file, i, total) => status(`Tokenizing [${i}/${total}] ${file}`)
|
|
210
260
|
});
|
|
211
|
-
|
|
261
|
+
clearStatus();
|
|
212
262
|
const output = formatResult(result, {
|
|
213
263
|
json: values.json,
|
|
214
264
|
top,
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
// src/tokenize-worker.ts
|
|
2
|
+
import { parentPort } from "node:worker_threads";
|
|
3
|
+
import { readFile } from "node:fs/promises";
|
|
4
|
+
import { resolve } from "node:path";
|
|
5
|
+
import { get_encoding } from "tiktoken";
|
|
6
|
+
parentPort.on("message", async (msg) => {
|
|
7
|
+
const enc = get_encoding(msg.encoding);
|
|
8
|
+
try {
|
|
9
|
+
const results = [];
|
|
10
|
+
for (const file of msg.files) {
|
|
11
|
+
const fullPath = resolve(msg.repoPath, file);
|
|
12
|
+
const content = await readFile(fullPath, "utf-8");
|
|
13
|
+
const tokens = enc.encode_ordinary(content).length;
|
|
14
|
+
results.push({
|
|
15
|
+
path: file,
|
|
16
|
+
tokens
|
|
17
|
+
});
|
|
18
|
+
parentPort.postMessage({
|
|
19
|
+
type: "progress",
|
|
20
|
+
file
|
|
21
|
+
});
|
|
22
|
+
}
|
|
23
|
+
parentPort.postMessage({
|
|
24
|
+
type: "done",
|
|
25
|
+
results
|
|
26
|
+
});
|
|
27
|
+
} finally {
|
|
28
|
+
enc.free();
|
|
29
|
+
}
|
|
30
|
+
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kt3k/tku",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.10",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "dist/main.js",
|
|
6
6
|
"bin": {
|
|
@@ -10,8 +10,8 @@
|
|
|
10
10
|
"dist"
|
|
11
11
|
],
|
|
12
12
|
"scripts": {
|
|
13
|
-
"build": "deno bundle --external tiktoken --external picomatch -o dist/main.js src/main.ts && chmod +x dist/main.js",
|
|
14
|
-
"
|
|
13
|
+
"build": "deno bundle --external tiktoken --external picomatch -o dist/main.js src/main.ts && deno bundle --external tiktoken -o dist/tokenize-worker.js src/tokenize-worker.ts && chmod +x dist/main.js",
|
|
14
|
+
"prepublishOnly": "npm run build",
|
|
15
15
|
"test": "vitest run"
|
|
16
16
|
},
|
|
17
17
|
"keywords": [],
|