@kt3k/tku 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -60,7 +60,9 @@ async function listTextFiles(repoPath, options = {}) {
60
60
  const cwd = resolve(repoPath);
61
61
  const files = await listFiles(repoPath, options);
62
62
  const results = [];
63
- for (const file of files) {
63
+ for (let i = 0; i < files.length; i++) {
64
+ const file = files[i];
65
+ options.onProgress?.(file, i + 1, files.length);
64
66
  const fullPath = resolve(cwd, file);
65
67
  try {
66
68
  if (!await isBinary(fullPath)) {
@@ -73,33 +75,61 @@ async function listTextFiles(repoPath, options = {}) {
73
75
  }
74
76
 
75
77
  // src/tokenize.ts
76
- import { readFile } from "node:fs/promises";
77
78
  import { resolve as resolve2 } from "node:path";
79
+ import { availableParallelism } from "node:os";
80
+ import { Worker } from "node:worker_threads";
78
81
  import { get_encoding } from "tiktoken";
79
- async function tokenizeFiles(repoPath, files, encoding) {
80
- const enc = get_encoding(encoding);
81
- try {
82
- const results = [];
83
- let totalTokens = 0;
84
- for (const file of files) {
85
- const fullPath = resolve2(repoPath, file);
86
- const content = await readFile(fullPath, "utf-8");
87
- const tokens = enc.encode_ordinary(content).length;
88
- results.push({
89
- path: file,
90
- tokens
91
- });
92
- totalTokens += tokens;
93
- }
94
- return {
95
- encoding,
96
- files: results,
97
- totalTokens,
98
- totalFiles: results.length
99
- };
100
- } finally {
101
- enc.free();
82
+ var workerExt = import.meta.url.endsWith(".ts") ? ".ts" : ".js";
83
+ var workerUrl = new URL(`./tokenize-worker${workerExt}`, import.meta.url);
84
+ async function tokenizeFiles(repoPath, files, encoding, options = {}) {
85
+ const resolvedRepoPath = resolve2(repoPath);
86
+ const numWorkers = Math.min(availableParallelism(), files.length || 1);
87
+ const chunks = Array.from({
88
+ length: numWorkers
89
+ }, () => []);
90
+ for (let i = 0; i < files.length; i++) {
91
+ chunks[i % numWorkers].push(files[i]);
102
92
  }
93
+ let progressCount = 0;
94
+ const total = files.length;
95
+ const workerPromises = chunks.map((chunk) => {
96
+ if (chunk.length === 0) return Promise.resolve([]);
97
+ return new Promise((resolvePromise, reject) => {
98
+ const worker = new Worker(workerUrl);
99
+ worker.on("message", (msg) => {
100
+ if (msg.type === "progress") {
101
+ progressCount++;
102
+ options.onProgress?.(msg.file, progressCount, total);
103
+ } else if (msg.type === "done") {
104
+ resolvePromise(msg.results);
105
+ worker.terminate();
106
+ }
107
+ });
108
+ worker.on("error", (err) => {
109
+ reject(err);
110
+ worker.terminate();
111
+ });
112
+ worker.postMessage({
113
+ repoPath: resolvedRepoPath,
114
+ files: chunk,
115
+ encoding
116
+ });
117
+ });
118
+ });
119
+ const chunkResults = await Promise.all(workerPromises);
120
+ const results = chunkResults.flat();
121
+ const orderMap = new Map(files.map((f, i) => [
122
+ f,
123
+ i
124
+ ]));
125
+ results.sort((a, b) => orderMap.get(a.path) - orderMap.get(b.path));
126
+ const totalTokens = results.reduce((sum, f) => sum + f.tokens, 0);
127
+ return {
128
+ encoding,
129
+ files: results,
130
+ totalTokens,
131
+ totalFiles: results.length
132
+ };
103
133
  }
104
134
 
105
135
  // src/format.ts
@@ -112,7 +142,7 @@ function formatTokenCount(n) {
112
142
  }
113
143
  return String(n);
114
144
  }
115
- function formatTable(result) {
145
+ function formatTable(result, omitted = 0) {
116
146
  const lines = [];
117
147
  const formatted = result.files.map((f) => ({
118
148
  path: f.path,
@@ -123,6 +153,9 @@ function formatTable(result) {
123
153
  for (const f of formatted) {
124
154
  lines.push(`${f.display.padStart(maxWidth)} ${f.path}`);
125
155
  }
156
+ if (omitted > 0) {
157
+ lines.push(`${"".padStart(maxWidth)} ... ${omitted} more files`);
158
+ }
126
159
  lines.push(`${"\u2500".repeat(maxWidth)}\u2500\u2500`);
127
160
  const totalDisplay = formatTokenCount(result.totalTokens);
128
161
  lines.push(`${totalDisplay.padStart(maxWidth)} total (${result.totalFiles} files)`);
@@ -139,11 +172,12 @@ function formatResult(result, options = {}) {
139
172
  sorted.sort((a, b) => b.tokens - a.tokens);
140
173
  }
141
174
  const filtered = top !== void 0 ? sorted.slice(0, top) : sorted;
175
+ const omitted = sorted.length - filtered.length;
142
176
  const adjusted = {
143
177
  ...result,
144
178
  files: filtered
145
179
  };
146
- return json ? JSON.stringify(adjusted, null, 2) : formatTable(adjusted);
180
+ return json ? JSON.stringify(adjusted, null, 2) : formatTable(adjusted, omitted);
147
181
  }
148
182
 
149
183
  // src/main.ts
@@ -203,12 +237,28 @@ async function main() {
203
237
  const encoding = values.encoding;
204
238
  const sort = values.sort;
205
239
  const top = values.top !== void 0 ? Number(values.top) : void 0;
240
+ const isTTY = process.stderr.isTTY;
241
+ function status(msg) {
242
+ if (isTTY) {
243
+ process.stderr.write(`\r\x1B[K${msg}`);
244
+ }
245
+ }
246
+ function clearStatus() {
247
+ if (isTTY) {
248
+ process.stderr.write("\r\x1B[K");
249
+ }
250
+ }
206
251
  try {
207
252
  const files = await listTextFiles(repoPath, {
208
253
  exclude: values.exclude,
209
- noGitignore: !values.gitignore
254
+ noGitignore: !values.gitignore,
255
+ onProgress: (file, i, total) => status(`Scanning [${i}/${total}] ${file}`)
256
+ });
257
+ clearStatus();
258
+ const result = await tokenizeFiles(repoPath, files, encoding, {
259
+ onProgress: (file, i, total) => status(`Tokenizing [${i}/${total}] ${file}`)
210
260
  });
211
- const result = await tokenizeFiles(repoPath, files, encoding);
261
+ clearStatus();
212
262
  const output = formatResult(result, {
213
263
  json: values.json,
214
264
  top,
@@ -0,0 +1,30 @@
1
+ // src/tokenize-worker.ts
2
+ import { parentPort } from "node:worker_threads";
3
+ import { readFile } from "node:fs/promises";
4
+ import { resolve } from "node:path";
5
+ import { get_encoding } from "tiktoken";
6
+ parentPort.on("message", async (msg) => {
7
+ const enc = get_encoding(msg.encoding);
8
+ try {
9
+ const results = [];
10
+ for (const file of msg.files) {
11
+ const fullPath = resolve(msg.repoPath, file);
12
+ const content = await readFile(fullPath, "utf-8");
13
+ const tokens = enc.encode_ordinary(content).length;
14
+ results.push({
15
+ path: file,
16
+ tokens
17
+ });
18
+ parentPort.postMessage({
19
+ type: "progress",
20
+ file
21
+ });
22
+ }
23
+ parentPort.postMessage({
24
+ type: "done",
25
+ results
26
+ });
27
+ } finally {
28
+ enc.free();
29
+ }
30
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kt3k/tku",
3
- "version": "1.0.8",
3
+ "version": "1.0.10",
4
4
  "description": "",
5
5
  "main": "dist/main.js",
6
6
  "bin": {
@@ -10,8 +10,8 @@
10
10
  "dist"
11
11
  ],
12
12
  "scripts": {
13
- "build": "deno bundle --external tiktoken --external picomatch -o dist/main.js src/main.ts && chmod +x dist/main.js",
14
- "prepublish": "npm run build",
13
+ "build": "deno bundle --external tiktoken --external picomatch -o dist/main.js src/main.ts && deno bundle --external tiktoken -o dist/tokenize-worker.js src/tokenize-worker.ts && chmod +x dist/main.js",
14
+ "prepublishOnly": "npm run build",
15
15
  "test": "vitest run"
16
16
  },
17
17
  "keywords": [],