@kt3k/tku 1.0.9 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -75,35 +75,61 @@ async function listTextFiles(repoPath, options = {}) {
75
75
  }
76
76
 
77
77
  // src/tokenize.ts
78
- import { readFile } from "node:fs/promises";
79
78
  import { resolve as resolve2 } from "node:path";
79
+ import { availableParallelism } from "node:os";
80
+ import { Worker } from "node:worker_threads";
80
81
  import { get_encoding } from "tiktoken";
82
+ var workerExt = import.meta.url.endsWith(".ts") ? ".ts" : ".js";
83
+ var workerUrl = new URL(`./tokenize-worker${workerExt}`, import.meta.url);
81
84
  async function tokenizeFiles(repoPath, files, encoding, options = {}) {
82
- const enc = get_encoding(encoding);
83
- try {
84
- const results = [];
85
- let totalTokens = 0;
86
- for (let i = 0; i < files.length; i++) {
87
- const file = files[i];
88
- options.onProgress?.(file, i + 1, files.length);
89
- const fullPath = resolve2(repoPath, file);
90
- const content = await readFile(fullPath, "utf-8");
91
- const tokens = enc.encode_ordinary(content).length;
92
- results.push({
93
- path: file,
94
- tokens
95
- });
96
- totalTokens += tokens;
97
- }
98
- return {
99
- encoding,
100
- files: results,
101
- totalTokens,
102
- totalFiles: results.length
103
- };
104
- } finally {
105
- enc.free();
85
+ const resolvedRepoPath = resolve2(repoPath);
86
+ const numWorkers = Math.min(availableParallelism(), files.length || 1);
87
+ const chunks = Array.from({
88
+ length: numWorkers
89
+ }, () => []);
90
+ for (let i = 0; i < files.length; i++) {
91
+ chunks[i % numWorkers].push(files[i]);
106
92
  }
93
+ let progressCount = 0;
94
+ const total = files.length;
95
+ const workerPromises = chunks.map((chunk) => {
96
+ if (chunk.length === 0) return Promise.resolve([]);
97
+ return new Promise((resolvePromise, reject) => {
98
+ const worker = new Worker(workerUrl);
99
+ worker.on("message", (msg) => {
100
+ if (msg.type === "progress") {
101
+ progressCount++;
102
+ options.onProgress?.(msg.file, progressCount, total);
103
+ } else if (msg.type === "done") {
104
+ resolvePromise(msg.results);
105
+ worker.terminate();
106
+ }
107
+ });
108
+ worker.on("error", (err) => {
109
+ reject(err);
110
+ worker.terminate();
111
+ });
112
+ worker.postMessage({
113
+ repoPath: resolvedRepoPath,
114
+ files: chunk,
115
+ encoding
116
+ });
117
+ });
118
+ });
119
+ const chunkResults = await Promise.all(workerPromises);
120
+ const results = chunkResults.flat();
121
+ const orderMap = new Map(files.map((f, i) => [
122
+ f,
123
+ i
124
+ ]));
125
+ results.sort((a, b) => orderMap.get(a.path) - orderMap.get(b.path));
126
+ const totalTokens = results.reduce((sum, f) => sum + f.tokens, 0);
127
+ return {
128
+ encoding,
129
+ files: results,
130
+ totalTokens,
131
+ totalFiles: results.length
132
+ };
107
133
  }
108
134
 
109
135
  // src/format.ts
@@ -214,7 +240,9 @@ async function main() {
214
240
  const isTTY = process.stderr.isTTY;
215
241
  function status(msg) {
216
242
  if (isTTY) {
217
- process.stderr.write(`\r\x1B[K${msg}`);
243
+ const cols = process.stderr.columns || 80;
244
+ const truncated = msg.length > cols ? msg.slice(0, cols) : msg;
245
+ process.stderr.write(`\r\x1B[K${truncated}`);
218
246
  }
219
247
  }
220
248
  function clearStatus() {
@@ -0,0 +1,30 @@
1
+ // src/tokenize-worker.ts
2
+ import { parentPort } from "node:worker_threads";
3
+ import { readFile } from "node:fs/promises";
4
+ import { resolve } from "node:path";
5
+ import { get_encoding } from "tiktoken";
6
+ parentPort.on("message", async (msg) => {
7
+ const enc = get_encoding(msg.encoding);
8
+ try {
9
+ const results = [];
10
+ for (const file of msg.files) {
11
+ const fullPath = resolve(msg.repoPath, file);
12
+ const content = await readFile(fullPath, "utf-8");
13
+ const tokens = enc.encode_ordinary(content).length;
14
+ results.push({
15
+ path: file,
16
+ tokens
17
+ });
18
+ parentPort.postMessage({
19
+ type: "progress",
20
+ file
21
+ });
22
+ }
23
+ parentPort.postMessage({
24
+ type: "done",
25
+ results
26
+ });
27
+ } finally {
28
+ enc.free();
29
+ }
30
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kt3k/tku",
3
- "version": "1.0.9",
3
+ "version": "1.0.11",
4
4
  "description": "",
5
5
  "main": "dist/main.js",
6
6
  "bin": {
@@ -10,8 +10,8 @@
10
10
  "dist"
11
11
  ],
12
12
  "scripts": {
13
- "build": "deno bundle --external tiktoken --external picomatch -o dist/main.js src/main.ts && chmod +x dist/main.js",
14
- "prepublish": "npm run build",
13
+ "build": "deno bundle --external tiktoken --external picomatch -o dist/main.js src/main.ts && deno bundle --external tiktoken -o dist/tokenize-worker.js src/tokenize-worker.ts && chmod +x dist/main.js",
14
+ "prepublishOnly": "npm run build",
15
15
  "test": "vitest run"
16
16
  },
17
17
  "keywords": [],