@kt3k/tku 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +81 -0
  3. package/dist/main.js +218 -0
  4. package/package.json +29 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright 2026 Yoshiya Hinosawa ( @kt3k )
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,81 @@
1
+ # @kt3k/tku — Git Repository Token Counter
2
+
3
+ ## Overview
4
+
5
+ `@kt3k/tku` is a CLI tool that counts the total number of tokens in a git
6
+ repository. It uses [tiktoken](https://www.npmjs.com/package/tiktoken) to
7
+ tokenize file contents and reports the token count per file and in total.
8
+
9
+ ## Motivation
10
+
11
+ When working with LLMs, knowing the token size of a codebase helps estimate
12
+ context window usage, cost, and whether a repository (or subset) fits within
13
+ model limits. `tku` provides a quick, accurate measurement directly from the
14
+ command line.
15
+
16
+ ## Usage
17
+
18
+ ```sh
19
+ npx @kt3k/tku [options] [path]
20
+ ```
21
+
22
+ - `path` — Path to a git repository (defaults to current directory).
23
+
24
+ ### Options
25
+
26
+ | Flag | Description | Default |
27
+ | ------------------------- | ---------------------------------------------------- | -------------------- |
28
+ | `--encoding <encoding>` | Tiktoken encoding (e.g. `o200k_base`, `cl100k_base`) | `o200k_base` |
29
+ | `-e, --exclude <glob...>` | Additional glob patterns to exclude | none |
30
+ | `--no-gitignore` | Do not respect `.gitignore` rules | respect `.gitignore` |
31
+ | `--json` | Output results as JSON | false |
32
+ | `--top <n>` | Show only the top N files by token count | show all |
33
+ | `--sort <field>` | Sort by `tokens` or `path` | `tokens` |
34
+
35
+ ### Examples
36
+
37
+ ```sh
38
+ # Count tokens in the current repo
39
+ npx @kt3k/tku
40
+
41
+ # Count tokens using cl100k_base encoding, show top 20 files
42
+ npx @kt3k/tku --encoding cl100k_base --top 20
43
+
44
+ # JSON output for scripting
45
+ npx @kt3k/tku --json
46
+
47
+ # Exclude test files
48
+ npx @kt3k/tku --exclude "**/*.test.*" --exclude "**/fixtures/**"
49
+ ```
50
+
51
+ ## Output
52
+
53
+ ### Default (human-readable)
54
+
55
+ ```
56
+ tokens path
57
+ 1.2 K src/index.ts
58
+ 892 src/utils.ts
59
+ 345 README.md
60
+ ──────
61
+ 2.4 K total (3 files)
62
+ ```
63
+
64
+ ### JSON (`--json`)
65
+
66
+ ```json
67
+ {
68
+ "encoding": "o200k_base",
69
+ "files": [
70
+ { "path": "src/index.ts", "tokens": 1204 },
71
+ { "path": "src/utils.ts", "tokens": 892 },
72
+ { "path": "README.md", "tokens": 345 }
73
+ ],
74
+ "totalTokens": 2441,
75
+ "totalFiles": 3
76
+ }
77
+ ```
78
+
79
+ ## License
80
+
81
+ MIT
package/dist/main.js ADDED
@@ -0,0 +1,218 @@
1
+ // src/main.ts
2
+ import process from "node:process";
3
+ import { parseArgs } from "node:util";
4
+
5
+ // src/files.ts
6
+ import { resolve } from "node:path";
7
+ import { open } from "node:fs/promises";
8
+ import { execFile } from "node:child_process";
9
+ import { promisify } from "node:util";
10
// Promise-returning wrapper around child_process.execFile.
const execFileAsync = promisify(execFile);

/**
 * Lists files of the git repository at `repoPath` by running `git ls-files`.
 *
 * @param {string} repoPath - Path to the repository (resolved against the current working directory).
 * @param {{ noGitignore?: boolean, exclude?: string[] }} [options]
 *   - `noGitignore`: when true, also list untracked files, including ignored ones.
 *   - `exclude`: patterns; any file whose path matches one of them is dropped.
 * @returns {Promise<string[]>} Repository-relative file paths.
 * @throws {Error} "Not a git repository or git error: …" when git cannot be run or fails.
 */
async function listFiles(repoPath, options = {}) {
  // Large repositories easily exceed execFile's default 1 MiB stdout limit.
  const maxBuffer = 512 * 1024 * 1024;
  const cwd = resolve(repoPath);
  // -z emits NUL-terminated, unquoted paths. Without it git quotes/escapes
  // names containing non-ASCII or special characters, and those quoted names
  // do not exist on disk.
  const args = ["ls-files", "-z"];
  if (options.noGitignore) {
    // Tracked + untracked files. No --exclude-standard here: that flag is
    // what applies the .gitignore rules this option is meant to bypass.
    args.push("--cached", "--others");
  }
  let stdout;
  try {
    ({ stdout } = await execFileAsync("git", args, { cwd, maxBuffer }));
  } catch (e) {
    // stderr is an empty string when git could not even be spawned, so fall
    // back on the error itself whenever there is no stderr text.
    const detail = e.stderr?.trim() || String(e);
    throw new Error(`Not a git repository or git error: ${detail}`, { cause: e });
  }
  // No trim(): leading/trailing whitespace can be part of a file name.
  let files = stdout.split("\0").filter((f) => f.length > 0);
  if (options.exclude && options.exclude.length > 0) {
    // NOTE(review): URLPattern pathname syntax is not glob syntax, and
    // URLPattern is not a global in every JavaScript runtime — confirm the
    // supported runtimes and the intended matching rules.
    const patterns = options.exclude.map((g) => new URLPattern({ pathname: g }));
    files = files.filter((f) => !patterns.some((p) => p.test({ pathname: f })));
  }
  return files;
}
40
+ async function isBinary(filePath) {
41
+ const handle = await open(filePath, "r");
42
+ try {
43
+ const buf = new Uint8Array(8192);
44
+ const { bytesRead } = await handle.read(buf);
45
+ if (bytesRead === null || bytesRead === 0) return false;
46
+ for (let i = 0; i < bytesRead; i++) {
47
+ if (buf[i] === 0) return true;
48
+ }
49
+ return false;
50
+ } finally {
51
+ await handle.close();
52
+ }
53
+ }
54
/**
 * Lists the repository's files (via listFiles) and keeps only those that
 * isBinary reports as non-binary.
 *
 * @param {string} repoPath - Path to the repository.
 * @param {object} [options] - Forwarded unchanged to listFiles.
 * @returns {Promise<string[]>} Repository-relative paths of the text files,
 *   in the order listFiles returned them.
 */
async function listTextFiles(repoPath, options = {}) {
  const root = resolve(repoPath);
  const textFiles = [];
  for (const relPath of await listFiles(repoPath, options)) {
    const absolute = resolve(root, relPath);
    let binary;
    try {
      binary = await isBinary(absolute);
    } catch {
      // Best effort: a file that cannot be opened or read is skipped.
      continue;
    }
    if (!binary) {
      textFiles.push(relPath);
    }
  }
  return textFiles;
}
69
+
70
+ // src/tokenize.ts
71
+ import { readFile } from "node:fs/promises";
72
+ import { resolve as resolve2 } from "node:path";
73
+ import { get_encoding } from "tiktoken";
74
/**
 * Counts tokens for each file using the given tiktoken encoding.
 *
 * @param {string} repoPath - Directory the file paths are relative to.
 * @param {string[]} files - Repository-relative file paths, read as UTF-8.
 * @param {string} encoding - Encoding name passed to get_encoding.
 * @returns {Promise<{encoding: string, files: {path: string, tokens: number}[], totalTokens: number, totalFiles: number}>}
 *   Per-file counts in input order plus the totals.
 */
async function tokenizeFiles(repoPath, files, encoding) {
  const tokenizer = get_encoding(encoding);
  const perFile = [];
  try {
    // Files are read and encoded one at a time, in input order.
    for (const relPath of files) {
      const text = await readFile(resolve2(repoPath, relPath), "utf-8");
      perFile.push({
        path: relPath,
        tokens: tokenizer.encode_ordinary(text).length,
      });
    }
  } finally {
    // Release the encoder whether or not reading/encoding succeeded.
    tokenizer.free();
  }
  const totalTokens = perFile.reduce((sum, entry) => sum + entry.tokens, 0);
  return {
    encoding,
    files: perFile,
    totalTokens,
    totalFiles: perFile.length,
  };
}
99
+
100
+ // src/format.ts
101
/**
 * Formats a token count for display: plain digits below 1000, otherwise one
 * decimal with a " K" (thousands) or " M" (millions) suffix.
 *
 * @param {number} n - Token count.
 * @returns {string} e.g. "892", "1.2 K", "3.4 M".
 */
function formatTokenCount(n) {
  const asMillions = () => `${(n / 1e6).toFixed(1)} M`;
  if (n >= 1e6) {
    return asMillions();
  }
  if (n >= 1e3) {
    const thousands = (n / 1e3).toFixed(1);
    // Counts just under one million (e.g. 999990) round up to "1000.0";
    // promote those to the M unit instead of printing "1000.0 K".
    return Number(thousands) >= 1e3 ? asMillions() : `${thousands} K`;
  }
  return String(n);
}
110
/**
 * Renders a result as a right-aligned, human-readable table: a header row,
 * one row per file, a separator, and a total row.
 *
 * @param {{files: {path: string, tokens: number}[], totalTokens: number, totalFiles: number}} result
 * @returns {string} The table, lines joined with "\n".
 */
function formatTable(result) {
  const rows = result.files.map((f) => ({
    path: f.path,
    display: formatTokenCount(f.tokens),
  }));
  const totalDisplay = formatTokenCount(result.totalTokens);
  // Column width covers the header, every row and the total, so the total
  // row stays aligned even when it is wider than every per-file value.
  // reduce() is used instead of Math.max(...spread) so that very large file
  // lists cannot run into the engine's argument-count limit.
  const maxWidth = rows.reduce(
    (width, row) => Math.max(width, row.display.length),
    Math.max("tokens".length, totalDisplay.length),
  );
  const lines = [`${"tokens".padStart(maxWidth)}  path`];
  for (const row of rows) {
    lines.push(`${row.display.padStart(maxWidth)}  ${row.path}`);
  }
  lines.push(`${"\u2500".repeat(maxWidth)}\u2500\u2500`);
  lines.push(`${totalDisplay.padStart(maxWidth)}  total (${result.totalFiles} files)`);
  return lines.join("\n");
}
126
/**
 * Sorts, optionally truncates, and serializes a tokenization result.
 *
 * @param {{files: {path: string, tokens: number}[]}} result - Result to format; not mutated.
 * @param {{json?: boolean, top?: number, sort?: string}} [options]
 *   - `sort`: "path" sorts by path ascending; any other value sorts by tokens descending.
 *   - `top`: when defined, only the first `top` files after sorting are kept.
 *   - `json`: when true, returns pretty-printed JSON instead of a table.
 * @returns {string} JSON text or the table produced by formatTable.
 */
function formatResult(result, options = {}) {
  const { json = false, top, sort = "tokens" } = options;
  const compare = sort === "path"
    ? (left, right) => left.path.localeCompare(right.path)
    : (left, right) => right.tokens - left.tokens;
  // Sort a copy so the caller's array keeps its order.
  const ordered = Array.from(result.files).sort(compare);
  const visible = top === undefined ? ordered : ordered.slice(0, top);
  const view = { ...result, files: visible };
  if (json) {
    return JSON.stringify(view, null, 2);
  }
  return formatTable(view);
}
143
+
144
+ // src/main.ts
145
// Prints the CLI usage/help text (synopsis plus the list of options) to
// stdout with a single console.log call. Takes no arguments, returns nothing.
+ function printUsage() {
146
+ console.log(`Usage: tku [options] [path]
147
+
148
+ Options:
149
+ --encoding <encoding> Tiktoken encoding (default: o200k_base)
150
+ -e, --exclude <glob...> Glob patterns to exclude (repeatable)
151
+ --no-gitignore Do not respect .gitignore rules
152
+ --json Output results as JSON
153
+ --top <n> Show only the top N files by token count
154
+ --sort <field> Sort by "tokens" or "path" (default: tokens)
155
+ -h, --help Show this help message`);
156
+ }
157
/**
 * CLI entry point: parses command-line arguments, lists the repository's
 * text files, tokenizes them and prints the formatted report to stdout.
 *
 * Any failure (unknown flag, invalid --top value, git or tokenizer error) is
 * printed to stderr as a single message and the process exits with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    // Parsed inside the try so a bad flag produces a one-line error message
    // instead of an unhandled rejection.
    const { values, positionals } = parseArgs({
      args: process.argv.slice(2),
      options: {
        encoding: { type: "string", default: "o200k_base" },
        exclude: { type: "string", short: "e", multiple: true },
        // True by default; `--no-gitignore` sets it to false (see allowNegative).
        gitignore: { type: "boolean", default: true },
        json: { type: "boolean", default: false },
        // Received as a string; validated and converted below.
        top: { type: "string" },
        sort: { type: "string", default: "tokens" },
        help: { type: "boolean", short: "h", default: false },
      },
      allowPositionals: true,
      // Without this, the documented `--no-gitignore` flag is rejected as an
      // unknown option.
      allowNegative: true,
    });
    if (values.help) {
      printUsage();
      return;
    }
    const repoPath = positionals[0] ?? ".";
    const encoding = values.encoding;
    const sort = values.sort;
    let top;
    if (values.top !== undefined) {
      top = Number(values.top);
      // NaN would silently print no files, and a negative value would drop
      // files from the end of the list; reject both explicitly.
      if (!Number.isInteger(top) || top < 0) {
        throw new Error(`Invalid --top value: "${values.top}" (expected a non-negative integer)`);
      }
    }
    const files = await listTextFiles(repoPath, {
      exclude: values.exclude,
      noGitignore: !values.gitignore,
    });
    const result = await tokenizeFiles(repoPath, files, encoding);
    const output = formatResult(result, {
      json: values.json,
      top,
      sort,
    });
    console.log(output);
  } catch (e) {
    console.error(e instanceof Error ? e.message : String(e));
    process.exit(1);
  }
}
218
+ main();
package/package.json ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "name": "@kt3k/tku",
3
+ "version": "1.0.0",
4
+ "description": "",
5
+ "main": "dist/main.js",
6
+ "bin": {
7
+ "tku": "dist/main.js"
8
+ },
9
+ "files": [
10
+ "dist"
11
+ ],
12
+ "scripts": {
13
+ "build": "deno bundle --external tiktoken -o dist/main.js src/main.ts",
14
+ "prepublishOnly": "npm run build",
15
+ "test": "vitest run"
16
+ },
17
+ "keywords": [],
18
+ "author": "Yoshiya Hinosawa",
19
+ "license": "MIT",
20
+ "type": "module",
21
+ "dependencies": {
22
+ "tiktoken": "^1"
23
+ },
24
+ "devDependencies": {
25
+ "@types/node": "^25",
26
+ "string-dedent": "^3",
27
+ "vitest": "^3"
28
+ }
29
+ }