@kt3k/tku 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +81 -0
- package/dist/main.js +218 -0
- package/package.json +29 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License
|
|
2
|
+
|
|
3
|
+
Copyright 2026 Yoshiya Hinosawa ( @kt3k )
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# @kt3k/tku — Git Repository Token Counter
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
`@kt3k/tku` is a CLI tool that counts the total number of tokens in a git
|
|
6
|
+
repository. It uses [tiktoken](https://www.npmjs.com/package/tiktoken) to
|
|
7
|
+
tokenize file contents and reports the token count per file and in total.
|
|
8
|
+
|
|
9
|
+
## Motivation
|
|
10
|
+
|
|
11
|
+
When working with LLMs, knowing the token size of a codebase helps estimate
|
|
12
|
+
context window usage, cost, and whether a repository (or subset) fits within
|
|
13
|
+
model limits. `tku` provides a quick, accurate measurement directly from the
|
|
14
|
+
command line.
|
|
15
|
+
|
|
16
|
+
## Usage
|
|
17
|
+
|
|
18
|
+
```sh
|
|
19
|
+
npx @kt3k/tku [options] [path]
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
- `path` — Path to a git repository (defaults to current directory).
|
|
23
|
+
|
|
24
|
+
### Options
|
|
25
|
+
|
|
26
|
+
| Flag | Description | Default |
|
|
27
|
+
| ------------------------- | ---------------------------------------------------- | -------------------- |
|
|
28
|
+
| `--encoding <encoding>` | Tiktoken encoding (e.g. `o200k_base`, `cl100k_base`) | `o200k_base` |
|
|
29
|
+
| `-e, --exclude <glob...>` | Additional glob patterns to exclude | none |
|
|
30
|
+
| `--no-gitignore` | Do not respect `.gitignore` rules | respect `.gitignore` |
|
|
31
|
+
| `--json` | Output results as JSON | false |
|
|
32
|
+
| `--top <n>` | Show only the top N files by token count | show all |
|
|
33
|
+
| `--sort <field>` | Sort by `tokens` or `path` | `tokens` |
|
|
34
|
+
|
|
35
|
+
### Examples
|
|
36
|
+
|
|
37
|
+
```sh
|
|
38
|
+
# Count tokens in the current repo
|
|
39
|
+
npx @kt3k/tku
|
|
40
|
+
|
|
41
|
+
# Count tokens using cl100k_base encoding, show top 20 files
|
|
42
|
+
npx @kt3k/tku --encoding cl100k_base --top 20
|
|
43
|
+
|
|
44
|
+
# JSON output for scripting
|
|
45
|
+
npx @kt3k/tku --json
|
|
46
|
+
|
|
47
|
+
# Exclude test files
|
|
48
|
+
npx @kt3k/tku --exclude "**/*.test.*" --exclude "**/fixtures/**"
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Output
|
|
52
|
+
|
|
53
|
+
### Default (human-readable)
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
tokens path
|
|
57
|
+
1.2 K src/index.ts
|
|
58
|
+
892 src/utils.ts
|
|
59
|
+
345 README.md
|
|
60
|
+
──────
|
|
61
|
+
2.4 K total (3 files)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### JSON (`--json`)
|
|
65
|
+
|
|
66
|
+
```json
|
|
67
|
+
{
|
|
68
|
+
"encoding": "o200k_base",
|
|
69
|
+
"files": [
|
|
70
|
+
{ "path": "src/index.ts", "tokens": 1204 },
|
|
71
|
+
{ "path": "src/utils.ts", "tokens": 892 },
|
|
72
|
+
{ "path": "README.md", "tokens": 345 }
|
|
73
|
+
],
|
|
74
|
+
"totalTokens": 2441,
|
|
75
|
+
"totalFiles": 3
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## License
|
|
80
|
+
|
|
81
|
+
MIT
|
package/dist/main.js
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
// src/main.ts
|
|
2
|
+
import process from "node:process";
|
|
3
|
+
import { parseArgs } from "node:util";
|
|
4
|
+
|
|
5
|
+
// src/files.ts
|
|
6
|
+
import { resolve } from "node:path";
|
|
7
|
+
import { open } from "node:fs/promises";
|
|
8
|
+
import { execFile } from "node:child_process";
|
|
9
|
+
import { promisify } from "node:util";
|
|
10
|
+
const execFileAsync = promisify(execFile);

/**
 * Lists the files of a git working tree by running `git ls-files`.
 *
 * By default only tracked files are returned. With `options.noGitignore`
 * untracked files are included as well, *without* applying any ignore rules.
 *
 * @param {string} repoPath - Path to (or inside) a git repository.
 * @param {{ noGitignore?: boolean, exclude?: string[] }} [options]
 *   `noGitignore`: also list untracked files, ignoring `.gitignore`.
 *   `exclude`: patterns (URLPattern pathname syntax) of paths to drop.
 * @returns {Promise<string[]>} Paths relative to the resolved `repoPath`.
 * @throws {Error} When git cannot be run or reports an error; the original
 *   failure is attached as `cause`.
 */
async function listFiles(repoPath, options = {}) {
  const cwd = resolve(repoPath);
  // -z emits NUL-terminated, unquoted paths. Without it git C-quotes names
  // containing non-ASCII or special characters, which then fail to resolve.
  const args = ["ls-files", "-z"];
  if (options.noGitignore) {
    // No --exclude-standard here: that switch is what applies .gitignore,
    // which is exactly what this option is meant to turn off.
    args.push("--cached", "--others");
  }
  let stdout;
  try {
    const result = await execFileAsync("git", args, {
      cwd,
      // The 1 MiB default is too small for the file list of a large repo.
      maxBuffer: 256 * 1024 * 1024,
    });
    stdout = result.stdout;
  } catch (e) {
    // stderr is an empty string (not undefined) when git could not even be
    // spawned, so fall back on any falsy value rather than only on nullish.
    const msg = e.stderr?.trim() || String(e);
    throw new Error(`Not a git repository or git error: ${msg}`, { cause: e });
  }
  let files = stdout.split("\0").filter((f) => f.length > 0);
  if (options.exclude && options.exclude.length > 0) {
    const patterns = options.exclude.map((g) => new URLPattern({ pathname: g }));
    files = files.filter((f) => !patterns.some((p) => p.test({ pathname: f })));
  }
  return files;
}
|
|
40
|
+
async function isBinary(filePath) {
|
|
41
|
+
const handle = await open(filePath, "r");
|
|
42
|
+
try {
|
|
43
|
+
const buf = new Uint8Array(8192);
|
|
44
|
+
const { bytesRead } = await handle.read(buf);
|
|
45
|
+
if (bytesRead === null || bytesRead === 0) return false;
|
|
46
|
+
for (let i = 0; i < bytesRead; i++) {
|
|
47
|
+
if (buf[i] === 0) return true;
|
|
48
|
+
}
|
|
49
|
+
return false;
|
|
50
|
+
} finally {
|
|
51
|
+
await handle.close();
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
/**
 * Lists the repository files that `isBinary` does not classify as binary.
 *
 * @param {string} repoPath - Path to a git repository.
 * @param {object} [options] - Passed through unchanged to `listFiles`.
 * @returns {Promise<string[]>} Repository-relative paths of text files, in
 *   the order `listFiles` produced them.
 */
async function listTextFiles(repoPath, options = {}) {
  const root = resolve(repoPath);
  const candidates = await listFiles(repoPath, options);
  const textFiles = [];
  for (const relPath of candidates) {
    let binary;
    try {
      binary = await isBinary(resolve(root, relPath));
    } catch {
      // Best effort: entries that cannot be opened or read are skipped.
      continue;
    }
    if (!binary) {
      textFiles.push(relPath);
    }
  }
  return textFiles;
}
|
|
69
|
+
|
|
70
|
+
// src/tokenize.ts
|
|
71
|
+
import { readFile } from "node:fs/promises";
|
|
72
|
+
import { resolve as resolve2 } from "node:path";
|
|
73
|
+
import { get_encoding } from "tiktoken";
|
|
74
|
+
/**
 * Reads each file as UTF-8 and counts its tokens with the given tiktoken
 * encoding. Files are processed one at a time, in the order given.
 *
 * @param {string} repoPath - Base directory the file paths are resolved against.
 * @param {string[]} files - File paths relative to `repoPath`.
 * @param {string} encoding - Encoding name handed to `get_encoding`.
 * @returns {Promise<{encoding: string, files: {path: string, tokens: number}[],
 *   totalTokens: number, totalFiles: number}>} Per-file and aggregate counts.
 */
async function tokenizeFiles(repoPath, files, encoding) {
  const tokenizer = get_encoding(encoding);
  try {
    const perFile = [];
    for (const relPath of files) {
      const text = await readFile(resolve2(repoPath, relPath), "utf-8");
      perFile.push({
        path: relPath,
        tokens: tokenizer.encode_ordinary(text).length,
      });
    }
    const totalTokens = perFile.reduce((sum, entry) => sum + entry.tokens, 0);
    return {
      encoding,
      files: perFile,
      totalTokens,
      totalFiles: perFile.length,
    };
  } finally {
    // Release the encoder whether or not tokenizing succeeded.
    tokenizer.free();
  }
}
|
|
99
|
+
|
|
100
|
+
// src/format.ts
|
|
101
|
+
/**
 * Formats a token count for display: plain digits below one thousand,
 * otherwise one decimal place with a " K" (thousands) or " M" (millions)
 * suffix.
 *
 * @param {number} n - Token count.
 * @returns {string} E.g. `"892"`, `"1.2 K"`, `"3.4 M"`.
 */
function formatTokenCount(n) {
  if (n < 1e3) {
    return String(n);
  }
  if (n < 1e6) {
    const thousands = (n / 1e3).toFixed(1);
    // Values just under a million round up to "1000.0"; show those in the
    // next unit instead of printing "1000.0 K".
    if (thousands !== "1000.0") {
      return `${thousands} K`;
    }
  }
  return `${(n / 1e6).toFixed(1)} M`;
}
|
|
110
|
+
/**
 * Renders a tokenization result as a right-aligned plain-text table: a
 * header, one row per file, a rule, and a totals row.
 *
 * @param {{files: {path: string, tokens: number}[], totalTokens: number,
 *   totalFiles: number}} result - Result to render; `files` is printed in
 *   the order given.
 * @returns {string} The table, lines joined with "\n".
 */
function formatTable(result) {
  const rows = result.files.map((f) => ({
    path: f.path,
    display: formatTokenCount(f.tokens),
  }));
  const totalDisplay = formatTokenCount(result.totalTokens);
  // The totals row shares the count column, so it must take part in the
  // width calculation. A plain loop is used instead of Math.max(...values)
  // because spreading a very large array can exceed the argument limit.
  let maxWidth = Math.max("tokens".length, totalDisplay.length);
  for (const row of rows) {
    if (row.display.length > maxWidth) {
      maxWidth = row.display.length;
    }
  }
  const lines = [`${"tokens".padStart(maxWidth)} path`];
  for (const row of rows) {
    lines.push(`${row.display.padStart(maxWidth)} ${row.path}`);
  }
  lines.push(`${"\u2500".repeat(maxWidth)}\u2500\u2500`);
  lines.push(`${totalDisplay.padStart(maxWidth)} total (${result.totalFiles} files)`);
  return lines.join("\n");
}
|
|
126
|
+
/**
 * Sorts, optionally truncates, and renders a tokenization result.
 *
 * @param {{files: {path: string, tokens: number}[]}} result - Result to
 *   render; it is not modified. Totals are passed through unchanged even
 *   when the file list is truncated.
 * @param {{json?: boolean, top?: number, sort?: string}} [options]
 *   `sort`: `"path"` orders by path (locale compare); any other value
 *   orders by token count, descending.
 *   `top`: when defined, keep only the first `top` files after sorting.
 *   `json`: emit pretty-printed JSON instead of the text table.
 * @returns {string} The rendered output.
 */
function formatResult(result, options = {}) {
  const byPath = (a, b) => a.path.localeCompare(b.path);
  const byTokensDesc = (a, b) => b.tokens - a.tokens;

  // Sort a copy so the caller's array keeps its order.
  const ordered = Array.from(result.files);
  ordered.sort(options.sort === "path" ? byPath : byTokensDesc);

  const visible = options.top === undefined ? ordered : ordered.slice(0, options.top);
  const view = Object.assign({}, result, { files: visible });

  if (options.json) {
    return JSON.stringify(view, null, 2);
  }
  return formatTable(view);
}
|
|
143
|
+
|
|
144
|
+
// src/main.ts
|
|
145
|
+
/**
 * Prints the command-line help text to standard output.
 */
function printUsage() {
  const helpLines = [
    "Usage: tku [options] [path]",
    "",
    "Options:",
    "--encoding <encoding> Tiktoken encoding (default: o200k_base)",
    "-e, --exclude <glob...> Glob patterns to exclude (repeatable)",
    "--no-gitignore Do not respect .gitignore rules",
    "--json Output results as JSON",
    "--top <n> Show only the top N files by token count",
    '--sort <field> Sort by "tokens" or "path" (default: tokens)',
    "-h, --help Show this help message",
  ];
  console.log(helpLines.join("\n"));
}
|
|
157
|
+
async function main() {
|
|
158
|
+
const { values, positionals } = parseArgs({
|
|
159
|
+
args: process.argv.slice(2),
|
|
160
|
+
options: {
|
|
161
|
+
encoding: {
|
|
162
|
+
type: "string",
|
|
163
|
+
default: "o200k_base"
|
|
164
|
+
},
|
|
165
|
+
exclude: {
|
|
166
|
+
type: "string",
|
|
167
|
+
short: "e",
|
|
168
|
+
multiple: true
|
|
169
|
+
},
|
|
170
|
+
gitignore: {
|
|
171
|
+
type: "boolean",
|
|
172
|
+
default: true
|
|
173
|
+
},
|
|
174
|
+
json: {
|
|
175
|
+
type: "boolean",
|
|
176
|
+
default: false
|
|
177
|
+
},
|
|
178
|
+
top: {
|
|
179
|
+
type: "string"
|
|
180
|
+
},
|
|
181
|
+
sort: {
|
|
182
|
+
type: "string",
|
|
183
|
+
default: "tokens"
|
|
184
|
+
},
|
|
185
|
+
help: {
|
|
186
|
+
type: "boolean",
|
|
187
|
+
short: "h",
|
|
188
|
+
default: false
|
|
189
|
+
}
|
|
190
|
+
},
|
|
191
|
+
allowPositionals: true
|
|
192
|
+
});
|
|
193
|
+
if (values.help) {
|
|
194
|
+
printUsage();
|
|
195
|
+
process.exit(0);
|
|
196
|
+
}
|
|
197
|
+
const repoPath = positionals[0] ?? ".";
|
|
198
|
+
const encoding = values.encoding;
|
|
199
|
+
const sort = values.sort;
|
|
200
|
+
const top = values.top !== void 0 ? Number(values.top) : void 0;
|
|
201
|
+
try {
|
|
202
|
+
const files = await listTextFiles(repoPath, {
|
|
203
|
+
exclude: values.exclude,
|
|
204
|
+
noGitignore: !values.gitignore
|
|
205
|
+
});
|
|
206
|
+
const result = await tokenizeFiles(repoPath, files, encoding);
|
|
207
|
+
const output = formatResult(result, {
|
|
208
|
+
json: values.json,
|
|
209
|
+
top,
|
|
210
|
+
sort
|
|
211
|
+
});
|
|
212
|
+
console.log(output);
|
|
213
|
+
} catch (e) {
|
|
214
|
+
console.error(e instanceof Error ? e.message : String(e));
|
|
215
|
+
process.exit(1);
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
main();
|
package/package.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@kt3k/tku",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "",
|
|
5
|
+
"main": "dist/main.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"tku": "dist/main.js"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"dist"
|
|
11
|
+
],
|
|
12
|
+
"scripts": {
|
|
13
|
+
"build": "deno bundle --external tiktoken -o dist/main.js src/main.ts",
|
|
14
|
+
"prepublishOnly": "npm run build",
|
|
15
|
+
"test": "vitest run"
|
|
16
|
+
},
|
|
17
|
+
"keywords": [],
|
|
18
|
+
"author": "Yoshiya Hinosawa",
|
|
19
|
+
"license": "MIT",
|
|
20
|
+
"type": "module",
|
|
21
|
+
"dependencies": {
|
|
22
|
+
"tiktoken": "^1"
|
|
23
|
+
},
|
|
24
|
+
"devDependencies": {
|
|
25
|
+
"@types/node": "^25",
|
|
26
|
+
"string-dedent": "^3",
|
|
27
|
+
"vitest": "^3"
|
|
28
|
+
}
|
|
29
|
+
}
|