tokenfill 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +92 -0
- package/package.json +5 -1
package/README.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# tokenfill
|
|
2
|
+
|
|
3
|
+
Generate deterministic filler text with exact token counts.
|
|
4
|
+
|
|
5
|
+
`tokenfill` is available as:
|
|
6
|
+
|
|
7
|
+
- A CLI: `tokenfill <count>`
|
|
8
|
+
- A library: `tokenfill(count, options)`
|
|
9
|
+
- A tokenizer utility wrapper: `createTokenizer(options)`
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
npm install tokenfill
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Run with `npx`:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
npx tokenfill 256
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## CLI
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
tokenfill <count> [--json] [--tokenizer <encoding>]
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Examples:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
tokenfill 512 > sample.txt
|
|
33
|
+
tokenfill 128 --json
|
|
34
|
+
tokenfill 256 --tokenizer o200k_base --json
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Behavior:
|
|
38
|
+
|
|
39
|
+
- `<count>` must be a non-negative integer.
|
|
40
|
+
- Default tokenizer encoding is `cl100k_base`.
|
|
41
|
+
- Without `--json`, generated text is written to `stdout` and stats to `stderr`.
|
|
42
|
+
- With `--json`, output is:
|
|
43
|
+
|
|
44
|
+
```json
|
|
45
|
+
{
|
|
46
|
+
"text": "…",
|
|
47
|
+
"stats": {
|
|
48
|
+
"requestedTokens": 128,
|
|
49
|
+
"actualTokens": 128,
|
|
50
|
+
"encoding": "cl100k_base"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Library Usage
|
|
56
|
+
|
|
57
|
+
```ts
|
|
58
|
+
import { tokenfill } from "tokenfill";
|
|
59
|
+
|
|
60
|
+
const result = tokenfill(1024);
|
|
61
|
+
|
|
62
|
+
console.log(result.actualTokens); // 1024
|
|
63
|
+
console.log(result.text.length > 0); // true
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
With an explicit encoding:
|
|
67
|
+
|
|
68
|
+
```ts
|
|
69
|
+
import { tokenfill } from "tokenfill";
|
|
70
|
+
|
|
71
|
+
const result = tokenfill(256, { encoding: "o200k_base" });
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Tokenizer Utility
|
|
75
|
+
|
|
76
|
+
```ts
|
|
77
|
+
import { createTokenizer } from "tokenfill";
|
|
78
|
+
|
|
79
|
+
const tokenizer = createTokenizer({ encoding: "cl100k_base" });
|
|
80
|
+
|
|
81
|
+
const tokens = tokenizer.encode("hello world");
|
|
82
|
+
const text = tokenizer.decode(tokens);
|
|
83
|
+
const count = tokenizer.count(text);
|
|
84
|
+
const truncated = tokenizer.truncate(text, 1);
|
|
85
|
+
|
|
86
|
+
tokenizer.free();
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Notes
|
|
90
|
+
|
|
91
|
+
- Output is deterministic for the same token count and encoding.
|
|
92
|
+
- Requests larger than the built-in corpus size throw an error.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "tokenfill",
|
|
3
|
-
"version": "0.0.1",
|
|
3
|
+
"version": "0.0.2",
|
|
4
4
|
"private": false,
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -23,5 +23,9 @@
|
|
|
23
23
|
"dependencies": {
|
|
24
24
|
"commander": "^14.0.3",
|
|
25
25
|
"tiktoken": "^1.0.22"
|
|
26
|
+
},
|
|
27
|
+
"repository": {
|
|
28
|
+
"type": "git",
|
|
29
|
+
"url": "git+https://github.com/poe-platform/poe-code.git"
|
|
26
30
|
}
|
|
27
31
|
}
|