tokenfill 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin.d.ts +2 -0
- package/dist/bin.js +6 -0
- package/dist/cli.d.ts +9 -0
- package/dist/cli.js +65 -0
- package/dist/corpus/001-archaeoastronomy.md +479 -0
- package/dist/corpus/002-magnetohydrodynamics.md +475 -0
- package/dist/corpus/003-biosemiotics.md +483 -0
- package/dist/corpus/004-cryopedology.md +483 -0
- package/dist/corpus/005-geomicrobiology.md +479 -0
- package/dist/corpus/006-aeronomy.md +487 -0
- package/dist/corpus/007-paleoclimatology.md +479 -0
- package/dist/corpus/008-hydrogeophysics.md +479 -0
- package/dist/corpus/009-magnetostratigraphy.md +475 -0
- package/dist/corpus/010-isotope-hydrology.md +481 -0
- package/dist/corpus/011-speleothem-geochemistry.md +474 -0
- package/dist/corpus/012-astrobiogeochemistry.md +475 -0
- package/dist/corpus/013-neuroethology.md +483 -0
- package/dist/corpus/014-chronophysiology.md +483 -0
- package/dist/corpus/015-limnogeochemistry.md +475 -0
- package/dist/corpus/016-palynology.md +483 -0
- package/dist/corpus/017-volcanotectonics.md +473 -0
- package/dist/corpus/018-seismotectonics.md +473 -0
- package/dist/corpus/019-biogeomorphology.md +475 -0
- package/dist/corpus/020-geobiophysics.md +479 -0
- package/dist/corpus/021-phytolith-analysis.md +481 -0
- package/dist/corpus/022-archaeometallurgy.md +479 -0
- package/dist/corpus/023-paleomagnetism.md +479 -0
- package/dist/corpus/024-biocalorimetry.md +475 -0
- package/dist/corpus/025-atmospheric-chemiluminescence.md +473 -0
- package/dist/corpus/026-cryoseismology.md +479 -0
- package/dist/corpus/027-extremophile-radiobiology.md +475 -0
- package/dist/corpus/028-heliophysics.md +479 -0
- package/dist/corpus/029-astroparticle-geophysics.md +474 -0
- package/dist/corpus/030-glaciohydrology.md +479 -0
- package/dist/corpus/031-permafrost-microbiology.md +477 -0
- package/dist/corpus/032-ecoacoustics.md +479 -0
- package/dist/corpus/033-dendroclimatology.md +473 -0
- package/dist/corpus/034-ionospheric-tomography.md +477 -0
- package/dist/corpus/035-marine-geodesy.md +481 -0
- package/dist/corpus/036-sedimentary-ancient-dna.md +481 -0
- package/dist/corpus/037-myrmecochory-dynamics.md +474 -0
- package/dist/corpus/038-chemosensory-ecology.md +477 -0
- package/dist/corpus/039-spintronics-materials.md +479 -0
- package/dist/corpus/040-nanotoxicology.md +483 -0
- package/dist/corpus/041-cosmochemistry.md +483 -0
- package/dist/corpus/042-quaternary-geochronology.md +471 -0
- package/dist/corpus/043-biophotonics.md +479 -0
- package/dist/corpus/044-evolutionary-morphometrics.md +481 -0
- package/dist/corpus/045-cryovolcanology.md +475 -0
- package/dist/corpus/046-exoplanet-atmospheric-dynamics.md +479 -0
- package/dist/corpus/047-microbial-electrosynthesis.md +477 -0
- package/dist/corpus/048-paleoseismology.md +479 -0
- package/dist/corpus/049-actinide-geochemistry.md +477 -0
- package/dist/corpus/050-quantum-biology.md +489 -0
- package/dist/corpus.d.ts +2 -0
- package/dist/corpus.js +19 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +2 -0
- package/dist/tokenfill.d.ts +9 -0
- package/dist/tokenfill.js +34 -0
- package/dist/tokenizer.d.ts +14 -0
- package/dist/tokenizer.js +31 -0
- package/package.json +27 -0
package/dist/bin.d.ts
ADDED
package/dist/bin.js
ADDED
package/dist/cli.d.ts
ADDED
package/dist/cli.js
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { Command, CommanderError, InvalidArgumentError } from "commander";
|
|
2
|
+
import { tokenfill } from "./tokenfill.js";
|
|
3
|
+
import { DEFAULT_ENCODING } from "./tokenizer.js";
|
|
4
|
+
/**
 * Default output sinks used by `runCli` when the caller does not supply any:
 * the current process's standard output and standard error streams.
 */
const defaultOutput = { stdout: process.stdout, stderr: process.stderr };
|
|
8
|
+
/**
 * Argument parser for the `<count>` positional argument.
 *
 * @param {string} value - Raw argument text as received from the command line.
 * @returns {number} The parsed, non-negative integer token count.
 * @throws {InvalidArgumentError} If the value is blank, is not an integer,
 *   is negative, or is too large to be represented exactly as a Number.
 */
function parseTokenCount(value) {
    // Number("") and Number("   ") both evaluate to 0, so a blank argument has
    // to be rejected explicitly rather than silently becoming a zero count.
    const text = typeof value === "string" ? value.trim() : value;
    const tokenCount = text === "" ? Number.NaN : Number(text);
    // isSafeInteger also rejects magnitudes above 2^53 - 1, where Number()
    // has already rounded to a different integer than the one that was typed.
    if (!Number.isSafeInteger(tokenCount) || tokenCount < 0) {
        throw new InvalidArgumentError("count must be a non-negative integer");
    }
    return tokenCount;
}
|
|
15
|
+
/**
 * Build the commander program that implements the `tokenfill` command line.
 *
 * The program takes one `<count>` argument (parsed by `parseTokenCount`) and
 * the `--json` and `--tokenizer <encoding>` options. All program output,
 * including commander's own messages, is routed through `output`.
 *
 * @param {{ stdout: { write: Function }, stderr: { write: Function } }} output
 *   Sinks that receive everything the program writes.
 * @returns {Command} The configured program, not yet parsed.
 */
function createTokenfillProgram(output) {
    // Thin writers shared by the action handler and commander's own output.
    const toStdout = chunk => {
        output.stdout.write(chunk);
    };
    const toStderr = chunk => {
        output.stderr.write(chunk);
    };

    // Runs once the arguments have been parsed: generate the text and emit it
    // either as a single JSON line or as raw text plus a summary on stderr.
    const handleCount = (count, options) => {
        const encoding = options.tokenizer ?? DEFAULT_ENCODING;
        const generated = tokenfill(count, { encoding });
        if (!options.json) {
            // The raw text is written exactly as generated, with nothing appended.
            toStdout(generated.text);
            toStderr(`Generated ${generated.actualTokens} tokens using ${encoding}\n`);
            return;
        }
        const payload = {
            text: generated.text,
            stats: {
                requestedTokens: count,
                actualTokens: generated.actualTokens,
                encoding
            }
        };
        toStdout(`${JSON.stringify(payload)}\n`);
    };

    const program = new Command();
    program.name("tokenfill");
    program.description("Generate deterministic text with exact token counts");
    program.argument("<count>", "Number of tokens to generate", parseTokenCount);
    program.option("--json", "Output structured JSON to stdout");
    program.option("--tokenizer <encoding>", `Tokenizer encoding (default: ${DEFAULT_ENCODING})`);
    program.action(handleCount);
    program.configureOutput({ writeOut: toStdout, writeErr: toStderr });
    return program;
}
|
|
50
|
+
/**
 * Run the tokenfill CLI and report an exit code instead of exiting.
 *
 * @param {string[]} [args] - User arguments; defaults to `process.argv.slice(2)`.
 * @param {{ stdout: { write: Function }, stderr: { write: Function } }} [output]
 *   Output sinks; defaults to `defaultOutput`.
 * @returns {Promise<number>} 0 when parsing and the action complete, the
 *   `exitCode` of a caught `CommanderError`, or 1 for any other failure.
 */
export async function runCli(args = process.argv.slice(2), output = defaultOutput) {
    const program = createTokenfillProgram(output);
    // Called before parsing; presumably this makes commander throw a
    // CommanderError rather than exit the process. Confirm against commander docs.
    program.exitOverride();
    try {
        await program.parseAsync(args, { from: "user" });
    }
    catch (failure) {
        if (failure instanceof CommanderError) {
            // No extra message is written here; only the exit code is passed on.
            return failure.exitCode;
        }
        let detail;
        if (failure instanceof Error) {
            detail = failure.message;
        }
        else {
            detail = String(failure);
        }
        output.stderr.write(`Error: ${detail}\n`);
        return 1;
    }
    return 0;
}
|