@forwardimpact/libutil 0.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,123 @@
1
+ import { test, describe, beforeEach } from "node:test";
2
+ import assert from "node:assert";
3
+
4
+ // Module under test
5
+ import { Tokenizer, ranks } from "../tokenizer.js";
6
+ import { countTokens, createTokenizer } from "../index.js";
7
+
8
// Unit tests for the approximate Tokenizer: construction, the encode()
// heuristic, and the intentionally unimplemented decode().
describe("Tokenizer", () => {
  describe("constructor", () => {
    test("creates instance with ranks parameter", () => {
      const tokenizer = new Tokenizer(ranks);
      assert.ok(tokenizer instanceof Tokenizer);
    });

    test("creates instance without ranks parameter", () => {
      const tokenizer = new Tokenizer();
      assert.ok(tokenizer instanceof Tokenizer);
    });
  });

  describe("encode", () => {
    let tokenizer;

    // Build a fresh instance for every test case.
    beforeEach(() => {
      tokenizer = new Tokenizer(ranks);
    });

    test("handles empty string", () => {
      const result = tokenizer.encode("");
      assert.strictEqual(result.length, 0);
    });

    test("handles non-string input", () => {
      // encode() guards on `typeof text !== "string"`, so every non-string
      // value (not only null) must yield an empty array.
      for (const input of [null, undefined, 42, {}, ["hello"]]) {
        const result = tokenizer.encode(input);
        assert.ok(Array.isArray(result));
        assert.strictEqual(result.length, 0);
      }
    });

    test("encodes simple word", () => {
      const result = tokenizer.encode("hello");
      assert.ok(result.length >= 1);
      assert.ok(Array.isArray(result));
    });

    test("encodes longer text", () => {
      const shortText = "hello";
      const longText = "hello world this is a longer piece of text";

      const shortResult = tokenizer.encode(shortText);
      const longResult = tokenizer.encode(longText);

      assert.ok(longResult.length > shortResult.length);
    });

    test("handles whitespace-only text", () => {
      const result = tokenizer.encode(" ");
      assert.strictEqual(result.length, 0);
    });

    test("handles punctuation", () => {
      const result = tokenizer.encode("Hello, world!");
      // At least one token each for: hello, comma, world, exclamation mark.
      assert.ok(result.length >= 4);
    });

    test("handles mixed content", () => {
      const result = tokenizer.encode("The year 2024 was great!");
      assert.ok(result.length >= 5); // Multiple words and punctuation
    });

    test("provides reasonable approximation", () => {
      // Test that the approximation is in a reasonable range
      const text = "This is a test sentence with about ten words here.";
      const result = tokenizer.encode(text);

      // The estimate must stay within a loose 8-20 token band for this sentence
      assert.ok(result.length >= 8);
      assert.ok(result.length <= 20);
    });
  });

  describe("decode", () => {
    test("throws error when called", () => {
      const tokenizer = new Tokenizer(ranks);
      assert.throws(() => tokenizer.decode([1, 2, 3]), {
        message: /decode\(\) not implemented/,
      });
    });
  });
});
89
+
90
// Exercises the helpers imported from ../index.js (createTokenizer and
// countTokens) together with the Tokenizer class.
describe("Integration with libutil functions", () => {
  // Shared expectation: a token count is a number and is at least 1.
  const assertPositiveCount = (value) => {
    assert.ok(typeof value === "number");
    assert.ok(value >= 1);
  };

  describe("tokenizerFactory", () => {
    test("creates Tokenizer instance", () => {
      const created = createTokenizer();
      assert.ok(created instanceof Tokenizer);
    });
  });

  describe("countTokens", () => {
    test("returns token count for text", () => {
      assertPositiveCount(countTokens("hello world"));
    });

    test("handles empty text", () => {
      assert.strictEqual(countTokens(""), 0);
    });

    test("uses provided tokenizer", () => {
      const customTokenizer = new Tokenizer(ranks);
      assertPositiveCount(countTokens("test", customTokenizer));
    });

    test("uses default tokenizer when none provided", () => {
      assertPositiveCount(countTokens("test"));
    });
  });
});
@@ -0,0 +1,89 @@
1
+ import { describe, test } from "node:test";
2
+ import assert from "node:assert";
3
+
4
+ import { waitFor } from "../wait.js";
5
+
6
// Behavioural tests for waitFor(): immediate success, retrying, timeout,
// tolerance of failing checks, default options, and interval backoff.
describe("waitFor", () => {
  test("resolves immediately when condition is true", async () => {
    let callCount = 0;
    await waitFor(() => {
      callCount++;
      return Promise.resolve(true);
    });

    assert.strictEqual(callCount, 1);
  });

  test("retries until condition becomes true", async () => {
    let callCount = 0;
    await waitFor(
      () => {
        callCount++;
        return Promise.resolve(callCount >= 3);
      },
      { interval: 10, timeout: 5000 },
    );

    assert.strictEqual(callCount, 3);
  });

  test("throws error on timeout", async () => {
    await assert.rejects(
      () =>
        waitFor(() => Promise.resolve(false), {
          timeout: 50,
          interval: 10,
        }),
      { message: "Timeout waiting for condition after 50ms" },
    );
  });

  test("ignores errors during polling", async () => {
    let callCount = 0;
    await waitFor(
      () => {
        callCount++;
        if (callCount < 3) {
          throw new Error("Service not ready");
        }
        return Promise.resolve(true);
      },
      { interval: 10, timeout: 5000 },
    );

    assert.strictEqual(callCount, 3);
  });

  test("uses default options when not provided", async () => {
    // Just verify it doesn't throw with defaults
    let called = false;
    await waitFor(() => {
      called = true;
      return Promise.resolve(true);
    });

    assert.strictEqual(called, true);
  });

  test("increases interval with exponential backoff", async () => {
    const intervals = [];
    let lastTime = Date.now();
    let callCount = 0;

    await waitFor(
      () => {
        const now = Date.now();
        if (callCount > 0) {
          intervals.push(now - lastTime);
        }
        lastTime = now;
        callCount++;
        return Promise.resolve(callCount >= 4);
      },
      { interval: 20, maxInterval: 100, timeout: 5000 },
    );

    // Four calls produce three gaps. waitFor multiplies the delay by 1.5
    // after each attempt, so the nominal gaps are 20ms, 30ms and 45ms.
    // Only lower bounds are asserted: a timer may fire late on a busy
    // machine, but it should not fire meaningfully early, so this stays
    // stable while still failing if the delay stops growing.
    const nominalGaps = [20, 30, 45];
    const toleranceMs = 5;

    assert.strictEqual(intervals.length, nominalGaps.length);
    for (const [index, gap] of intervals.entries()) {
      assert.ok(
        gap >= nominalGaps[index] - toleranceMs,
        `gap ${index} was ${gap}ms, expected at least ${nominalGaps[index] - toleranceMs}ms`,
      );
    }
  });
});
package/tokenizer.js ADDED
@@ -0,0 +1,89 @@
1
/**
 * Simple tokenizer class that provides API compatibility with js-tiktoken.
 *
 * It does not tokenize for real: `encode()` estimates how many tokens a text
 * would produce and returns an array of that length, and `decode()` always
 * throws.
 */
export class Tokenizer {
  /**
   * Creates a new Tokenizer instance.
   * @param {object} [_ranks] - Ranking data; accepted for API compatibility and ignored
   */
  constructor(_ranks) {
    // Intentionally empty: the heuristic in encode() needs no rank table.
  }

  /**
   * Estimates the tokens in `text` with a simple heuristic.
   *
   * The text is trimmed and runs of whitespace are collapsed to one space.
   * The estimate is then the sum of:
   * - one token per started group of 4 characters in each `\w+` word,
   * - one token per character that is neither `\w` nor whitespace,
   * - one token per two remaining spaces (rounded up),
   * with a floor of 1 for any text that is not blank.
   * @param {string} text - Text to encode
   * @returns {number[]} Placeholder IDs `0..n-1`; only the array length is
   *   meaningful. Empty for non-string, empty, or whitespace-only input.
   */
  encode(text) {
    if (typeof text !== "string" || text.length === 0) {
      return [];
    }

    // Collapse whitespace first so formatting does not inflate the estimate.
    const normalized = text.trim().replace(/\s+/g, " ");
    if (normalized.length === 0) {
      return [];
    }

    let tokenCount = 0;

    // Words: roughly 4 characters per token. Math.ceil already yields 1 for
    // words of 1-4 characters, so short words need no special case.
    const words = normalized.match(/\b\w+\b/g) || [];
    for (const word of words) {
      tokenCount += Math.ceil(word.length / 4);
    }

    // Punctuation and other special characters: one token each.
    const punctuation = normalized.match(/[^\w\s]/g) || [];
    tokenCount += punctuation.length;

    // Spaces between words: counted at half weight.
    const spaces = normalized.match(/\s/g) || [];
    tokenCount += Math.ceil(spaces.length / 2);

    // Ensure minimum of 1 token for non-empty text.
    tokenCount = Math.max(1, tokenCount);

    // The values are dummies; the array exists so `.length` gives the count.
    return Array.from({ length: tokenCount }, (_, index) => index);
  }

  /**
   * Decodes tokens back to text (not implemented).
   * @param {number[]} _tokens - Token IDs to decode
   * @throws {Error} Always throws - not implemented
   */
  decode(_tokens) {
    throw new Error("decode() not implemented in Tokenizer");
  }
}
84
+
85
+ /**
86
+ * Empty placeholder ranks object, exported for API compatibility with js-tiktoken.
87
+ * The Tokenizer constructor in this file ignores its argument, so these contents are never read by it.
88
+ */
89
+ export const ranks = {};
package/wait.js ADDED
@@ -0,0 +1,28 @@
1
/**
 * Poll until a condition returns true, with exponential backoff.
 *
 * The delay between attempts starts at `interval`, grows by 50% after each
 * attempt and is capped at `maxInterval`. Each delay is also clipped to the
 * time left before `timeout`, so the call does not sleep well past its
 * deadline. An error thrown by `checkFn` (or a rejected promise) is treated
 * as "not ready yet"; the most recent one is attached as `cause` to the
 * timeout error.
 * @param {() => Promise<boolean>} checkFn - Function that returns true when ready
 * @param {object} [options] - Configuration options
 * @param {number} [options.timeout] - Maximum time to wait in ms (default 30000)
 * @param {number} [options.interval] - Initial polling interval in ms (default 1000)
 * @param {number} [options.maxInterval] - Maximum polling interval in ms (default 10000)
 * @returns {Promise<void>} Resolves as soon as `checkFn` yields a truthy value
 * @throws {Error} When timeout is reached
 */
export async function waitFor(checkFn, options = {}) {
  const { timeout = 30000, interval = 1000, maxInterval = 10000 } = options;
  const startTime = Date.now();
  let currentInterval = interval;
  let lastError;

  while (Date.now() - startTime < timeout) {
    try {
      if (await checkFn()) return;
    } catch (error) {
      // Keep polling - the service may not be up yet - but remember why the
      // check failed so a timeout can report it.
      lastError = error;
    }

    // Never sleep longer than the time left before the deadline.
    const remaining = timeout - (Date.now() - startTime);
    const delay = Math.max(0, Math.min(currentInterval, remaining));
    await new Promise((resolve) => setTimeout(resolve, delay));
    currentInterval = Math.min(currentInterval * 1.5, maxInterval);
  }

  const message = `Timeout waiting for condition after ${timeout}ms`;
  // Only set `cause` when a check actually failed, so a plain timeout keeps
  // the same error shape as before.
  throw lastError === undefined
    ? new Error(message)
    : new Error(message, { cause: lastError });
}