@soulcraft/brainy 6.4.0 → 6.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/assets/models/all-MiniLM-L6-v2-q8/config.json +25 -0
  2. package/assets/models/all-MiniLM-L6-v2-q8/model.onnx +0 -0
  3. package/assets/models/all-MiniLM-L6-v2-q8/tokenizer.json +30686 -0
  4. package/assets/models/all-MiniLM-L6-v2-q8/vocab.json +1 -0
  5. package/dist/critical/model-guardian.d.ts +5 -22
  6. package/dist/critical/model-guardian.js +38 -210
  7. package/dist/embeddings/EmbeddingManager.d.ts +7 -17
  8. package/dist/embeddings/EmbeddingManager.js +28 -136
  9. package/dist/embeddings/wasm/AssetLoader.d.ts +67 -0
  10. package/dist/embeddings/wasm/AssetLoader.js +238 -0
  11. package/dist/embeddings/wasm/EmbeddingPostProcessor.d.ts +60 -0
  12. package/dist/embeddings/wasm/EmbeddingPostProcessor.js +123 -0
  13. package/dist/embeddings/wasm/ONNXInferenceEngine.d.ts +55 -0
  14. package/dist/embeddings/wasm/ONNXInferenceEngine.js +154 -0
  15. package/dist/embeddings/wasm/WASMEmbeddingEngine.d.ts +82 -0
  16. package/dist/embeddings/wasm/WASMEmbeddingEngine.js +231 -0
  17. package/dist/embeddings/wasm/WordPieceTokenizer.d.ts +71 -0
  18. package/dist/embeddings/wasm/WordPieceTokenizer.js +264 -0
  19. package/dist/embeddings/wasm/index.d.ts +13 -0
  20. package/dist/embeddings/wasm/index.js +15 -0
  21. package/dist/embeddings/wasm/types.d.ts +114 -0
  22. package/dist/embeddings/wasm/types.js +25 -0
  23. package/dist/setup.d.ts +11 -11
  24. package/dist/setup.js +17 -31
  25. package/dist/utils/embedding.d.ts +45 -62
  26. package/dist/utils/embedding.js +61 -440
  27. package/dist/vfs/VirtualFileSystem.d.ts +14 -0
  28. package/dist/vfs/VirtualFileSystem.js +56 -6
  29. package/package.json +10 -3
  30. package/scripts/download-model.cjs +175 -0
package/dist/embeddings/wasm/WordPieceTokenizer.d.ts ADDED
@@ -0,0 +1,71 @@
+ /**
+  * WordPiece Tokenizer for BERT-based models
+  *
+  * Implements the WordPiece tokenization algorithm used by all-MiniLM-L6-v2.
+  * This is a clean, dependency-free implementation.
+  *
+  * Algorithm:
+  * 1. Normalize text (lowercase for uncased models)
+  * 2. Split on whitespace and punctuation
+  * 3. Apply WordPiece subword tokenization
+  * 4. Add special tokens ([CLS], [SEP])
+  * 5. Generate attention mask
+  */
+ import { TokenizerConfig, TokenizedInput } from './types.js';
+ /**
+  * WordPiece tokenizer for BERT-based sentence transformers
+  */
+ export declare class WordPieceTokenizer {
+     private vocab;
+     private reverseVocab;
+     private config;
+     constructor(vocab: Map<string, number> | Record<string, number>, config?: Partial<TokenizerConfig>);
+     /**
+      * Tokenize text into token IDs
+      */
+     encode(text: string): TokenizedInput;
+     /**
+      * Encode with padding to fixed length
+      */
+     encodeWithPadding(text: string, targetLength?: number): TokenizedInput;
+     /**
+      * Batch encode multiple texts
+      */
+     encodeBatch(texts: string[]): {
+         inputIds: number[][];
+         attentionMask: number[][];
+         tokenTypeIds: number[][];
+     };
+     /**
+      * Basic tokenization: split on whitespace and punctuation
+      */
+     private basicTokenize;
+     /**
+      * WordPiece tokenization for a single word
+      */
+     private wordPieceTokenize;
+     /**
+      * Check if character is whitespace
+      */
+     private isWhitespace;
+     /**
+      * Check if character is punctuation
+      */
+     private isPunctuation;
+     /**
+      * Decode token IDs back to text (for debugging)
+      */
+     decode(tokenIds: number[]): string;
+     /**
+      * Get vocabulary size
+      */
+     get vocabSize(): number;
+     /**
+      * Get max sequence length
+      */
+     get maxLength(): number;
+ }
+ /**
+  * Create tokenizer from vocabulary JSON
+  */
+ export declare function createTokenizer(vocabJson: Record<string, number>): WordPieceTokenizer;
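The declaration above fixes the tokenizer's public surface. As a point of reference, here is a minimal TypeScript usage sketch against that surface; the vocabulary literal and token IDs below are an illustrative fragment, not the real vocab.json contents:

import { createTokenizer } from './WordPieceTokenizer.js';

// Illustrative vocabulary fragment; the real all-MiniLM-L6-v2 vocab has 30,522 entries.
const vocabJson: Record<string, number> = {
  '[PAD]': 0, '[UNK]': 100, '[CLS]': 101, '[SEP]': 102,
  'hello': 7592, 'world': 2088,
};

const tokenizer = createTokenizer(vocabJson);

const encoded = tokenizer.encode('Hello world');
// encoded.inputIds      -> [101, 7592, 2088, 102]   ([CLS] hello world [SEP])
// encoded.attentionMask -> [1, 1, 1, 1]
// encoded.tokenCount    -> 2

const padded = tokenizer.encodeWithPadding('Hello world', 8);
// padded.inputIds has length 8; the trailing slots hold the [PAD] id (0)
// and the attention mask marks them with 0.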
package/dist/embeddings/wasm/WordPieceTokenizer.js ADDED
@@ -0,0 +1,264 @@
+ /**
+  * WordPiece Tokenizer for BERT-based models
+  *
+  * Implements the WordPiece tokenization algorithm used by all-MiniLM-L6-v2.
+  * This is a clean, dependency-free implementation.
+  *
+  * Algorithm:
+  * 1. Normalize text (lowercase for uncased models)
+  * 2. Split on whitespace and punctuation
+  * 3. Apply WordPiece subword tokenization
+  * 4. Add special tokens ([CLS], [SEP])
+  * 5. Generate attention mask
+  */
+ import { SPECIAL_TOKENS, MODEL_CONSTANTS, } from './types.js';
+ /**
+  * WordPiece tokenizer for BERT-based sentence transformers
+  */
+ export class WordPieceTokenizer {
+     constructor(vocab, config) {
+         // Convert Record to Map if needed
+         this.vocab = vocab instanceof Map ? vocab : new Map(Object.entries(vocab));
+         // Build reverse vocab for debugging
+         this.reverseVocab = new Map();
+         for (const [token, id] of this.vocab) {
+             this.reverseVocab.set(id, token);
+         }
+         // Default config for all-MiniLM-L6-v2
+         this.config = {
+             vocab: this.vocab,
+             unkTokenId: config?.unkTokenId ?? SPECIAL_TOKENS.UNK,
+             clsTokenId: config?.clsTokenId ?? SPECIAL_TOKENS.CLS,
+             sepTokenId: config?.sepTokenId ?? SPECIAL_TOKENS.SEP,
+             padTokenId: config?.padTokenId ?? SPECIAL_TOKENS.PAD,
+             maxLength: config?.maxLength ?? MODEL_CONSTANTS.MAX_SEQUENCE_LENGTH,
+             doLowerCase: config?.doLowerCase ?? true,
+         };
+     }
+     /**
+      * Tokenize text into token IDs
+      */
+     encode(text) {
+         // 1. Normalize
+         let normalizedText = text;
+         if (this.config.doLowerCase) {
+             normalizedText = text.toLowerCase();
+         }
+         // 2. Clean and split into words
+         const words = this.basicTokenize(normalizedText);
+         // 3. Apply WordPiece to each word
+         const tokens = [this.config.clsTokenId];
+         for (const word of words) {
+             const wordTokens = this.wordPieceTokenize(word);
+             // Check if adding these tokens would exceed max length (accounting for [SEP])
+             if (tokens.length + wordTokens.length + 1 > this.config.maxLength) {
+                 break;
+             }
+             tokens.push(...wordTokens);
+         }
+         tokens.push(this.config.sepTokenId);
+         // 4. Generate attention mask and token type IDs
+         const attentionMask = new Array(tokens.length).fill(1);
+         const tokenTypeIds = new Array(tokens.length).fill(0);
+         return {
+             inputIds: tokens,
+             attentionMask,
+             tokenTypeIds,
+             tokenCount: tokens.length - 2, // Exclude [CLS] and [SEP]
+         };
+     }
+     /**
+      * Encode with padding to fixed length
+      */
+     encodeWithPadding(text, targetLength) {
+         const result = this.encode(text);
+         const padLength = targetLength ?? this.config.maxLength;
+         // Pad to target length
+         while (result.inputIds.length < padLength) {
+             result.inputIds.push(this.config.padTokenId);
+             result.attentionMask.push(0);
+             result.tokenTypeIds.push(0);
+         }
+         // Truncate if longer (shouldn't happen with proper encode())
+         if (result.inputIds.length > padLength) {
+             result.inputIds.length = padLength;
+             result.attentionMask.length = padLength;
+             result.tokenTypeIds.length = padLength;
+             // Ensure [SEP] is at the end
+             result.inputIds[padLength - 1] = this.config.sepTokenId;
+             result.attentionMask[padLength - 1] = 1;
+         }
+         return result;
+     }
+     /**
+      * Batch encode multiple texts
+      */
+     encodeBatch(texts) {
+         const results = texts.map((text) => this.encode(text));
+         // Find max length in batch
+         const maxLen = Math.max(...results.map((r) => r.inputIds.length));
+         // Pad all to same length
+         const inputIds = [];
+         const attentionMask = [];
+         const tokenTypeIds = [];
+         for (const result of results) {
+             const padded = this.encodeWithPadding('', // Not used since we're modifying result
+             maxLen);
+             // Copy original values
+             for (let i = 0; i < result.inputIds.length; i++) {
+                 padded.inputIds[i] = result.inputIds[i];
+                 padded.attentionMask[i] = result.attentionMask[i];
+                 padded.tokenTypeIds[i] = result.tokenTypeIds[i];
+             }
+             // Pad the rest
+             for (let i = result.inputIds.length; i < maxLen; i++) {
+                 padded.inputIds[i] = this.config.padTokenId;
+                 padded.attentionMask[i] = 0;
+                 padded.tokenTypeIds[i] = 0;
+             }
+             inputIds.push(padded.inputIds.slice(0, maxLen));
+             attentionMask.push(padded.attentionMask.slice(0, maxLen));
+             tokenTypeIds.push(padded.tokenTypeIds.slice(0, maxLen));
+         }
+         return { inputIds, attentionMask, tokenTypeIds };
+     }
+     /**
+      * Basic tokenization: split on whitespace and punctuation
+      */
+     basicTokenize(text) {
+         // Clean whitespace
+         text = text.trim().replace(/\s+/g, ' ');
+         if (!text) {
+             return [];
+         }
+         const words = [];
+         let currentWord = '';
+         for (const char of text) {
+             if (this.isWhitespace(char)) {
+                 if (currentWord) {
+                     words.push(currentWord);
+                     currentWord = '';
+                 }
+             }
+             else if (this.isPunctuation(char)) {
+                 if (currentWord) {
+                     words.push(currentWord);
+                     currentWord = '';
+                 }
+                 words.push(char);
+             }
+             else {
+                 currentWord += char;
+             }
+         }
+         if (currentWord) {
+             words.push(currentWord);
+         }
+         return words;
+     }
+     /**
+      * WordPiece tokenization for a single word
+      */
+     wordPieceTokenize(word) {
+         if (!word) {
+             return [];
+         }
+         // Check if whole word is in vocabulary
+         if (this.vocab.has(word)) {
+             return [this.vocab.get(word)];
+         }
+         const tokens = [];
+         let start = 0;
+         while (start < word.length) {
+             let end = word.length;
+             let foundToken = false;
+             while (start < end) {
+                 let substr = word.slice(start, end);
+                 // Add ## prefix for subwords (not at start of word)
+                 if (start > 0) {
+                     substr = '##' + substr;
+                 }
+                 if (this.vocab.has(substr)) {
+                     tokens.push(this.vocab.get(substr));
+                     foundToken = true;
+                     break;
+                 }
+                 end--;
+             }
+             if (!foundToken) {
+                 // Unknown character - use [UNK] for single character
+                 tokens.push(this.config.unkTokenId);
+                 start++;
+             }
+             else {
+                 start = end;
+             }
+         }
+         return tokens;
+     }
+     /**
+      * Check if character is whitespace
+      */
+     isWhitespace(char) {
+         return /\s/.test(char);
+     }
+     /**
+      * Check if character is punctuation
+      */
+     isPunctuation(char) {
+         const code = char.charCodeAt(0);
+         // ASCII punctuation ranges
+         if ((code >= 33 && code <= 47) || // !"#$%&'()*+,-./
+             (code >= 58 && code <= 64) || // :;<=>?@
+             (code >= 91 && code <= 96) || // [\]^_`
+             (code >= 123 && code <= 126) // {|}~
+         ) {
+             return true;
+         }
+         // Unicode punctuation categories
+         return /[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,\-./:;<=>?@\[\]^_`{|}~]/.test(char);
+     }
+     /**
+      * Decode token IDs back to text (for debugging)
+      */
+     decode(tokenIds) {
+         const tokens = [];
+         for (const id of tokenIds) {
+             const token = this.reverseVocab.get(id);
+             if (token && !['[CLS]', '[SEP]', '[PAD]'].includes(token)) {
+                 if (token.startsWith('##')) {
+                     // Subword - append without space
+                     if (tokens.length > 0) {
+                         tokens[tokens.length - 1] += token.slice(2);
+                     }
+                     else {
+                         tokens.push(token.slice(2));
+                     }
+                 }
+                 else {
+                     tokens.push(token);
+                 }
+             }
+         }
+         return tokens.join(' ');
+     }
+     /**
+      * Get vocabulary size
+      */
+     get vocabSize() {
+         return this.vocab.size;
+     }
+     /**
+      * Get max sequence length
+      */
+     get maxLength() {
+         return this.config.maxLength;
+     }
+ }
+ /**
+  * Create tokenizer from vocabulary JSON
+  */
+ export function createTokenizer(vocabJson) {
+     return new WordPieceTokenizer(vocabJson);
+ }
+ //# sourceMappingURL=WordPieceTokenizer.js.map
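A quick way to see the greedy longest-match-first loop inside wordPieceTokenize is to encode a word that is absent from the vocabulary as a whole. The toy vocabulary and IDs below are invented for illustration, so the exact split is not the model's real one:

import { WordPieceTokenizer } from './WordPieceTokenizer.js';

// Toy vocabulary: 'unbelievable' itself is missing, but its pieces are present.
const tokenizer = new WordPieceTokenizer({
  '[PAD]': 0, '[UNK]': 100, '[CLS]': 101, '[SEP]': 102,
  'un': 5, '##believ': 6, '##able': 7,
});

const { inputIds, tokenCount } = tokenizer.encode('unbelievable');
// The inner loop tries the longest substring first and shrinks from the right:
//   'unbelievable' (miss) ... 'un' (hit), then '##believable' (miss) ...
//   '##believ' (hit), then '##able' (hit)
// inputIds   -> [101, 5, 6, 7, 102]
// tokenCount -> 3

console.log(tokenizer.decode(inputIds)); // 'unbelievable' (special tokens are skipped)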
package/dist/embeddings/wasm/index.d.ts ADDED
@@ -0,0 +1,13 @@
+ /**
+  * WASM Embedding Engine - Public Exports
+  *
+  * Clean, production-grade embedding engine using direct ONNX WASM.
+  * No transformers.js dependency, no runtime downloads, works everywhere.
+  */
+ export { WASMEmbeddingEngine, wasmEmbeddingEngine, embed, embedBatch, getEmbeddingStats, } from './WASMEmbeddingEngine.js';
+ export { WordPieceTokenizer, createTokenizer } from './WordPieceTokenizer.js';
+ export { ONNXInferenceEngine, createInferenceEngine } from './ONNXInferenceEngine.js';
+ export { EmbeddingPostProcessor, createPostProcessor } from './EmbeddingPostProcessor.js';
+ export { AssetLoader, getAssetLoader, createAssetLoader } from './AssetLoader.js';
+ export type { TokenizerConfig, TokenizedInput, InferenceConfig, EmbeddingResult, EngineStats, ModelConfig, } from './types.js';
+ export { SPECIAL_TOKENS, MODEL_CONSTANTS } from './types.js';
package/dist/embeddings/wasm/index.js ADDED
@@ -0,0 +1,15 @@
+ /**
+  * WASM Embedding Engine - Public Exports
+  *
+  * Clean, production-grade embedding engine using direct ONNX WASM.
+  * No transformers.js dependency, no runtime downloads, works everywhere.
+  */
+ // Main engine
+ export { WASMEmbeddingEngine, wasmEmbeddingEngine, embed, embedBatch, getEmbeddingStats, } from './WASMEmbeddingEngine.js';
+ // Components (for advanced use)
+ export { WordPieceTokenizer, createTokenizer } from './WordPieceTokenizer.js';
+ export { ONNXInferenceEngine, createInferenceEngine } from './ONNXInferenceEngine.js';
+ export { EmbeddingPostProcessor, createPostProcessor } from './EmbeddingPostProcessor.js';
+ export { AssetLoader, getAssetLoader, createAssetLoader } from './AssetLoader.js';
+ export { SPECIAL_TOKENS, MODEL_CONSTANTS } from './types.js';
+ //# sourceMappingURL=index.js.map
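For orientation, a hedged sketch of how this barrel might be consumed. Only the export names come from the files above; the call shapes of embed() and getEmbeddingStats() are assumptions, since WASMEmbeddingEngine.d.ts is not part of this diff:

import { embed, getEmbeddingStats, MODEL_CONSTANTS } from './index.js';

// Assumed call shape (not confirmed here): embed(text) resolves to a
// 384-dimensional vector, or to an EmbeddingResult carrying one.
const vector = (await embed('vector databases are neat')) as unknown as number[];
console.log(vector.length === MODEL_CONSTANTS.HIDDEN_SIZE); // expect 384 if the assumption holds

console.log(getEmbeddingStats()); // presumably an EngineStats snapshot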
package/dist/embeddings/wasm/types.d.ts ADDED
@@ -0,0 +1,114 @@
+ /**
+  * Type definitions for WASM Embedding Engine
+  *
+  * Clean, production-grade types for direct ONNX WASM embeddings.
+  */
+ /**
+  * Tokenizer configuration for WordPiece
+  */
+ export interface TokenizerConfig {
+     /** Vocabulary mapping word → token ID */
+     vocab: Map<string, number>;
+     /** [UNK] token ID (100 for BERT-based models) */
+     unkTokenId: number;
+     /** [CLS] token ID (101 for BERT-based models) */
+     clsTokenId: number;
+     /** [SEP] token ID (102 for BERT-based models) */
+     sepTokenId: number;
+     /** [PAD] token ID (0 for BERT-based models) */
+     padTokenId: number;
+     /** Maximum sequence length (512 for all-MiniLM-L6-v2) */
+     maxLength: number;
+     /** Whether to lowercase input (true for uncased models) */
+     doLowerCase: boolean;
+ }
+ /**
+  * Result of tokenization
+  */
+ export interface TokenizedInput {
+     /** Token IDs including [CLS] and [SEP] */
+     inputIds: number[];
+     /** Attention mask (1 for real tokens, 0 for padding) */
+     attentionMask: number[];
+     /** Token type IDs (all 0 for single sentence) */
+     tokenTypeIds: number[];
+     /** Number of tokens (excluding special tokens) */
+     tokenCount: number;
+ }
+ /**
+  * ONNX inference engine configuration
+  */
+ export interface InferenceConfig {
+     /** Path to ONNX model file */
+     modelPath: string;
+     /** Path to WASM files directory */
+     wasmPath?: string;
+     /** Number of threads (1 for universal compatibility) */
+     numThreads: number;
+     /** Enable SIMD if available */
+     enableSimd: boolean;
+     /** Enable CPU memory arena (false for memory efficiency) */
+     enableCpuMemArena: boolean;
+ }
+ /**
+  * Embedding result with metadata
+  */
+ export interface EmbeddingResult {
+     /** 384-dimensional embedding vector */
+     embedding: number[];
+     /** Number of tokens processed */
+     tokenCount: number;
+     /** Processing time in milliseconds */
+     processingTimeMs: number;
+ }
+ /**
+  * Engine statistics
+  */
+ export interface EngineStats {
+     /** Whether the engine is initialized */
+     initialized: boolean;
+     /** Total number of embeddings generated */
+     embedCount: number;
+     /** Total processing time in milliseconds */
+     totalProcessingTimeMs: number;
+     /** Average processing time per embedding */
+     avgProcessingTimeMs: number;
+     /** Model name */
+     modelName: string;
+ }
+ /**
+  * Model configuration (from config.json)
+  */
+ export interface ModelConfig {
+     /** Model architecture type */
+     architectures: string[];
+     /** Hidden size (384 for all-MiniLM-L6-v2) */
+     hidden_size: number;
+     /** Number of attention heads */
+     num_attention_heads: number;
+     /** Number of hidden layers */
+     num_hidden_layers: number;
+     /** Vocabulary size */
+     vocab_size: number;
+     /** Maximum position embeddings */
+     max_position_embeddings: number;
+ }
+ /**
+  * Special token IDs for BERT-based models
+  */
+ export declare const SPECIAL_TOKENS: {
+     readonly PAD: 0;
+     readonly UNK: 100;
+     readonly CLS: 101;
+     readonly SEP: 102;
+     readonly MASK: 103;
+ };
+ /**
+  * Model constants for all-MiniLM-L6-v2
+  */
+ export declare const MODEL_CONSTANTS: {
+     readonly HIDDEN_SIZE: 384;
+     readonly MAX_SEQUENCE_LENGTH: 512;
+     readonly VOCAB_SIZE: 30522;
+     readonly MODEL_NAME: "all-MiniLM-L6-v2";
+ };
package/dist/embeddings/wasm/types.js ADDED
@@ -0,0 +1,25 @@
+ /**
+  * Type definitions for WASM Embedding Engine
+  *
+  * Clean, production-grade types for direct ONNX WASM embeddings.
+  */
+ /**
+  * Special token IDs for BERT-based models
+  */
+ export const SPECIAL_TOKENS = {
+     PAD: 0,
+     UNK: 100,
+     CLS: 101,
+     SEP: 102,
+     MASK: 103,
+ };
+ /**
+  * Model constants for all-MiniLM-L6-v2
+  */
+ export const MODEL_CONSTANTS = {
+     HIDDEN_SIZE: 384,
+     MAX_SEQUENCE_LENGTH: 512,
+     VOCAB_SIZE: 30522,
+     MODEL_NAME: 'all-MiniLM-L6-v2',
+ };
+ //# sourceMappingURL=types.js.map
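These constants are exactly what WordPieceTokenizer falls back to when no overrides are passed. A small sketch restating those defaults as an explicit partial config (the one-entry vocabulary is illustrative; the real map is built from the bundled vocab.json):

import { WordPieceTokenizer } from './WordPieceTokenizer.js';
import { SPECIAL_TOKENS, MODEL_CONSTANTS } from './types.js';
import type { TokenizerConfig } from './types.js';

const config: Partial<TokenizerConfig> = {
  unkTokenId: SPECIAL_TOKENS.UNK,                  // 100
  clsTokenId: SPECIAL_TOKENS.CLS,                  // 101
  sepTokenId: SPECIAL_TOKENS.SEP,                  // 102
  padTokenId: SPECIAL_TOKENS.PAD,                  // 0
  maxLength: MODEL_CONSTANTS.MAX_SEQUENCE_LENGTH,  // 512
  doLowerCase: true,                               // all-MiniLM-L6-v2 is uncased
};

const tokenizer = new WordPieceTokenizer(new Map([['hello', 7592]]), config);
console.log(tokenizer.maxLength); // 512
console.log(tokenizer.vocabSize); // 1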
package/dist/setup.d.ts CHANGED
@@ -1,17 +1,17 @@
  /**
- * CRITICAL: This file is imported for its side effects to patch the environment
- * for Node.js compatibility before any other library code runs.
+ * Brainy Setup - Minimal Polyfills
  *
- * It ensures that by the time Transformers.js/ONNX Runtime is imported by any other
- * module, the necessary compatibility fixes for the current Node.js
- * environment are already in place.
+ * ARCHITECTURE (v7.0.0):
+ * Brainy uses direct ONNX WASM for embeddings.
+ * No transformers.js dependency, no hacks required.
  *
- * This file MUST be imported as the first import in unified.ts to prevent
- * race conditions with library initialization. Failure to do so may
- * result in errors like "TextEncoder is not a constructor" when the package
- * is used in Node.js environments.
+ * This file provides minimal polyfills for cross-environment compatibility:
+ * - TextEncoder/TextDecoder for older environments
  *
- * The package.json file marks this file as having side effects to prevent
- * tree-shaking by bundlers, ensuring the patch is always applied.
+ * BENEFITS:
+ * - Clean codebase with no workarounds
+ * - Works everywhere: Node.js, Bun, Bun --compile, browsers, Deno
+ * - No platform-specific binaries
+ * - Model bundled in package (no runtime downloads)
  */
  export {};
package/dist/setup.js CHANGED
@@ -1,45 +1,31 @@
  /**
- * CRITICAL: This file is imported for its side effects to patch the environment
- * for Node.js compatibility before any other library code runs.
+ * Brainy Setup - Minimal Polyfills
  *
- * It ensures that by the time Transformers.js/ONNX Runtime is imported by any other
- * module, the necessary compatibility fixes for the current Node.js
- * environment are already in place.
+ * ARCHITECTURE (v7.0.0):
+ * Brainy uses direct ONNX WASM for embeddings.
+ * No transformers.js dependency, no hacks required.
  *
- * This file MUST be imported as the first import in unified.ts to prevent
- * race conditions with library initialization. Failure to do so may
- * result in errors like "TextEncoder is not a constructor" when the package
- * is used in Node.js environments.
+ * This file provides minimal polyfills for cross-environment compatibility:
+ * - TextEncoder/TextDecoder for older environments
  *
- * The package.json file marks this file as having side effects to prevent
- * tree-shaking by bundlers, ensuring the patch is always applied.
+ * BENEFITS:
+ * - Clean codebase with no workarounds
+ * - Works everywhere: Node.js, Bun, Bun --compile, browsers, Deno
+ * - No platform-specific binaries
+ * - Model bundled in package (no runtime downloads)
  */
- // Get the appropriate global object for the current environment
- const globalObj = (() => {
- if (typeof globalThis !== 'undefined')
- return globalThis;
- if (typeof global !== 'undefined')
- return global;
- if (typeof self !== 'undefined')
- return self;
- return null; // No global object available
- })();
- // Define TextEncoder and TextDecoder globally to make sure they're available
- // Now works across all environments: Node.js, serverless, and other server environments
+ // ============================================================================
+ // TextEncoder/TextDecoder Polyfills
+ // ============================================================================
+ const globalObj = globalThis ?? global ?? self;
  if (globalObj) {
- if (!globalObj.TextEncoder) {
+ if (!globalObj.TextEncoder)
  globalObj.TextEncoder = TextEncoder;
- }
- if (!globalObj.TextDecoder) {
+ if (!globalObj.TextDecoder)
  globalObj.TextDecoder = TextDecoder;
- }
- // Create special global constructors for library compatibility
  globalObj.__TextEncoder__ = TextEncoder;
  globalObj.__TextDecoder__ = TextDecoder;
  }
- // Also import normally for ES modules environments
  import { applyTensorFlowPatch } from './utils/textEncoding.js';
- // Apply the TextEncoder/TextDecoder compatibility patch
  applyTensorFlowPatch();
- console.log('Applied TextEncoder/TextDecoder patch via ES modules in setup.ts');
  //# sourceMappingURL=setup.js.map
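Since the whole point of this file is its side effects, a consumer simply imports it before anything that touches the encoders. The deep import path below is an assumption about the package layout (inside the package, the entry module imports './setup.js' first):

// Side-effect import only: installs TextEncoder/TextDecoder globals if they are missing.
import '@soulcraft/brainy/dist/setup.js'; // illustrative path, not a documented entry point

const bytes = new TextEncoder().encode('brainy');
console.log(bytes.length); // 6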