edgeflowjs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +473 -0
- package/dist/backends/index.d.ts +13 -0
- package/dist/backends/index.d.ts.map +1 -0
- package/dist/backends/index.js +32 -0
- package/dist/backends/index.js.map +1 -0
- package/dist/backends/onnx.d.ts +46 -0
- package/dist/backends/onnx.d.ts.map +1 -0
- package/dist/backends/onnx.js +249 -0
- package/dist/backends/onnx.js.map +1 -0
- package/dist/backends/wasm.d.ts +78 -0
- package/dist/backends/wasm.d.ts.map +1 -0
- package/dist/backends/wasm.js +358 -0
- package/dist/backends/wasm.js.map +1 -0
- package/dist/backends/webgpu.d.ts +143 -0
- package/dist/backends/webgpu.d.ts.map +1 -0
- package/dist/backends/webgpu.js +326 -0
- package/dist/backends/webgpu.js.map +1 -0
- package/dist/backends/webnn.d.ts +115 -0
- package/dist/backends/webnn.d.ts.map +1 -0
- package/dist/backends/webnn.js +202 -0
- package/dist/backends/webnn.js.map +1 -0
- package/dist/core/index.d.ts +9 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +14 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/memory.d.ts +234 -0
- package/dist/core/memory.d.ts.map +1 -0
- package/dist/core/memory.js +554 -0
- package/dist/core/memory.js.map +1 -0
- package/dist/core/runtime.d.ts +129 -0
- package/dist/core/runtime.d.ts.map +1 -0
- package/dist/core/runtime.js +352 -0
- package/dist/core/runtime.js.map +1 -0
- package/dist/core/scheduler.d.ts +118 -0
- package/dist/core/scheduler.d.ts.map +1 -0
- package/dist/core/scheduler.js +600 -0
- package/dist/core/scheduler.js.map +1 -0
- package/dist/core/tensor.d.ts +149 -0
- package/dist/core/tensor.d.ts.map +1 -0
- package/dist/core/tensor.js +719 -0
- package/dist/core/tensor.js.map +1 -0
- package/dist/core/types.d.ts +367 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +54 -0
- package/dist/core/types.js.map +1 -0
- package/dist/edgeflow.browser.js +5601 -0
- package/dist/edgeflow.browser.js.map +7 -0
- package/dist/edgeflow.browser.min.js +19 -0
- package/dist/edgeflow.browser.min.js.map +7 -0
- package/dist/index.d.ts +71 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +158 -0
- package/dist/index.js.map +1 -0
- package/dist/pipelines/base.d.ts +122 -0
- package/dist/pipelines/base.d.ts.map +1 -0
- package/dist/pipelines/base.js +155 -0
- package/dist/pipelines/base.js.map +1 -0
- package/dist/pipelines/feature-extraction.d.ts +68 -0
- package/dist/pipelines/feature-extraction.d.ts.map +1 -0
- package/dist/pipelines/feature-extraction.js +197 -0
- package/dist/pipelines/feature-extraction.js.map +1 -0
- package/dist/pipelines/image-classification.d.ts +61 -0
- package/dist/pipelines/image-classification.d.ts.map +1 -0
- package/dist/pipelines/image-classification.js +140 -0
- package/dist/pipelines/image-classification.js.map +1 -0
- package/dist/pipelines/index.d.ts +58 -0
- package/dist/pipelines/index.d.ts.map +1 -0
- package/dist/pipelines/index.js +72 -0
- package/dist/pipelines/index.js.map +1 -0
- package/dist/pipelines/text-classification.d.ts +71 -0
- package/dist/pipelines/text-classification.d.ts.map +1 -0
- package/dist/pipelines/text-classification.js +175 -0
- package/dist/pipelines/text-classification.js.map +1 -0
- package/dist/tools/index.d.ts +143 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +294 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/utils/cache.d.ts +162 -0
- package/dist/utils/cache.d.ts.map +1 -0
- package/dist/utils/cache.js +443 -0
- package/dist/utils/cache.js.map +1 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +12 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/model-loader.d.ts +107 -0
- package/dist/utils/model-loader.d.ts.map +1 -0
- package/dist/utils/model-loader.js +694 -0
- package/dist/utils/model-loader.js.map +1 -0
- package/dist/utils/preprocessor.d.ts +147 -0
- package/dist/utils/preprocessor.d.ts.map +1 -0
- package/dist/utils/preprocessor.js +423 -0
- package/dist/utils/preprocessor.js.map +1 -0
- package/dist/utils/tokenizer.d.ts +140 -0
- package/dist/utils/tokenizer.d.ts.map +1 -0
- package/dist/utils/tokenizer.js +397 -0
- package/dist/utils/tokenizer.js.map +1 -0
- package/package.json +87 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* edgeFlow.js - Tokenizer
|
|
3
|
+
*
|
|
4
|
+
* Lightweight tokenizer implementation for text processing.
|
|
5
|
+
* Supports BPE, WordPiece, and basic tokenization.
|
|
6
|
+
*/
|
|
7
|
+
import { TokenizerConfig, TokenizedOutput } from '../core/types.js';
|
|
8
|
+
/**
|
|
9
|
+
* Tokenizer model types
|
|
10
|
+
*/
|
|
11
|
+
export type TokenizerModel = 'bpe' | 'wordpiece' | 'unigram' | 'basic';
|
|
12
|
+
/**
|
|
13
|
+
* Tokenizer options
|
|
14
|
+
*/
|
|
15
|
+
export interface TokenizerOptions {
|
|
16
|
+
/** Tokenizer model type */
|
|
17
|
+
model?: TokenizerModel;
|
|
18
|
+
/** Vocabulary */
|
|
19
|
+
vocab?: Map<string, number> | Record<string, number>;
|
|
20
|
+
/** Merges for BPE */
|
|
21
|
+
merges?: string[];
|
|
22
|
+
/** Add special tokens */
|
|
23
|
+
addSpecialTokens?: boolean;
|
|
24
|
+
/** Maximum length */
|
|
25
|
+
maxLength?: number;
|
|
26
|
+
/** Padding strategy */
|
|
27
|
+
padding?: 'max_length' | 'longest' | 'do_not_pad';
|
|
28
|
+
/** Truncation */
|
|
29
|
+
truncation?: boolean;
|
|
30
|
+
/** Return attention mask */
|
|
31
|
+
returnAttentionMask?: boolean;
|
|
32
|
+
/** Return token type IDs */
|
|
33
|
+
returnTokenTypeIds?: boolean;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Tokenizer - Base class for all tokenizers
|
|
37
|
+
*/
|
|
38
|
+
export declare class Tokenizer {
|
|
39
|
+
protected vocab: Map<string, number>;
|
|
40
|
+
protected reverseVocab: Map<number, string>;
|
|
41
|
+
protected config: TokenizerConfig;
|
|
42
|
+
protected model: TokenizerModel;
|
|
43
|
+
protected merges: Map<string, string>;
|
|
44
|
+
constructor(config: Partial<TokenizerConfig>, options?: TokenizerOptions);
|
|
45
|
+
/**
|
|
46
|
+
* Load vocabulary
|
|
47
|
+
*/
|
|
48
|
+
protected loadVocab(vocab: Map<string, number> | Record<string, number>): void;
|
|
49
|
+
/**
|
|
50
|
+
* Load BPE merges
|
|
51
|
+
*/
|
|
52
|
+
protected loadMerges(merges: string[]): void;
|
|
53
|
+
/**
|
|
54
|
+
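* Encode text into token IDs (plus optional attention mask and token type IDs), applying special tokens, truncation, and padding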
|
|
55
|
+
*/
|
|
56
|
+
encode(text: string, options?: {
|
|
57
|
+
addSpecialTokens?: boolean;
|
|
58
|
+
maxLength?: number;
|
|
59
|
+
padding?: 'max_length' | 'longest' | 'do_not_pad';
|
|
60
|
+
truncation?: boolean;
|
|
61
|
+
returnAttentionMask?: boolean;
|
|
62
|
+
returnTokenTypeIds?: boolean;
|
|
63
|
+
}): TokenizedOutput;
|
|
64
|
+
/**
|
|
65
|
+
* Batch encode
|
|
66
|
+
*/
|
|
67
|
+
encodeBatch(texts: string[], options?: {
|
|
68
|
+
addSpecialTokens?: boolean;
|
|
69
|
+
maxLength?: number;
|
|
70
|
+
padding?: 'max_length' | 'longest' | 'do_not_pad';
|
|
71
|
+
truncation?: boolean;
|
|
72
|
+
returnAttentionMask?: boolean;
|
|
73
|
+
returnTokenTypeIds?: boolean;
|
|
74
|
+
}): TokenizedOutput[];
|
|
75
|
+
/**
|
|
76
|
+
* Decode token IDs back to text
|
|
77
|
+
*/
|
|
78
|
+
decode(ids: number[], skipSpecialTokens?: boolean): string;
|
|
79
|
+
/**
|
|
80
|
+
* Normalize the text, then tokenize it with the configured model (BPE, WordPiece, or basic whitespace splitting)
|
|
81
|
+
*/
|
|
82
|
+
protected tokenize(text: string): string[];
|
|
83
|
+
/**
|
|
84
|
+
* Normalize text
|
|
85
|
+
*/
|
|
86
|
+
protected normalize(text: string): string;
|
|
87
|
+
/**
|
|
88
|
+
* Basic tokenization
|
|
89
|
+
*/
|
|
90
|
+
protected tokenizeBasic(text: string): string[];
|
|
91
|
+
/**
|
|
92
|
+
* WordPiece tokenization
|
|
93
|
+
*/
|
|
94
|
+
protected tokenizeWordPiece(text: string): string[];
|
|
95
|
+
/**
|
|
96
|
+
* Tokenize a single word using WordPiece
|
|
97
|
+
*/
|
|
98
|
+
protected tokenizeWord(word: string): string[];
|
|
99
|
+
/**
|
|
100
|
+
* BPE tokenization
|
|
101
|
+
*/
|
|
102
|
+
protected tokenizeBPE(text: string): string[];
|
|
103
|
+
/**
|
|
104
|
+
* Add special tokens
|
|
105
|
+
*/
|
|
106
|
+
protected addSpecialTokens(tokens: string[]): string[];
|
|
107
|
+
/**
|
|
108
|
+
* Convert tokens to IDs
|
|
109
|
+
*/
|
|
110
|
+
protected convertTokensToIds(tokens: string[]): number[];
|
|
111
|
+
/**
|
|
112
|
+
* Convert IDs to tokens
|
|
113
|
+
*/
|
|
114
|
+
protected convertIdsToTokens(ids: number[]): string[];
|
|
115
|
+
/**
|
|
116
|
+
* Check if token is a special token
|
|
117
|
+
*/
|
|
118
|
+
protected isSpecialToken(token: string): boolean;
|
|
119
|
+
/**
|
|
120
|
+
* Detokenize (convert tokens back to text)
|
|
121
|
+
*/
|
|
122
|
+
protected detokenize(tokens: string[]): string;
|
|
123
|
+
/**
|
|
124
|
+
* Get vocabulary size
|
|
125
|
+
*/
|
|
126
|
+
get vocabSize(): number;
|
|
127
|
+
/**
|
|
128
|
+
* Get config
|
|
129
|
+
*/
|
|
130
|
+
getConfig(): TokenizerConfig;
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Create a basic English tokenizer
|
|
134
|
+
*/
|
|
135
|
+
export declare function createBasicTokenizer(): Tokenizer;
|
|
136
|
+
/**
|
|
137
|
+
* Load tokenizer from URL
|
|
138
|
+
*/
|
|
139
|
+
export declare function loadTokenizer(url: string): Promise<Tokenizer>;
|
|
140
|
+
//# sourceMappingURL=tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/utils/tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACL,eAAe,EACf,eAAe,EAGhB,MAAM,kBAAkB,CAAC;AAM1B;;GAEG;AACH,MAAM,MAAM,cAAc,GAAG,KAAK,GAAG,WAAW,GAAG,SAAS,GAAG,OAAO,CAAC;AAEvE;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2BAA2B;IAC3B,KAAK,CAAC,EAAE,cAAc,CAAC;IACvB,iBAAiB;IACjB,KAAK,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACrD,qBAAqB;IACrB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,yBAAyB;IACzB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,qBAAqB;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uBAAuB;IACvB,OAAO,CAAC,EAAE,YAAY,GAAG,SAAS,GAAG,YAAY,CAAC;IAClD,iBAAiB;IACjB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,4BAA4B;IAC5B,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,4BAA4B;IAC5B,kBAAkB,CAAC,EAAE,OAAO,CAAC;CAC9B;AAMD;;GAEG;AACH,qBAAa,SAAS;IACpB,SAAS,CAAC,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACrC,SAAS,CAAC,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC5C,SAAS,CAAC,MAAM,EAAE,eAAe,CAAC;IAClC,SAAS,CAAC,KAAK,EAAE,cAAc,CAAC;IAChC,SAAS,CAAC,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAa;gBAEtC,MAAM,EAAE,OAAO,CAAC,eAAe,CAAC,EAAE,OAAO,GAAE,gBAAqB;IA4B5E;;OAEG;IACH,SAAS,CAAC,SAAS,CAAC,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI;IAa9E;;OAEG;IACH,SAAS,CAAC,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAS5C;;OAEG;IACH,MAAM,CACJ,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE;QACP,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,OAAO,CAAC,EAAE,YAAY,GAAG,SAAS,GAAG,YAAY,CAAC;QAClD,UAAU,CAAC,EAAE,OAAO,CAAC;QACrB,mBAAmB,CAAC,EAAE,OAAO,CAAC;QAC9B,kBAAkB,CAAC,EAAE,OAAO,CAAC;KACzB,GACL,eAAe;IAyDlB;;OAEG;IACH,WAAW,CACT,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,GAAE;QACP,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,OAAO,CAAC,EAAE,YAAY,GAAG,SAAS,GAAG,YAAY,CAAC;QAClD,UAAU,CAAC,EAAE,OAAO,CAAC;QACrB,mBAAmB,CAAC,EAAE,OAAO,CAAC;QAC9B,kBAAkB,CAAC,EAAE,OAAO,CAAC;KACzB,GACL,eAAe,EAAE;IAYpB;;OAEG;IACH,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,iBAAiB,UAAO,GAAG,MAAM;IAWvD;;OAEG;IACH,SAAS,CAAC
,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE;IAc1C;;OAEG;IACH,SAAS,CAAC,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;IAQzC;;OAEG;IACH,SAAS,CAAC,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE;IAI/C;;OAEG;IACH,SAAS,CAAC,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE;IAYnD;;OAEG;IACH,SAAS,CAAC,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE;IAmC9C;;OAEG;IACH,SAAS,CAAC,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE;IAyC7C;;OAEG;IACH,SAAS,CAAC,gBAAgB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE;IAkBtD;;OAEG;IACH,SAAS,CAAC,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE;IAgBxD;;OAEG;IACH,SAAS,CAAC,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE;IAgBrD;;OAEG;IACH,SAAS,CAAC,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,SAAS,CAAC,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM;IAW9C;;OAEG;IACH,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED;;OAEG;IACH,SAAS,IAAI,eAAe;CAG7B;AAMD;;GAEG;AACH,wBAAgB,oBAAoB,IAAI,SAAS,CA0ChD;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CAwBnE"}
|
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* edgeFlow.js - Tokenizer
|
|
3
|
+
*
|
|
4
|
+
* Lightweight tokenizer implementation for text processing.
|
|
5
|
+
* Supports BPE, WordPiece, and basic tokenization.
|
|
6
|
+
*/
|
|
7
|
+
import { EdgeFlowError, ErrorCodes, } from '../core/types.js';
|
|
8
|
+
// ============================================================================
|
|
9
|
+
// Base Tokenizer
|
|
10
|
+
// ============================================================================
|
|
11
|
+
/**
|
|
12
|
+
* Tokenizer - Base class for all tokenizers
|
|
13
|
+
*/
|
|
14
|
+
export class Tokenizer {
    /** Token string -> token ID. */
    vocab;
    /** Token ID -> token string (inverse of `vocab`). */
    reverseVocab;
    /** Resolved configuration: vocabulary size, max length, and special-token IDs. */
    config;
    /** Tokenization algorithm; anything other than 'bpe' or 'wordpiece' uses basic splitting. */
    model;
    /** BPE merge table: "left right" -> merged symbol, kept in priority (insertion) order. */
    merges = new Map();
    /** Cached "left right" -> priority rank, derived from `merges`. */
    #mergeRanks = new Map();
    /** The `merges` Map instance the rank cache was built from. */
    #mergeRanksSource = null;

    /**
     * @param {object} [config] - Partial tokenizer config; missing fields get defaults.
     * @param {object} [options] - Optional `model`, `vocab` (Map or plain object) and `merges`.
     */
    constructor(config = {}, options = {}) {
        // NOTE(review): the numeric defaults look like the BERT-uncased layout — confirm.
        // Because cls/sep default to numbers, the `!== undefined` checks further down are
        // effectively always true unless a subclass clears them.
        this.config = {
            vocabSize: config.vocabSize ?? 30522,
            maxLength: config.maxLength ?? 512,
            padTokenId: config.padTokenId ?? 0,
            unkTokenId: config.unkTokenId ?? 100,
            bosTokenId: config.bosTokenId,
            eosTokenId: config.eosTokenId,
            sepTokenId: config.sepTokenId ?? 102,
            clsTokenId: config.clsTokenId ?? 101,
            maskTokenId: config.maskTokenId ?? 103,
        };
        this.model = options.model ?? 'basic';
        this.vocab = new Map();
        this.reverseVocab = new Map();
        if (options.vocab) {
            this.loadVocab(options.vocab);
        }
        if (options.merges) {
            this.loadMerges(options.merges);
        }
    }

    /**
     * Replace the vocabulary and rebuild the reverse (ID -> token) lookup.
     * @param {Map<string, number>|Record<string, number>} vocab
     */
    loadVocab(vocab) {
        this.vocab = vocab instanceof Map
            ? new Map(vocab)
            : new Map(Object.entries(vocab));
        // Start from an empty map so IDs of a previously loaded vocabulary do not linger.
        this.reverseVocab = new Map();
        for (const [token, id] of this.vocab) {
            this.reverseVocab.set(id, token);
        }
    }

    /**
     * Register BPE merges. Each entry is "left right"; earlier entries have higher priority.
     * Entries that do not contain two non-empty space-separated parts are ignored.
     * @param {string[]} merges
     */
    loadMerges(merges) {
        for (const merge of merges) {
            const [a, b] = merge.split(' ');
            if (a && b) {
                this.merges.set(`${a} ${b}`, `${a}${b}`);
            }
        }
    }

    /**
     * Encode text into token IDs.
     * @param {string} text
     * @param {object} [options] - addSpecialTokens (true), maxLength (config.maxLength),
     *   padding ('max_length'), truncation (true), returnAttentionMask (true),
     *   returnTokenTypeIds (false).
     * @returns {{inputIds: number[], attentionMask: number[], tokenTypeIds?: number[]}}
     *   `attentionMask` is an empty array when `returnAttentionMask` is false.
     */
    encode(text, options = {}) {
        const {
            addSpecialTokens = true,
            maxLength = this.config.maxLength,
            padding = 'max_length',
            truncation = true,
            returnAttentionMask = true,
            returnTokenTypeIds = false,
        } = options;

        let tokens = this.tokenize(text);
        if (addSpecialTokens) {
            tokens = this.addSpecialTokens(tokens);
        }
        let inputIds = this.convertTokensToIds(tokens);

        if (truncation && inputIds.length > maxLength) {
            inputIds = inputIds.slice(0, maxLength);
            // Keep the sequence terminated by SEP after cutting it. The length guard
            // prevents writing to index -1 when maxLength is 0.
            if (addSpecialTokens && this.config.sepTokenId !== undefined && inputIds.length > 0) {
                inputIds[inputIds.length - 1] = this.config.sepTokenId;
            }
        }

        let attentionMask = returnAttentionMask ? inputIds.map(() => 1) : [];

        if (padding === 'max_length' && inputIds.length < maxLength) {
            const padLength = maxLength - inputIds.length;
            inputIds = inputIds.concat(Array.from({ length: padLength }, () => this.config.padTokenId));
            if (returnAttentionMask) {
                attentionMask = attentionMask.concat(Array.from({ length: padLength }, () => 0));
            }
        }

        const result = { inputIds, attentionMask };
        // Single-segment input: every position belongs to segment 0.
        if (returnTokenTypeIds) {
            result.tokenTypeIds = inputIds.map(() => 0);
        }
        return result;
    }

    /**
     * Encode several texts with shared options.
     * With `padding: 'longest'`, every sequence is padded to the longest encoded
     * sequence in the batch (after truncation, if enabled).
     * @param {string[]} texts
     * @param {object} [options] - Same options as `encode`.
     * @returns {Array<{inputIds: number[], attentionMask: number[], tokenTypeIds?: number[]}>}
     */
    encodeBatch(texts, options = {}) {
        const limit = options.maxLength ?? this.config.maxLength;
        if (options.padding !== 'longest') {
            return texts.map(text => this.encode(text, { ...options, maxLength: limit }));
        }
        const unpadded = texts.map(text => this.encode(text, { ...options, maxLength: limit, padding: 'do_not_pad' }));
        const longest = unpadded.reduce((max, encoding) => Math.max(max, encoding.inputIds.length), 0);
        // `encode` only pads for 'max_length', so translate 'longest' into
        // "pad to the batch maximum" explicitly.
        return texts.map(text => this.encode(text, { ...options, maxLength: longest, padding: 'max_length' }));
    }

    /**
     * Decode token IDs back to text.
     * @param {number[]} ids
     * @param {boolean} [skipSpecialTokens=true] - Drop [CLS]/[SEP]/[PAD]/[MASK]/[UNK] tokens.
     * @returns {string}
     */
    decode(ids, skipSpecialTokens = true) {
        const tokens = this.convertIdsToTokens(ids);
        const kept = skipSpecialTokens
            ? tokens.filter(token => !this.isSpecialToken(token))
            : tokens;
        return this.detokenize(kept);
    }

    /**
     * Normalize the text, then tokenize it with the configured model.
     * @param {string} text
     * @returns {string[]}
     */
    tokenize(text) {
        const normalized = this.normalize(text);
        switch (this.model) {
            case 'bpe':
                return this.tokenizeBPE(normalized);
            case 'wordpiece':
                return this.tokenizeWordPiece(normalized);
            default:
                return this.tokenizeBasic(normalized);
        }
    }

    /**
     * Lowercase, put spaces around every character that is not an ASCII word
     * character, whitespace, apostrophe or hyphen, and collapse whitespace.
     * @param {string} text
     * @returns {string}
     */
    normalize(text) {
        return text
            .toLowerCase()
            // The `u` flag makes the class match whole code points, so characters
            // outside the BMP are isolated intact instead of as two lone surrogates.
            .replace(/[^\w\s'-]/gu, ' $& ')
            .replace(/\s+/g, ' ')
            .trim();
    }

    /**
     * Split on whitespace, dropping empty pieces.
     * @param {string} text
     * @returns {string[]}
     */
    tokenizeBasic(text) {
        return text.split(/\s+/).filter(t => t.length > 0);
    }

    /**
     * WordPiece tokenization: split on whitespace, then break each word into pieces.
     * @param {string} text
     * @returns {string[]}
     */
    tokenizeWordPiece(text) {
        return text
            .split(/\s+/)
            .filter(w => w.length > 0)
            .flatMap(word => this.tokenizeWord(word));
    }

    /**
     * Greedy longest-match-first WordPiece split of a single word.
     * Non-initial pieces are looked up with a "##" prefix. A position where no
     * piece matches yields one '[UNK]' and matching resumes after that character.
     * @param {string} word
     * @returns {string[]}
     */
    tokenizeWord(word) {
        if (this.vocab.has(word)) {
            return [word];
        }
        const tokens = [];
        let start = 0;
        while (start < word.length) {
            let end = word.length;
            let found = false;
            while (start < end) {
                const piece = word.slice(start, end);
                const candidate = start === 0 ? piece : `##${piece}`;
                if (this.vocab.has(candidate)) {
                    tokens.push(candidate);
                    found = true;
                    break;
                }
                end--;
            }
            if (found) {
                start = end;
            }
            else {
                tokens.push('[UNK]');
                // Skip a full code point so a surrogate pair counts as one unknown character.
                start += word.codePointAt(start) > 0xffff ? 2 : 1;
            }
        }
        return tokens;
    }

    /**
     * Return the merge priority of a "left right" pair, or undefined if it is not a merge.
     * The rank cache is rebuilt when `merges` was replaced or changed size.
     * NOTE(review): replacing keys in place without changing the size is not detected.
     */
    #rankOf(pair) {
        if (this.#mergeRanksSource !== this.merges || this.#mergeRanks.size !== this.merges.size) {
            this.#mergeRanks = new Map(Array.from(this.merges.keys(), (key, index) => [key, index]));
            this.#mergeRanksSource = this.merges;
        }
        return this.#mergeRanks.get(pair);
    }

    /**
     * BPE tokenization: each word starts as single characters (the last one
     * suffixed with '</w>'), then the highest-priority adjacent merge is applied
     * repeatedly until none applies.
     * @param {string} text
     * @returns {string[]}
     */
    tokenizeBPE(text) {
        const words = text.split(/\s+/).filter(w => w.length > 0);
        const tokens = [];
        for (const word of words) {
            // Array.from iterates by code point, keeping surrogate pairs together.
            const characters = Array.from(word);
            let symbols = characters.map((c, i) => (i === characters.length - 1 ? `${c}</w>` : c));
            while (symbols.length > 1) {
                let bestIndex = -1;
                let bestRank = Infinity;
                for (let i = 0; i < symbols.length - 1; i++) {
                    const rank = this.#rankOf(`${symbols[i]} ${symbols[i + 1]}`);
                    // Strict '<' keeps the leftmost occurrence when ranks tie.
                    if (rank !== undefined && rank < bestRank) {
                        bestRank = rank;
                        bestIndex = i;
                    }
                }
                if (bestIndex === -1) {
                    break;
                }
                const merged = this.merges.get(`${symbols[bestIndex]} ${symbols[bestIndex + 1]}`);
                symbols = [
                    ...symbols.slice(0, bestIndex),
                    merged,
                    ...symbols.slice(bestIndex + 2),
                ];
            }
            tokens.push(...symbols);
        }
        return tokens;
    }

    /**
     * Wrap tokens as "[CLS] ... [SEP]" (each marker only if its ID is configured).
     * @param {string[]} tokens
     * @returns {string[]}
     */
    addSpecialTokens(tokens) {
        const result = [];
        if (this.config.clsTokenId !== undefined) {
            result.push('[CLS]');
        }
        result.push(...tokens);
        if (this.config.sepTokenId !== undefined) {
            result.push('[SEP]');
        }
        return result;
    }

    /**
     * Map tokens to IDs. The vocabulary wins; special-token strings missing from
     * the vocabulary use the configured IDs; everything else becomes unkTokenId.
     * @param {string[]} tokens
     * @returns {number[]}
     */
    convertTokensToIds(tokens) {
        const { clsTokenId, sepTokenId, padTokenId, maskTokenId, unkTokenId } = this.config;
        const specialIds = new Map([
            ['[CLS]', clsTokenId ?? unkTokenId],
            ['[SEP]', sepTokenId ?? unkTokenId],
            ['[PAD]', padTokenId],
            ['[MASK]', maskTokenId ?? unkTokenId],
        ]);
        return tokens.map(token => {
            const id = this.vocab.get(token);
            if (id !== undefined) {
                return id;
            }
            return specialIds.has(token) ? specialIds.get(token) : unkTokenId;
        });
    }

    /**
     * Map IDs to tokens. The reverse vocabulary wins; otherwise configured
     * special-token IDs are matched in the order CLS, SEP, PAD, MASK; anything
     * else becomes '[UNK]'.
     * @param {number[]} ids
     * @returns {string[]}
     */
    convertIdsToTokens(ids) {
        const { clsTokenId, sepTokenId, padTokenId, maskTokenId } = this.config;
        const specials = [
            [clsTokenId, '[CLS]'],
            [sepTokenId, '[SEP]'],
            [padTokenId, '[PAD]'],
            [maskTokenId, '[MASK]'],
        ];
        return ids.map(id => {
            const token = this.reverseVocab.get(id);
            if (token !== undefined) {
                return token;
            }
            const match = specials.find(([specialId]) => specialId === id);
            return match ? match[1] : '[UNK]';
        });
    }

    /**
     * @param {string} token
     * @returns {boolean} True for [CLS], [SEP], [PAD], [MASK] and [UNK].
     */
    isSpecialToken(token) {
        return ['[CLS]', '[SEP]', '[PAD]', '[MASK]', '[UNK]'].includes(token);
    }

    /**
     * Join tokens back into text.
     * BPE pieces are concatenated directly and each '</w>' end-of-word marker
     * becomes a space; otherwise tokens are space-joined and WordPiece "##"
     * continuations are glued to the preceding piece.
     * @param {string[]} tokens
     * @returns {string}
     */
    detokenize(tokens) {
        if (this.model === 'bpe') {
            return tokens
                // Special tokens carry no '</w>', so give them their own spacing.
                .map(token => (this.isSpecialToken(token) ? ` ${token} ` : token))
                .join('')
                .replace(/<\/w>/g, ' ')
                .replace(/\s+/g, ' ')
                .trim();
        }
        return tokens
            .join(' ')
            .replace(/ ##/g, '')
            .replace(/<\/w>/g, ' ')
            .trim();
    }

    /**
     * Number of entries in the loaded vocabulary (not `config.vocabSize`).
     * @returns {number}
     */
    get vocabSize() {
        return this.vocab.size;
    }

    /**
     * @returns {object} A shallow copy of the resolved configuration.
     */
    getConfig() {
        return { ...this.config };
    }
}
|
|
339
|
+
// ============================================================================
|
|
340
|
+
// Pre-trained Tokenizers
|
|
341
|
+
// ============================================================================
|
|
342
|
+
/**
|
|
343
|
+
* Create a basic English tokenizer
|
|
344
|
+
*/
|
|
345
|
+
export function createBasicTokenizer() {
    // Special tokens occupy the first five IDs; these must match the config below.
    const vocab = {
        '[PAD]': 0,
        '[UNK]': 1,
        '[CLS]': 2,
        '[SEP]': 3,
        '[MASK]': 4,
    };
    // Small built-in list of common English words.
    const commonWords = [
        'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should',
        'may', 'might', 'must', 'shall', 'can', 'need', 'dare', 'ought', 'used',
        'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them',
        'my', 'your', 'his', 'its', 'our', 'their', 'mine', 'yours', 'hers', 'ours', 'theirs',
        'this', 'that', 'these', 'those', 'what', 'which', 'who', 'whom', 'whose',
        'and', 'but', 'or', 'nor', 'for', 'yet', 'so', 'as', 'if', 'when', 'while',
        'not', 'no', 'yes', 'all', 'any', 'both', 'each', 'every', 'few', 'more', 'most',
        'other', 'some', 'such', 'only', 'own', 'same', 'than', 'too', 'very',
        'good', 'bad', 'great', 'new', 'old', 'high', 'low', 'big', 'small', 'long', 'short',
        'love', 'like', 'hate', 'want', 'need', 'think', 'know', 'feel', 'see', 'hear',
    ];
    let id = 5;
    // Iterate a Set so a word listed twice ('need') gets exactly one ID. Otherwise
    // the second assignment overwrites the first, leaving an unused ID and making
    // `vocabSize` disagree with the real number of entries.
    for (const word of new Set(commonWords)) {
        vocab[word] = id++;
    }
    return new Tokenizer({
        vocabSize: id,
        maxLength: 128,
        padTokenId: 0,
        unkTokenId: 1,
        clsTokenId: 2,
        sepTokenId: 3,
        maskTokenId: 4,
    }, { vocab, model: 'basic' });
}
|
|
382
|
+
/**
|
|
383
|
+
* Load tokenizer from URL
|
|
384
|
+
*/
|
|
385
|
+
export async function loadTokenizer(url) {
    // Fetch the tokenizer definition; a non-2xx response is reported as MODEL_NOT_FOUND.
    const response = await fetch(url);
    if (response.ok) {
        // The JSON payload may carry `config`, `vocab`, `merges` and `model`;
        // absent fields are passed through as undefined, except `config`,
        // which falls back to an empty object.
        const { config, vocab, merges, model } = await response.json();
        return new Tokenizer(config ?? {}, { vocab, merges, model });
    }
    throw new EdgeFlowError(`Failed to load tokenizer from ${url}`, ErrorCodes.MODEL_NOT_FOUND);
}
|
|
397
|
+
//# sourceMappingURL=tokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../src/utils/tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAGL,aAAa,EACb,UAAU,GACX,MAAM,kBAAkB,CAAC;AAmC1B,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;GAEG;AACH,MAAM,OAAO,SAAS;IACV,KAAK,CAAsB;IAC3B,YAAY,CAAsB;IAClC,MAAM,CAAkB;IACxB,KAAK,CAAiB;IACtB,MAAM,GAAwB,IAAI,GAAG,EAAE,CAAC;IAElD,YAAY,MAAgC,EAAE,UAA4B,EAAE;QAC1E,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,KAAK;YACpC,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,CAAC;YAClC,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,GAAG;YACpC,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,GAAG;YACpC,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,GAAG;YACpC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG;SACvC,CAAC;QAEF,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC;QACtC,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,EAAE,CAAC;QACvB,IAAI,CAAC,YAAY,GAAG,IAAI,GAAG,EAAE,CAAC;QAE9B,kBAAkB;QAClB,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YAClB,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAChC,CAAC;QAED,sBAAsB;QACtB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;YACnB,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED;;OAEG;IACO,SAAS,CAAC,KAAmD;QACrE,IAAI,KAAK,YAAY,GAAG,EAAE,CAAC;YACzB,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC;QAC9B,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;QAC9C,CAAC;QAED,sBAAsB;QACtB,KAAK,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACrC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;QACnC,CAAC;IACH,CAAC;IAED;;OAEG;IACO,UAAU,CAAC,MAAgB;QACnC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAChC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBACX,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,EAAE,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC3C,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,MAAM,CACJ,IAAY,EACZ,UAOI,EAAE;QAEN,MAAM,EACJ,gBAAgB,GAAG,IAAI,EACvB,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,EACjC,OA
AO,GAAG,YAAY,EACtB,UAAU,GAAG,IAAI,EACjB,mBAAmB,GAAG,IAAI,EAC1B,kBAAkB,GAAG,KAAK,GAC3B,GAAG,OAAO,CAAC;QAEZ,WAAW;QACX,IAAI,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAEjC,qBAAqB;QACrB,IAAI,gBAAgB,EAAE,CAAC;YACrB,MAAM,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACzC,CAAC;QAED,iBAAiB;QACjB,IAAI,QAAQ,GAAG,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,CAAC;QAE/C,qBAAqB;QACrB,IAAI,UAAU,IAAI,QAAQ,CAAC,MAAM,GAAG,SAAS,EAAE,CAAC;YAC9C,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;YACxC,8BAA8B;YAC9B,IAAI,gBAAgB,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;gBAC7D,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;YACzD,CAAC;QACH,CAAC;QAED,wBAAwB;QACxB,MAAM,aAAa,GAAa,mBAAmB;YACjD,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YACvB,CAAC,CAAC,EAAE,CAAC;QAEP,gBAAgB;QAChB,IAAI,OAAO,KAAK,YAAY,IAAI,QAAQ,CAAC,MAAM,GAAG,SAAS,EAAE,CAAC;YAC5D,MAAM,SAAS,GAAG,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC;YAC9C,QAAQ,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAa,CAAC,CAAC;YAC3F,IAAI,mBAAmB,EAAE,CAAC;gBACxB,aAAa,CAAC,IAAI,CAAC,GAAI,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAc,CAAC,CAAC;YACpE,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAoB;YAC9B,QAAQ;YACR,aAAa;SACd,CAAC;QAEF,0CAA0C;QAC1C,IAAI,kBAAkB,EAAE,CAAC;YACvB,MAAM,CAAC,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;QAC9C,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACH,WAAW,CACT,KAAe,EACf,UAOI,EAAE;QAEN,6CAA6C;QAC7C,IAAI,MAAM,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;QAExD,IAAI,OAAO,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;YAClC,MAAM,SAAS,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,GAAG,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC;YAC9F,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;QAC9D,CAAC;QAED,OAAO,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,GAAG,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;IACjF,CAAC;IAED;;OAEG;IACH,MA
AM,CAAC,GAAa,EAAE,iBAAiB,GAAG,IAAI;QAC5C,MAAM,MAAM,GAAG,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,CAAC;QAE5C,qCAAqC;QACrC,MAAM,cAAc,GAAG,iBAAiB;YACtC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;YACrD,CAAC,CAAC,MAAM,CAAC;QAEX,OAAO,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;IACzC,CAAC;IAED;;OAEG;IACO,QAAQ,CAAC,IAAY;QAC7B,iBAAiB;QACjB,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAExC,QAAQ,IAAI,CAAC,KAAK,EAAE,CAAC;YACnB,KAAK,KAAK;gBACR,OAAO,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;YACtC,KAAK,WAAW;gBACd,OAAO,IAAI,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC;YAC5C;gBACE,OAAO,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;QAC1C,CAAC;IACH,CAAC;IAED;;OAEG;IACO,SAAS,CAAC,IAAY;QAC9B,OAAO,IAAI;aACR,WAAW,EAAE;aACb,OAAO,CAAC,YAAY,EAAE,MAAM,CAAC;aAC7B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;aACpB,IAAI,EAAE,CAAC;IACZ,CAAC;IAED;;OAEG;IACO,aAAa,CAAC,IAAY;QAClC,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrD,CAAC;IAED;;OAEG;IACO,iBAAiB,CAAC,IAAY;QACtC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC1D,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;YAC3C,MAAM,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;QAC7B,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACO,YAAY,CAAC,IAAY;QACjC,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CAAC,CAAC;QAChB,CAAC;QAED,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YAC3B,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;YACtB,IAAI,KAAK,GAAG,KAAK,CAAC;YAElB,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;gBACnB,MAAM,MAAM,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,CAAC;gBAEpF,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC3B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;oBACpB,KAAK,GAAG,IAAI,CAAC;oBACb,MAAM;gBACR,CAAC;gBA
CD,GAAG,EAAE,CAAC;YACR,CAAC;YAED,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,oBAAoB;gBACpB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACrB,KAAK,EAAE,CAAC;YACV,CAAC;iBAAM,CAAC;gBACN,KAAK,GAAG,GAAG,CAAC;YACd,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACO,WAAW,CAAC,IAAY;QAChC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC1D,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,6BAA6B;YAC7B,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAEjF,2BAA2B;YAC3B,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,IAAI,OAAO,GAA4B,IAAI,CAAC;gBAC5C,IAAI,QAAQ,GAAG,QAAQ,CAAC;gBAExB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC1C,MAAM,IAAI,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;oBAC3C,IAAI,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;wBAC1B,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;wBAC3D,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;4BACrB,QAAQ,GAAG,KAAK,CAAC;4BACjB,OAAO,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;wBACtB,CAAC;oBACH,CAAC;gBACH,CAAC;gBAED,IAAI,CAAC,OAAO;oBAAE,MAAM;gBAEpB,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,OAAO,CAAC;gBAC5B,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC;gBACtC,KAAK,GAAG;oBACN,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;oBACtB,MAAM;oBACN,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;iBACxB,CAAC;YACJ,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;QACxB,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACO,gBAAgB,CAAC,MAAgB;QACzC,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,gBAAgB;QAChB,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;YACzC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACvB,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;QAEvB,gBAAgB;QAChB,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;YACzC,MAAM,CAAC,
IAAI,CAAC,OAAO,CAAC,CAAC;QACvB,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACO,kBAAkB,CAAC,MAAgB;QAC3C,OAAO,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE;YACxB,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YACjC,IAAI,EAAE,KAAK,SAAS;gBAAE,OAAO,EAAE,CAAC;YAEhC,wBAAwB;YACxB,IAAI,KAAK,KAAK,OAAO;gBAAE,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;YAC/E,IAAI,KAAK,KAAK,OAAO;gBAAE,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;YAC/E,IAAI,KAAK,KAAK,OAAO;gBAAE,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;YACrD,IAAI,KAAK,KAAK,QAAQ;gBAAE,OAAO,IAAI,CAAC,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;YACjF,IAAI,KAAK,KAAK,OAAO;gBAAE,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;YAErD,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;QAChC,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACO,kBAAkB,CAAC,GAAa;QACxC,OAAO,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YAClB,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACxC,IAAI,KAAK,KAAK,SAAS;gBAAE,OAAO,KAAK,CAAC;YAEtC,2BAA2B;YAC3B,IAAI,EAAE,KAAK,IAAI,CAAC,MAAM,CAAC,UAAU;gBAAE,OAAO,OAAO,CAAC;YAClD,IAAI,EAAE,KAAK,IAAI,CAAC,MAAM,CAAC,UAAU;gBAAE,OAAO,OAAO,CAAC;YAClD,IAAI,EAAE,KAAK,IAAI,CAAC,MAAM,CAAC,UAAU;gBAAE,OAAO,OAAO,CAAC;YAClD,IAAI,EAAE,KAAK,IAAI,CAAC,MAAM,CAAC,WAAW;gBAAE,OAAO,QAAQ,CAAC;YACpD,IAAI,EAAE,KAAK,IAAI,CAAC,MAAM,CAAC,UAAU;gBAAE,OAAO,OAAO,CAAC;YAElD,OAAO,OAAO,CAAC;QACjB,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACO,cAAc,CAAC,KAAa;QACpC,OAAO,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;IACxE,CAAC;IAED;;OAEG;IACO,UAAU,CAAC,MAAgB;QACnC,mBAAmB;QACnB,MAAM,IAAI,GAAG,MAAM;aAChB,IAAI,CAAC,GAAG,CAAC;aACT,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;aACnB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;aACtB,IAAI,EAAE,CAAC;QAEV,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,IAAI,SAAS;QACX,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,SAAS;QACP,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;IAC5B,CAAC;CACF;AAED,+EAA+E;AAC/E,yBAAyB;AACzB,+EAA+E;AAE/E;;GAEG;AACH,MAAM,UAAU,oBAAoB;IAClC,0BAA0B;IAC1B,MAAM,KAAK,GAA2B;QACpC,OAAO,EAAE
,CAAC;QACV,OAAO,EAAE,CAAC;QACV,OAAO,EAAE,CAAC;QACV,OAAO,EAAE,CAAC;QACV,QAAQ,EAAE,CAAC;KACZ,CAAC;IAEF,mBAAmB;IACnB,MAAM,WAAW,GAAG;QAClB,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO;QACnE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ;QAC7E,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;QACvE,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM;QAC7E,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ;QACrF,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO;QACzE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO;QAC1E,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM;QAChF,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM;QACrE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO;QACpF,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM;KAC/E,CAAC;IAEF,IAAI,EAAE,GAAG,CAAC,CAAC;IACX,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;IACrB,CAAC;IAED,OAAO,IAAI,SAAS,CAClB;QACE,SAAS,EAAE,EAAE;QACb,SAAS,EAAE,GAAG;QACd,UAAU,EAAE,CAAC;QACb,UAAU,EAAE,CAAC;QACb,UAAU,EAAE,CAAC;QACb,UAAU,EAAE,CAAC;QACb,WAAW,EAAE,CAAC;KACf,EACD,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,CAC1B,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,GAAW;IAC7C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;IAClC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,aAAa,CACrB,iCAAiC,GAAG,EAAE,EACtC,UAAU,CAAC,eAAe,CAC3B,CAAC;IACJ,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAK/B,CAAC;IAEF,OAAO,IAAI,SAAS,CAClB,IAAI,CAAC,MAAM,IAAI,EAAE,EACjB;QACE,KAAK,EAAE,IAAI,CAA
C,KAAK;QACjB,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,KAAK,EAAE,IAAI,CAAC,KAAK;KAClB,CACF,CAAC;AACJ,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "edgeflowjs",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Lightweight, high-performance browser ML inference framework with native concurrency support",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"module": "./dist/index.js",
|
|
8
|
+
"types": "./dist/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"import": "./dist/index.js",
|
|
12
|
+
"types": "./dist/index.d.ts"
|
|
13
|
+
},
|
|
14
|
+
"./core": {
|
|
15
|
+
"import": "./dist/core/index.js",
|
|
16
|
+
"types": "./dist/core/index.d.ts"
|
|
17
|
+
},
|
|
18
|
+
"./backends": {
|
|
19
|
+
"import": "./dist/backends/index.js",
|
|
20
|
+
"types": "./dist/backends/index.d.ts"
|
|
21
|
+
},
|
|
22
|
+
"./pipelines": {
|
|
23
|
+
"import": "./dist/pipelines/index.js",
|
|
24
|
+
"types": "./dist/pipelines/index.d.ts"
|
|
25
|
+
},
|
|
26
|
+
"./tools": {
|
|
27
|
+
"import": "./dist/tools/index.js",
|
|
28
|
+
"types": "./dist/tools/index.d.ts"
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"files": [
|
|
32
|
+
"dist",
|
|
33
|
+
"README.md",
|
|
34
|
+
"LICENSE"
|
|
35
|
+
],
|
|
36
|
+
"scripts": {
|
|
37
|
+
"build": "tsc && npm run build:browser",
|
|
38
|
+
"build:browser": "node scripts/build-browser.js",
|
|
39
|
+
"dev": "tsc --watch",
|
|
40
|
+
"clean": "rm -rf dist",
|
|
41
|
+
"lint": "eslint src --ext .ts",
|
|
42
|
+
"test": "vitest",
|
|
43
|
+
"test:coverage": "vitest --coverage",
|
|
44
|
+
"demo": "npm run build && node demo/server.js",
|
|
45
|
+
"demo:server": "node demo/server.js",
|
|
46
|
+
"prepublishOnly": "npm run clean && npm run build"
|
|
47
|
+
},
|
|
48
|
+
"keywords": [
|
|
49
|
+
"machine-learning",
|
|
50
|
+
"ml",
|
|
51
|
+
"ai",
|
|
52
|
+
"inference",
|
|
53
|
+
"webgpu",
|
|
54
|
+
"webnn",
|
|
55
|
+
"browser",
|
|
56
|
+
"edge",
|
|
57
|
+
"transformers",
|
|
58
|
+
"neural-network"
|
|
59
|
+
],
|
|
60
|
+
"author": "",
|
|
61
|
+
"license": "MIT",
|
|
62
|
+
"repository": {
|
|
63
|
+
"type": "git",
|
|
64
|
+
"url": "https://github.com/s-zx/edgeflow.js"
|
|
65
|
+
},
|
|
66
|
+
"bugs": {
|
|
67
|
+
"url": "https://github.com/s-zx/edgeflow.js/issues"
|
|
68
|
+
},
|
|
69
|
+
"homepage": "https://edgeflow.js.org",
|
|
70
|
+
"dependencies": {
|
|
71
|
+
"onnxruntime-web": "^1.17.0"
|
|
72
|
+
},
|
|
73
|
+
"devDependencies": {
|
|
74
|
+
"@types/node": "^20.10.0",
|
|
75
|
+
"typescript": "^5.3.0",
|
|
76
|
+
"vitest": "^1.0.0",
|
|
77
|
+
"eslint": "^8.55.0",
|
|
78
|
+
"@typescript-eslint/parser": "^6.13.0",
|
|
79
|
+
"@typescript-eslint/eslint-plugin": "^6.13.0",
|
|
80
|
+
"esbuild": "^0.20.0"
|
|
81
|
+
},
|
|
82
|
+
"peerDependencies": {},
|
|
83
|
+
"engines": {
|
|
84
|
+
"node": ">=18.0.0"
|
|
85
|
+
},
|
|
86
|
+
"sideEffects": false
|
|
87
|
+
}
|