@masteryhub-its/speakout-local-client-model 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,22 @@
+ MIT License
+
+ Copyright (c) 2024
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
package/README.md ADDED
@@ -0,0 +1,257 @@
+ # @masteryhub-its/speakout-local-client-model
+
+ Production-ready text moderation library using a BERT model with ONNX Runtime. This package provides efficient client-side text moderation capabilities for browser environments using WebAssembly.
+
+ ## Features
+
+ - 🚀 **Fast Inference**: Powered by ONNX Runtime Web with an optimized INT8-quantized model
+ - 🌐 **Browser-Ready**: Designed for browser environments using WebAssembly
+ - 📦 **Zero Config**: Works out of the box with embedded model files - no manual setup required
+ - 🔒 **Type Safe**: Written entirely in TypeScript, with full type definitions included
+ - ⚡ **Efficient**: Minimal dependencies and optimized WASM performance
+
+ ## Installation
+
+ ```bash
+ npm install @masteryhub-its/speakout-local-client-model
+ ```
+
+ ## Quick Start
+
+ ### Browser (Vite/React)
+
+ ```typescript
+ import { ClientContentModeration } from '@masteryhub-its/speakout-local-client-model';
+
+ const moderation = new ClientContentModeration();
+
+ await moderation.initialize();
+
+ const result = await moderation.moderate("User input text");
+
+ if (result.approved) {
+   // Content is safe
+ } else {
+   // Content should be rejected
+ }
+ ```
+
+ ### Vite Configuration
+
+ For Vite projects, you need to configure WASM asset support. The model files are embedded in the package, so no manual copying is required:
+
+ **Create or update `vite.config.ts`:**
+
+ ```typescript
+ import { defineConfig } from 'vite';
+ import react from '@vitejs/plugin-react';
+
+ export default defineConfig({
+   plugins: [react()],
+   assetsInclude: ['**/*.onnx', '**/*.wasm'],
+   optimizeDeps: {
+     exclude: ['onnxruntime-web'],
+   },
+   server: {
+     fs: {
+       // Allow serving files from node_modules (for embedded models)
+       allow: ['..'],
+     },
+   },
+ });
+ ```
+
+ **Initialize with default paths (models are automatically resolved from the package):**
+
+ ```typescript
+ import { ClientContentModeration } from '@masteryhub-its/speakout-local-client-model';
+
+ const moderation = new ClientContentModeration();
+
+ // Uses default paths: /models/bert-mini-moderation-output/model.int8.onnx
+ await moderation.initialize();
+
+ // Or specify custom URLs:
+ await moderation.initialize(
+   '/models/bert-mini-moderation-output/model.int8.onnx',
+   '/models/bert-mini-moderation-output/tokenizer.json'
+ );
+ ```
+
+ **Note:** Model files are embedded in the package and will be automatically resolved. The package uses `import.meta.url` to locate models relative to the package location, so they work seamlessly in both development and production builds.
+
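+ For reference, here is a minimal sketch of how this kind of `import.meta.url`-based resolution works in an ES module. It is illustrative only: the relative path and variable name below are assumptions, not the package's actual internals.
+
+ ```typescript
+ // Hypothetical example: resolve an asset URL relative to the current module.
+ // Works in any ESM context; bundlers like Vite rewrite it at build time.
+ const modelUrl = new URL(
+   './models/bert-mini-moderation-output/model.int8.onnx', // assumed layout
+   import.meta.url
+ ).href;
+ ```
+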
+ ## API
+
+ ### `ClientContentModeration`
+
+ Main class for text moderation.
+
+ #### Constructor
+
+ ```typescript
+ new ClientContentModeration(options?: ModerationOptions)
+ ```
+
+ **Options:**
+ - `modelFilePath?: string` - Custom URL to the ONNX model file (e.g., "/models/model.onnx")
+ - `tokenizerFilePath?: string` - Custom URL to the tokenizer file (e.g., "/models/tokenizer.json")
+ - `maxLength?: number` - Maximum sequence length (default: 128)
+ - `threshold?: number` - Confidence threshold (default: 0.5)
+
+ #### Methods
+
+ ##### `initialize(modelFilePath?, tokenizerFilePath?): Promise<void>`
+
+ Initialize the model and tokenizer. This is called automatically on first use, but you can call it explicitly for better error handling.
+
+ **Parameters:**
+ - `modelFilePath?: string` - URL to the ONNX model file (default: `/models/bert-mini-moderation-output/model.int8.onnx`)
+ - `tokenizerFilePath?: string` - URL to the tokenizer JSON file (default: `/models/bert-mini-moderation-output/tokenizer.json`)
+
+ ##### `moderate(text: string, threshold?: number): Promise<ModerationResult>`
+
+ Moderate a single text string.
+
+ **Returns:**
+ ```typescript
+ {
+   approved: boolean;     // Whether content should be approved
+   confidence: number;    // Confidence score (0-1)
+   probabilities: {
+     reject: number;      // Probability of rejection (0-1)
+     approve: number;     // Probability of approval (0-1)
+   }
+ }
+ ```
+
+ ##### `moderateBatch(texts: string[], threshold?: number): Promise<ModerationResult[]>`
+
+ Moderate multiple texts in parallel.
+
+ ##### `dispose(): void`
+
+ Clean up resources and dispose of the model session.
+
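+ A minimal usage sketch (assuming the API above): release the instance once moderation is no longer needed, for example in a `finally` block:
+
+ ```typescript
+ const moderation = new ClientContentModeration();
+ try {
+   await moderation.initialize();
+   const result = await moderation.moderate("Some text");
+   console.log(result.approved);
+ } finally {
+   // Free the ONNX session and associated resources.
+   moderation.dispose();
+ }
+ ```
+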
+ ## Examples
+
+ ### Basic Usage
+
+ ```typescript
+ import { ClientContentModeration } from '@masteryhub-its/speakout-local-client-model';
+
+ const moderation = new ClientContentModeration();
+ await moderation.initialize();
+
+ const result = await moderation.moderate("This is a test message");
+ console.log(`Approved: ${result.approved}, Confidence: ${result.confidence}`);
+ ```
+
+ ### Custom Threshold
+
+ ```typescript
+ const result = await moderation.moderate("User content", 0.7); // 70% threshold
+ ```
+
+ A higher threshold makes approval stricter: content is approved only when the approve probability meets or exceeds the threshold.
+
+ ### Batch Processing
+
+ ```typescript
+ const texts = [
+   "Hello world",
+   "This is safe content",
+   "Another message"
+ ];
+
+ const results = await moderation.moderateBatch(texts);
+ results.forEach((result, index) => {
+   console.log(`Text ${index}: ${result.approved ? 'Approved' : 'Rejected'}`);
+ });
+ ```
+
+ ### Custom Model File Paths
+
+ ```typescript
+ const moderation = new ClientContentModeration({
+   modelFilePath: '/path/to/model.onnx',
+   tokenizerFilePath: '/path/to/tokenizer.json',
+   maxLength: 256,
+   threshold: 0.6
+ });
+ ```
+
+ ## Development
+
+ ### Building from Source
+
+ ```bash
+ # Clone the repository
+ git clone <repository-url>
+ cd speakout-platform-local-model
+
+ # Install dependencies
+ npm install
+
+ # Build the project
+ npm run build
+
+ # Verify package structure
+ npm run verify
+ ```
+
+ ### Project Structure
+
+ ```
+ ├── src/              # TypeScript source files
+ │   ├── index.ts      # Main entry point
+ │   ├── model.ts      # ONNX model wrapper
+ │   ├── tokenizer.ts  # Text tokenization
+ │   ├── types.ts      # TypeScript type definitions
+ │   └── utils/        # Utility functions and constants
+ ├── lib/              # Compiled JavaScript (generated)
+ ├── models/           # Model files (ONNX model and tokenizer)
+ ├── build.ts          # Build verification script
+ └── example.ts        # Example usage file
+ ```
+
+ ## Requirements
+
+ - Node.js >= 18.0.0
+ - For browser usage: a modern browser with WebAssembly support
+ - TypeScript >= 5.3.3 (for development)
+
+ ## Dependencies
+
+ ### Runtime Dependencies
+
+ - `onnxruntime-web` - ONNX Runtime for model inference (browser/WASM)
+ - `tokenizers` - Fast tokenization library
+
+ ### Development Dependencies
+
+ - `typescript` - TypeScript compiler
+ - `ts-node` - TypeScript execution for Node.js
+ - `@types/node` - Node.js type definitions
+
+ ## TypeScript Support
+
+ This package is written entirely in TypeScript and includes full type definitions. All types are exported and available for use:
+
+ ```typescript
+ import type {
+   ModerationResult,
+   ModerationOptions,
+   TokenizerEncoding
+ } from '@masteryhub-its/speakout-local-client-model';
+ ```
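+
+ For example, the exported types can annotate your own helpers (a sketch; the `summarize` function below is illustrative, not part of the package):
+
+ ```typescript
+ import type { ModerationResult } from '@masteryhub-its/speakout-local-client-model';
+
+ // Hypothetical helper: report how many results were approved.
+ function summarize(results: ModerationResult[]): string {
+   const approved = results.filter((r) => r.approved).length;
+   return `${approved}/${results.length} approved`;
+ }
+ ```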
+
+ ## License
+
+ MIT
+
+ ## Contributing
+
+ Contributions are welcome! Please feel free to submit a Pull Request.
+
+ ## Support
+
+ For issues, questions, or contributions, please open an issue on the repository.
package/index.js ADDED
@@ -0,0 +1 @@
+ export { default as ClientContentModeration } from './lib/index.js';
package/lib/index.d.ts ADDED
@@ -0,0 +1,22 @@
+ import { ModerationResult, ModerationOptions } from "./types.js";
+ export declare class ClientContentModeration {
+     private model;
+     private tokenizer;
+     private initialized;
+     constructor(options?: ModerationOptions);
+     initialize(): Promise<void>;
+     moderate(text: string, threshold?: number): Promise<ModerationResult>;
+     moderateBatch(texts: string[], threshold?: number): Promise<ModerationResult[]>;
+     /**
+      * Compute softmax probabilities from logits
+      * @param logits - Array of logit values
+      * @returns Array of probabilities (sums to 1)
+      */
+     private softmax;
+     /**
+      * Dispose resources and clean up
+      */
+     dispose(): void;
+ }
+ export type { ModerationResult, ModerationOptions };
+ export default ClientContentModeration;
package/lib/index.js ADDED
@@ -0,0 +1,89 @@
+ import { ModerationModel } from "./model.js";
+ import { Tokenizer } from "./tokenizer.js";
+ import { DEFAULTS } from "./utils/constants.js";
+ export class ClientContentModeration {
+     model;
+     tokenizer;
+     initialized = false;
+     constructor(options) {
+         this.model = new ModerationModel();
+         this.tokenizer = new Tokenizer();
+     }
+     async initialize() {
+         if (this.initialized)
+             return;
+         await Promise.all([
+             this.model.initialize(),
+             this.tokenizer.initialize(),
+         ]);
+         this.initialized = true;
+     }
+     async moderate(text, threshold = DEFAULTS.THRESHOLD) {
+         if (!this.initialized)
+             await this.initialize();
+         const encodings = await this.tokenizer.encodeChunks(text);
+         const chunkResults = await Promise.all(encodings.map(e => this.model.predict(e.inputIds, e.attentionMask)));
+         const validChunks = chunkResults.filter(c => (Array.isArray(c) || c instanceof Float32Array) && c.length > 0);
+         if (!validChunks.length) {
+             return {
+                 approved: true,
+                 confidence: 0.5,
+                 probabilities: { reject: 0.5, approve: 0.5 }
+             };
+         }
+         const aggregatedLogits = validChunks[0].map((_, i) => validChunks.reduce((sum, logits) => sum + logits[i], 0) / validChunks.length);
+         const probabilities = this.softmax(aggregatedLogits);
+         const shouldApprove = probabilities[1] >= threshold;
+         return {
+             approved: shouldApprove,
+             confidence: Math.max(...probabilities),
+             probabilities: {
+                 reject: probabilities[0],
+                 approve: probabilities[1]
+             }
+         };
+     }
+     async moderateBatch(texts, threshold = DEFAULTS.THRESHOLD) {
+         if (!this.initialized)
+             await this.initialize();
+         return Promise.all(texts.map(t => this.moderate(t, threshold)));
+     }
+     /**
+      * Compute softmax probabilities from logits
+      * @param logits - Array of logit values
+      * @returns Array of probabilities (sums to 1)
+      */
+     softmax(logits) {
+         const output = [];
+         let max = -Infinity;
+         // Find max (numerical stability)
+         for (const v of logits) {
+             if (v > max)
+                 max = v;
+         }
+         // Exponentiate and sum
+         let sum = 0;
+         for (const v of logits) {
+             const e = Math.exp(v - max);
+             output.push(e);
+             sum += e;
+         }
+         // Normalize (guard against edge cases)
+         if (sum === 0 || !Number.isFinite(sum)) {
+             const uniform = 1 / output.length;
+             return output.map(() => uniform);
+         }
+         for (let i = 0; i < output.length; i++) {
+             output[i] /= sum;
+         }
+         return output;
+     }
+     /**
+      * Dispose resources and clean up
+      */
+     dispose() {
+         this.model.dispose();
+         this.initialized = false;
+     }
+ }
+ export default ClientContentModeration;
package/lib/model.d.ts ADDED
@@ -0,0 +1,11 @@
+ export declare class ModerationModel {
+     private session;
+     private initialized;
+     private static runtime;
+     private modelFileUrl;
+     private static getOnnxRuntime;
+     constructor(file?: string);
+     initialize(): Promise<void>;
+     predict(inputIds: number[], attentionMask: number[]): Promise<Float32Array>;
+     dispose(): void;
+ }
package/lib/model.js ADDED
@@ -0,0 +1,72 @@
+ import * as ort from 'onnxruntime-web';
+ import { ERROR_MESSAGES, ONNX_CONFIG, MODEL_PATH } from './utils/constants.js';
+ export class ModerationModel {
+     session = null;
+     initialized = false;
+     static runtime = null;
+     modelFileUrl;
+     static getOnnxRuntime() {
+         if (ModerationModel.runtime)
+             return ModerationModel.runtime;
+         if (!ort?.InferenceSession)
+             throw new Error(ERROR_MESSAGES.ONNX_RUNTIME_NOT_AVAILABLE);
+         ModerationModel.runtime = ort;
+         return ModerationModel.runtime;
+     }
+     constructor(file) {
+         this.modelFileUrl = file ?? MODEL_PATH;
+     }
+     async initialize() {
+         if (this.initialized && this.session)
+             return;
+         const runtime = ModerationModel.getOnnxRuntime();
+         if (runtime.env?.wasm) {
+             runtime.env.wasm.numThreads = ONNX_CONFIG.WASM_NUM_THREADS;
+             runtime.env.wasm.simd = true;
+         }
+         const sessionOptions = {
+             graphOptimizationLevel: ONNX_CONFIG.GRAPH_OPTIMIZATION_LEVEL,
+             executionProviders: [ONNX_CONFIG.EXECUTION_PROVIDER_WASM],
+         };
+         try {
+             this.session = await runtime.InferenceSession.create(this.modelFileUrl, sessionOptions);
+             this.initialized = true;
+         }
+         catch (error) {
+             // If model loading fails, verify the URL is correct
+             const verifyResponse = await fetch(this.modelFileUrl);
+             const contentType = verifyResponse.headers.get('content-type') || '';
+             if (contentType.includes('text/html')) {
+                 const text = await verifyResponse.text();
+                 throw new Error(`Failed to load ONNX model: The URL ${this.modelFileUrl} returned HTML instead of a model file. This usually means the model file path is incorrect. Response preview: ${text.substring(0, 200)}`);
+             }
+             throw error;
+         }
+     }
+     async predict(inputIds, attentionMask) {
+         if (!this.session)
+             await this.initialize();
+         if (!this.session)
+             throw new Error(ERROR_MESSAGES.SESSION_NOT_INITIALIZED);
+         const runtime = ModerationModel.getOnnxRuntime();
+         const tokenTypeIds = new Array(inputIds.length).fill(0);
+         const feeds = {
+             input_ids: new runtime.Tensor(ONNX_CONFIG.TENSOR_TYPE_INT64, inputIds, [1, inputIds.length]),
+             attention_mask: new runtime.Tensor(ONNX_CONFIG.TENSOR_TYPE_INT64, attentionMask, [1, attentionMask.length]),
+             token_type_ids: new runtime.Tensor(ONNX_CONFIG.TENSOR_TYPE_INT64, tokenTypeIds, [1, tokenTypeIds.length]),
+         };
+         const output = await this.session.run(feeds);
+         const logits = (output.logits ||
+             output[Object.keys(output)[0]]);
+         if (!logits || !("data" in logits)) {
+             throw new Error("Model output does not contain logits");
+         }
+         return logits.data instanceof Float32Array
+             ? logits.data
+             : new Float32Array(logits.data);
+     }
+     dispose() {
+         this.session = null;
+         this.initialized = false;
+     }
+ }
package/lib/tokenizer.d.ts ADDED
@@ -0,0 +1,15 @@
+ import type { TokenizerEncoding } from './types.js';
+ export declare class Tokenizer {
+     private tokenizer;
+     private readonly maxLength;
+     private readonly reservedTokens;
+     constructor(maxLength?: number);
+     initialize(path?: string): Promise<void>;
+     private tokenizeText;
+     private findSubwordTokens;
+     private padTokens;
+     private rawTokenize;
+     encodeChunks(text: string): Promise<TokenizerEncoding[]>;
+     private createEmptyChunk;
+     encode(text: string): Promise<TokenizerEncoding>;
+ }
package/lib/tokenizer.js ADDED
@@ -0,0 +1,111 @@
+ import { ERROR_MESSAGES, SPECIAL_TOKENS, DEFAULT_TOKEN_IDS, DEFAULTS, TOKENIZER_PATH } from './utils/constants.js';
+ export class Tokenizer {
+     tokenizer = null;
+     maxLength;
+     reservedTokens = 2;
+     constructor(maxLength) {
+         this.maxLength = maxLength ?? DEFAULTS.MAX_LENGTH;
+     }
+     async initialize(path) {
+         if (this.tokenizer)
+             return;
+         const url = path ?? TOKENIZER_PATH;
+         try {
+             const res = await fetch(url);
+             if (!res.ok) {
+                 throw new Error(`Failed to load tokenizer: HTTP ${res.status} at ${url}`);
+             }
+             const contentType = res.headers.get('content-type') || '';
+             if (contentType.includes('text/html')) {
+                 const text = await res.text();
+                 throw new Error(`Failed to load tokenizer: The URL ${url} returned HTML instead of JSON. This usually means the path is incorrect. Response preview: ${text.substring(0, 200)}`);
+             }
+             const json = await res.json();
+             this.tokenizer = { vocab: json.model?.vocab ?? {} };
+         }
+         catch (error) {
+             if (error instanceof Error && error.message.includes('HTML')) {
+                 throw error;
+             }
+             throw new Error(`Failed to initialize tokenizer at ${url}: ${error instanceof Error ? error.message : String(error)}`);
+         }
+     }
+     tokenizeText(text, vocab, unkId) {
+         const words = text.toLowerCase().match(/\S+/g) ?? [];
+         const tokens = [];
+         for (const word of words) {
+             if (vocab[word])
+                 tokens.push(vocab[word]);
+             else
+                 tokens.push(...this.findSubwordTokens(word, vocab, unkId));
+             if (tokens.length >= this.maxLength - this.reservedTokens)
+                 break;
+         }
+         return tokens.slice(0, this.maxLength - this.reservedTokens);
+     }
+     findSubwordTokens(word, vocab, unkId) {
+         const lengths = Array.from({ length: word.length }, (_, k) => word.length - k);
+         for (const i of lengths) {
+             const subword = word.substring(0, i);
+             if (vocab[subword]) {
+                 const tokens = [vocab[subword]];
+                 const remaining = word.substring(i);
+                 if (remaining) {
+                     const subwordToken = `${SPECIAL_TOKENS.SUBWORD_PREFIX}${remaining}`;
+                     tokens.push(vocab[subwordToken] ?? unkId);
+                 }
+                 return tokens;
+             }
+         }
+         return [unkId];
+     }
+     padTokens(tokens, padId) {
+         const inputIds = tokens.slice(0, this.maxLength);
+         const attentionMask = inputIds.map(() => 1);
+         while (inputIds.length < this.maxLength) {
+             inputIds.push(padId);
+             attentionMask.push(0);
+         }
+         return { inputIds, attentionMask };
+     }
+     async rawTokenize(text) {
+         if (!this.tokenizer)
+             await this.initialize();
+         if (!this.tokenizer)
+             throw new Error(ERROR_MESSAGES.TOKENIZER_NOT_INITIALIZED);
+         const vocab = this.tokenizer.vocab ?? {};
+         const unkId = vocab[SPECIAL_TOKENS.UNK] ?? DEFAULT_TOKEN_IDS.UNK;
+         return this.tokenizeText(text, vocab, unkId);
+     }
+     async encodeChunks(text) {
+         if (!this.tokenizer)
+             await this.initialize();
+         if (!this.tokenizer)
+             throw new Error(ERROR_MESSAGES.TOKENIZER_NOT_INITIALIZED);
+         const raw = await this.rawTokenize(text);
+         const vocab = this.tokenizer.vocab ?? {};
+         const padId = vocab[SPECIAL_TOKENS.PAD] ?? DEFAULT_TOKEN_IDS.PAD;
+         const clsId = vocab[SPECIAL_TOKENS.CLS] ?? DEFAULT_TOKEN_IDS.CLS;
+         const sepId = vocab[SPECIAL_TOKENS.SEP] ?? DEFAULT_TOKEN_IDS.SEP;
+         if (raw.length === 0)
+             return [this.createEmptyChunk(clsId, sepId, padId)];
+         const chunks = [];
+         const chunkSize = this.maxLength - this.reservedTokens;
+         const numChunks = Math.max(1, Math.ceil(raw.length / chunkSize));
+         const starts = Array.from({ length: numChunks }, (_, k) => k * chunkSize);
+         for (const start of starts) {
+             const slice = raw.slice(start, start + chunkSize);
+             chunks.push(this.padTokens([clsId, ...slice, sepId], padId));
+         }
+         return chunks;
+     }
+     createEmptyChunk(clsId, sepId, padId) {
+         const inputIds = [clsId, sepId, ...Array(this.maxLength - this.reservedTokens).fill(padId)];
+         const attentionMask = inputIds.map((id) => (id === padId ? 0 : 1));
+         return { inputIds: inputIds.slice(0, this.maxLength), attentionMask };
+     }
+     async encode(text) {
+         const [first] = await this.encodeChunks(text);
+         return first;
+     }
+ }
package/lib/types.d.ts ADDED
@@ -0,0 +1,56 @@
+ export interface ModerationResult {
+     approved: boolean;
+     confidence: number;
+     probabilities: {
+         reject: number;
+         approve: number;
+     };
+ }
+ export interface ModerationOptions {
+     modelFilePath?: string;
+     tokenizerFilePath?: string;
+     maxLength?: number;
+     threshold?: number;
+ }
+ export interface TokenizerEncoding {
+     inputIds: number[];
+     attentionMask: number[];
+ }
+ export interface OnnxRuntime {
+     InferenceSession: {
+         create: (modelFilePath: string, options?: SessionOptions) => Promise<InferenceSession>;
+     };
+     Tensor: new (type: string, data: number[] | Float32Array, dims: number[]) => OnnxTensor;
+     env?: {
+         wasm?: {
+             numThreads?: number;
+             simd?: boolean;
+         };
+     };
+ }
+ export interface InferenceSession {
+     run: (feeds: Record<string, OnnxTensor>) => Promise<InferenceOutput>;
+ }
+ export interface OnnxTensor {
+     data: Float32Array | number[];
+     dims: number[];
+     type?: string;
+     [key: string]: unknown;
+ }
+ export interface InferenceOutput {
+     logits?: OnnxTensor;
+     [key: string]: OnnxTensor | unknown;
+ }
+ export interface SessionOptions {
+     graphOptimizationLevel?: string;
+     executionProviders?: string[];
+     [key: string]: unknown;
+ }
+ export interface TokenizerVocab {
+     [token: string]: number;
+ }
+ export interface TokenizerData {
+     model?: {
+         vocab?: TokenizerVocab;
+     };
+ }
package/lib/types.js ADDED
@@ -0,0 +1 @@
+ export {};