@localmode/core 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,611 @@
1
+ # @localmode/core
2
+
3
+ Local-first AI utilities for the browser. Zero dependencies. Privacy-first.
4
+
5
+ [![npm](https://img.shields.io/npm/v/@localmode/core)](https://npmjs.com/package/@localmode/core)
6
+ [![bundle size](https://img.shields.io/bundlephobia/minzip/@localmode/core)](https://bundlephobia.com/package/@localmode/core)
7
+ [![license](https://img.shields.io/npm/l/@localmode/core)](../../LICENSE)
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ # Preferred: pnpm
13
+ pnpm install @localmode/core
14
+
15
+ # Alternative: npm
16
+ npm install @localmode/core
17
+ ```
18
+
19
+ ## Overview
20
+
21
+ `@localmode/core` is a **zero-dependency** package containing all functions, interfaces, types, and utilities for building local-first AI applications. Provider packages (like `@localmode/transformers` and `@localmode/webllm`) implement these interfaces with specific ML frameworks.
22
+
23
+ ## Features
24
+
25
+ ### ✅ Live Features
26
+
27
+ These features are production-ready and actively used in applications.
28
+
29
+ #### Vector Database
30
+
31
+ - HNSW index for fast approximate nearest neighbor search
32
+ - IndexedDB persistence with memory fallback
33
+ - Cross-tab synchronization via Web Locks and BroadcastChannel
34
+ - Metadata filtering with extensible operators
35
+
36
+ #### Embeddings
37
+
38
+ - `embed()` - Generate embeddings for single values
39
+ - `embedMany()` - Batch embedding with progress tracking
40
+ - `semanticSearch()` - Search with embeddings
41
+ - Middleware support for caching, logging, validation
42
+
43
+ #### Reranking
44
+
45
+ - `rerank()` - Document reranking for improved RAG accuracy
46
+
47
+ #### RAG Utilities
48
+
49
+ - Text chunking (recursive, markdown, code-aware)
50
+ - `chunk()` - Split documents into optimal chunks
51
+ - Configurable separators and overlap
52
+
53
+ #### Text Generation (Streaming)
54
+
55
+ - `streamText()` - Streaming text generation with async iteration
56
+ - AbortSignal support for cancellation
57
+ - Works with WebLLM for local LLM inference
58
+
59
+ #### Storage
60
+
61
+ - `IndexedDBStorage` - Persistent browser storage
62
+ - `MemoryStorage` - In-memory fallback
63
+ - Automatic quota management and cleanup
64
+
65
+ #### Capabilities Detection
66
+
67
+ - `isWebGPUSupported()` - Check WebGPU availability
68
+ - `detectCapabilities()` - Full device capability report
69
+ - Automatic fallback recommendations
70
+
71
+ ---
72
+
73
+ ### 🚧 Coming Soon
74
+
75
+ These features have interfaces defined and are under active development.
76
+
77
+ #### Classification & NLP
78
+
79
+ - `classify()`, `classifyMany()` - Text classification
80
+ - `classifyZeroShot()` - Zero-shot classification with custom labels
81
+ - `extractEntities()`, `extractEntitiesMany()` - Named Entity Recognition
82
+
83
+ #### Audio
84
+
85
+ - `transcribe()` - Speech-to-text with Whisper models
86
+ - `synthesizeSpeech()` - Text-to-speech synthesis
87
+ - Word and segment-level timestamps
88
+ - Multi-language support
89
+
90
+ #### Vision
91
+
92
+ - `classifyImage()` - Image classification
93
+ - `classifyImageZeroShot()` - Zero-shot image classification
94
+ - `captionImage()` - Image captioning with BLIP models
95
+ - `segmentImage()` - Image segmentation / background removal
96
+ - `detectObjects()` - Object detection with bounding boxes
97
+ - `extractImageFeatures()` - Image feature extraction for similarity
98
+ - `imageToImage()` - Image transformation / super resolution
99
+
100
+ #### Text Generation (Complete)
101
+
102
+ - `generateText()` - Complete text generation with LLMs
103
+
104
+ #### Translation
105
+
106
+ - `translate()` - Text translation between languages
107
+
108
+ #### Summarization
109
+
110
+ - `summarize()` - Text summarization with configurable length
111
+
112
+ #### Fill-Mask
113
+
114
+ - `fillMask()` - Masked token prediction (BERT-style)
115
+
116
+ #### Question Answering
117
+
118
+ - `answerQuestion()` - Extractive question answering from context
119
+
120
+ #### OCR
121
+
122
+ - `extractText()` - Optical character recognition from images
123
+
124
+ #### Document QA
125
+
126
+ - `askDocument()` - Question answering on document images
127
+ - `askTable()` - Question answering on tabular data
128
+
129
+ #### Advanced RAG
130
+
131
+ - BM25 keyword search
132
+ - Hybrid search combining vector and keyword results
133
+ - Document loaders (Text, JSON, CSV, HTML)
134
+
135
+ ---
136
+
137
+ ## Quick Start
138
+
139
+ ### Semantic Search over Document Text
140
+
141
+ ```typescript
142
+ import { createVectorDB, embed, embedMany, chunk, rerank } from '@localmode/core';
143
+ import { transformers } from '@localmode/transformers';
144
+
145
+ // Create embedding model
146
+ const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2');
147
+
148
+ // Create vector database
149
+ const db = await createVectorDB({
150
+ name: 'documents',
151
+ dimensions: 384,
152
+ });
153
+
154
+ // Chunk and embed document
155
+ const chunks = chunk(documentText, {
156
+ strategy: 'recursive',
157
+ size: 512,
158
+ overlap: 50,
159
+ });
160
+
161
+ const { embeddings } = await embedMany({
162
+ model: embeddingModel,
163
+ values: chunks.map((c) => c.text),
164
+ });
165
+
166
+ // Store in database
167
+ await db.addMany(
168
+ chunks.map((c, i) => ({
169
+ id: `chunk-${i}`,
170
+ vector: embeddings[i],
171
+ metadata: { text: c.text },
172
+ }))
173
+ );
174
+
175
+ // Search
176
+ const { embedding: queryVector } = await embed({
177
+ model: embeddingModel,
178
+ value: 'What is machine learning?',
179
+ });
180
+
181
+ const results = await db.search(queryVector, { k: 10 });
182
+
183
+ // Optional: Rerank for better accuracy
184
+ const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2');
185
+ const reranked = await rerank({
186
+ model: rerankerModel,
187
+ query: 'What is machine learning?',
188
+ documents: results.map((r) => r.metadata.text),
189
+ topK: 5,
190
+ });
191
+ ```
192
+
193
+ ### LLM Chat with Streaming
194
+
195
+ ```typescript
196
+ import { streamText } from '@localmode/core';
197
+ import { webllm } from '@localmode/webllm';
198
+
199
+ const model = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC');
200
+
201
+ const result = await streamText({
202
+ model,
203
+ prompt: 'Explain quantum computing in simple terms',
204
+ maxTokens: 500,
205
+ });
206
+
207
+ for await (const chunk of result.stream) {
208
+ process.stdout.write(chunk.text); // Node.js only; in the browser, append chunk.text to the page instead
209
+ }
210
+ ```
211
+
212
+ ---
213
+
214
+ ## Core Exports
215
+
216
+ ### Vector Database
217
+
218
+ ```typescript
219
+ import {
220
+ createVectorDB,
221
+ createVectorDBWithWorker,
222
+ HNSWIndex,
223
+ cosineSimilarity,
224
+ euclideanDistance,
225
+ dotProduct,
226
+ } from '@localmode/core';
227
+ ```
228
+
229
+ ### Embeddings
230
+
231
+ ```typescript
232
+ import {
233
+ embed,
234
+ embedMany,
235
+ streamEmbedMany,
236
+ semanticSearch,
237
+ wrapEmbeddingModel,
238
+ } from '@localmode/core';
239
+ ```
240
+
241
+ ### Reranking
242
+
243
+ ```typescript
244
+ import { rerank } from '@localmode/core';
245
+ ```
246
+
247
+ ### RAG Utilities
248
+
249
+ ```typescript
250
+ import {
251
+ // Chunking
252
+ chunk,
253
+ recursiveChunk,
254
+ markdownChunk,
255
+ codeChunk,
256
+ // BM25 (Coming Soon)
257
+ createBM25,
258
+ // Hybrid Search (Coming Soon)
259
+ hybridFuse,
260
+ reciprocalRankFusion,
261
+ // Ingestion
262
+ ingest,
263
+ } from '@localmode/core';
264
+ ```
265
+
266
+ ### Text Generation
267
+
268
+ ```typescript
269
+ import {
270
+ streamText, // ✅ Live
271
+ generateText, // 🚧 Coming Soon
272
+ } from '@localmode/core';
273
+ ```
274
+
275
+ ### Storage
276
+
277
+ ```typescript
278
+ import {
279
+ IndexedDBStorage,
280
+ MemoryStorage,
281
+ createStorage,
282
+ getStorageQuota,
283
+ requestPersistence,
284
+ cleanup,
285
+ } from '@localmode/core';
286
+ ```
287
+
288
+ ### Capabilities
289
+
290
+ ```typescript
291
+ import {
292
+ detectCapabilities,
293
+ isWebGPUSupported,
294
+ isIndexedDBSupported,
295
+ checkModelSupport,
296
+ getRecommendedFallbacks,
297
+ } from '@localmode/core';
298
+ ```
299
+
300
+ ### Classification (Coming Soon)
301
+
302
+ ```typescript
303
+ import { classify, classifyMany, classifyZeroShot } from '@localmode/core';
304
+ ```
305
+
306
+ ### NER (Coming Soon)
307
+
308
+ ```typescript
309
+ import { extractEntities, extractEntitiesMany } from '@localmode/core';
310
+ ```
311
+
312
+ ### Audio (Coming Soon)
313
+
314
+ ```typescript
315
+ import { transcribe, synthesizeSpeech } from '@localmode/core';
316
+ ```
317
+
318
+ ### Vision (Coming Soon)
319
+
320
+ ```typescript
321
+ import {
322
+ classifyImage,
323
+ classifyImageZeroShot,
324
+ captionImage,
325
+ segmentImage,
326
+ detectObjects,
327
+ extractImageFeatures,
328
+ imageToImage,
329
+ } from '@localmode/core';
330
+ ```
331
+
332
+ ### Translation (Coming Soon)
333
+
334
+ ```typescript
335
+ import { translate } from '@localmode/core';
336
+ ```
337
+
338
+ ### Summarization (Coming Soon)
339
+
340
+ ```typescript
341
+ import { summarize } from '@localmode/core';
342
+ ```
343
+
344
+ ### Fill-Mask (Coming Soon)
345
+
346
+ ```typescript
347
+ import { fillMask } from '@localmode/core';
348
+ ```
349
+
350
+ ### Question Answering (Coming Soon)
351
+
352
+ ```typescript
353
+ import { answerQuestion } from '@localmode/core';
354
+ ```
355
+
356
+ ### OCR (Coming Soon)
357
+
358
+ ```typescript
359
+ import { extractText } from '@localmode/core';
360
+ ```
361
+
362
+ ### Document QA (Coming Soon)
363
+
364
+ ```typescript
365
+ import { askDocument, askTable } from '@localmode/core';
366
+ ```
367
+
368
+ ### Middleware
369
+
370
+ ```typescript
371
+ import {
372
+ wrapEmbeddingModel,
373
+ wrapVectorDB,
374
+ cachingMiddleware,
375
+ loggingMiddleware,
376
+ retryMiddleware,
377
+ rateLimitMiddleware,
378
+ validationMiddleware,
379
+ piiRedactionMiddleware,
380
+ encryptionMiddleware,
381
+ } from '@localmode/core';
382
+ ```
383
+
384
+ ### Security
385
+
386
+ ```typescript
387
+ import { encrypt, decrypt, deriveKey, isCryptoSupported, redactPII } from '@localmode/core';
388
+ ```
389
+
390
+ ### Cross-Tab Sync
391
+
392
+ ```typescript
393
+ import { createBroadcaster, createLockManager, isWebLocksSupported } from '@localmode/core';
394
+ ```
395
+
396
+ ### Network
397
+
398
+ ```typescript
399
+ import {
400
+ getNetworkStatus,
401
+ onNetworkChange,
402
+ isOnline,
403
+ isOffline,
404
+ waitForOnline,
405
+ } from '@localmode/core';
406
+ ```
407
+
408
+ ### Events
409
+
410
+ ```typescript
411
+ import { createEventEmitter, globalEventBus } from '@localmode/core';
412
+ ```
413
+
414
+ ### Errors
415
+
416
+ ```typescript
417
+ import {
418
+ LocalModeError,
419
+ EmbeddingError,
420
+ ModelNotFoundError,
421
+ StorageError,
422
+ QuotaExceededError,
423
+ ValidationError,
424
+ formatErrorForUser,
425
+ } from '@localmode/core';
426
+ ```
427
+
428
+ ### Testing Utilities
429
+
430
+ ```typescript
431
+ import {
432
+ createMockEmbeddingModel,
433
+ createMockStorage,
434
+ createMockVectorDB,
435
+ createTestVector,
436
+ createSeededRandom,
437
+ } from '@localmode/core';
438
+ ```
439
+
440
+ ---
441
+
442
+ ## Architecture
443
+
444
+ ### Zero-Dependency Core
445
+
446
+ `@localmode/core` has **zero external dependencies**. All functionality is implemented using native browser APIs:
447
+
448
+ - **Vector Search**: Custom HNSW implementation
449
+ - **Storage**: IndexedDB + Memory fallback
450
+ - **Encryption**: Web Crypto API
451
+ - **Sync**: Web Locks + BroadcastChannel
452
+
453
+ ### Provider Pattern
454
+
455
+ Provider packages implement core interfaces:
456
+
457
+ ```typescript
458
+ // @localmode/transformers - HuggingFace Transformers.js
459
+ import { transformers } from '@localmode/transformers';
460
+ const embedder = transformers.embedding('Xenova/all-MiniLM-L6-v2');
461
+
462
+ // @localmode/webllm - WebLLM for local LLMs
463
+ import { webllm } from '@localmode/webllm';
464
+ const llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC');
465
+ ```
466
+
467
+ ### Function-First API
468
+
469
+ Operations are exposed as top-level functions rather than class methods:
470
+
471
+ ```typescript
472
+ // ✅ Correct: Top-level functions
473
+ const { embedding } = await embed({ model, value: 'Hello' });
474
+
475
+ // ❌ Wrong: Class methods
476
+ const embedder = new Embedder(model);
477
+ await embedder.embed('Hello');
478
+ ```
479
+
480
+ ### Options Object Pattern
481
+
482
+ Model-backed functions such as `embed()`, `embedMany()`, `rerank()`, and `streamText()` accept a single options object:
483
+
484
+ ```typescript
485
+ const result = await embed({
486
+ model: embeddingModel,
487
+ value: 'Hello world',
488
+ abortSignal: controller.signal, // Optional
489
+ });
490
+ ```
491
+
492
+ ### Structured Results
493
+
494
+ Model-backed functions return structured result objects, for example:
495
+
496
+ ```typescript
497
+ interface EmbedResult {
498
+ embedding: Float32Array;
499
+ usage: { tokens: number };
500
+ response: { modelId: string; timestamp: Date };
501
+ }
502
+ ```
503
+
504
+ ---
505
+
506
+ ## User Extensibility
507
+
508
+ Implement any core interface to create custom providers:
509
+
510
+ ### Custom Storage
511
+
512
+ ```typescript
513
+ import type { Storage } from '@localmode/core';
514
+
515
+ class MyRedisStorage implements Storage {
516
+ async get(key: string) {
517
+ /* ... */
518
+ }
519
+ async set(key: string, value: StoredDocument) {
520
+ /* ... */
521
+ }
522
+ async delete(key: string) {
523
+ /* ... */
524
+ }
525
+ async keys() {
526
+ /* ... */
527
+ }
528
+ async clear() {
529
+ /* ... */
530
+ }
531
+ async close() {
532
+ /* ... */
533
+ }
534
+ }
535
+
536
+ const db = await createVectorDB({
537
+ storage: new MyRedisStorage(),
538
+ });
539
+ ```
540
+
541
+ ### Custom Embedding Model
542
+
543
+ ```typescript
544
+ import type { EmbeddingModel } from '@localmode/core';
545
+
546
+ class MyAPIEmbedder implements EmbeddingModel {
547
+ readonly modelId = 'custom:my-embedder';
548
+ readonly provider = 'custom';
549
+ readonly dimensions = 768;
550
+ readonly maxEmbeddingsPerCall = 100;
551
+ readonly supportsParallelCalls = true;
552
+
553
+ async doEmbed(options: DoEmbedOptions) {
554
+ // Your implementation
555
+ return {
556
+ embeddings: [new Float32Array(768)],
557
+ usage: { tokens: 10 },
558
+ };
559
+ }
560
+ }
561
+
562
+ const { embedding } = await embed({
563
+ model: new MyAPIEmbedder(),
564
+ value: 'Hello',
565
+ });
566
+ ```
567
+
568
+ ---
569
+
570
+ ## Browser Compatibility
571
+
572
+ | Browser | WebGPU | WASM | IndexedDB | Workers |
573
+ | ----------- | ------- | ---- | --------- | ------- |
574
+ | Chrome 80+ | 113+ | ✅ | ✅ | ✅ |
575
+ | Edge 80+ | 113+ | ✅ | ✅ | ✅ |
576
+ | Firefox 75+ | Nightly | ✅ | ✅ | ✅ |
577
+ | Safari 14+ | 18+ | ✅ | ✅ | ⚠️ |
578
+
579
+ ### Platform Notes
580
+
581
+ - **Safari/iOS**: Private browsing blocks IndexedDB → use `MemoryStorage`
582
+ - **Firefox**: WebGPU is available only in Nightly builds → use the WASM backend
583
+ - **SharedArrayBuffer**: Requires cross-origin isolation
584
+
585
+ ---
586
+
587
+ ## Privacy Guarantees
588
+
589
+ - **No telemetry** - We don't track anything
590
+ - **No network requests** - Core package makes zero network calls
591
+ - **Data stays local** - All processing happens in your browser
592
+ - **Open source** - Audit the code yourself
593
+
594
+ ---
595
+
596
+ ## Related Packages
597
+
598
+ | Package | Description |
599
+ | ------------------------- | ------------------------------------ |
600
+ | `@localmode/transformers` | HuggingFace Transformers.js provider |
601
+ | `@localmode/webllm` | WebLLM provider for local LLMs |
602
+ | `@localmode/pdfjs` | PDF text extraction |
603
+ | `@localmode/dexie` | Dexie.js storage adapter |
604
+ | `@localmode/idb` | idb storage adapter |
605
+ | `@localmode/localforage` | localForage storage adapter |
606
+
607
+ ---
608
+
609
+ ## License
610
+
611
+ [MIT](../../LICENSE)