emlet 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -67
- package/package.json +5 -2
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Emlet
|
|
2
2
|
|
|
3
|
-
> **
|
|
3
|
+
> **An embedding engine built for the sovereign web.**
|
|
4
4
|
|
|
5
5
|
[](https://www.npmjs.com/package/emlet)
|
|
6
6
|
[](https://gitlab.com/basedwon/emlet/-/pipelines)
|
|
@@ -12,19 +12,24 @@
|
|
|
12
12
|
[](https://twitter.com/basdwon)
|
|
13
13
|
[](https://discordapp.com/users/basedwon)
|
|
14
14
|
|
|
15
|
-
Emlet is a fast, fully self-contained semantic embedding
|
|
15
|
+
Emlet is a fast, fully self-contained semantic embedding engine designed to run anywhere JavaScript runs—browser, Node, edge, offline. No dependencies, no GPU, no network calls. Just load and embed.
|
|
16
16
|
|
|
17
|
-
The entire engine fits in
|
|
17
|
+
The entire engine fits in 1 MB and produces deterministic vector embeddings suitable for similarity search, clustering, retrieval, tagging, or downstream ML workflows.
|
|
18
18
|
|
|
19
19
|
## Features
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
21
|
+
- 100M parameters, ~1MB total size
|
|
22
|
+
- 7K tokens/sec throughput (in the browser)
|
|
23
|
+
- Deterministic output (same input → same vector)
|
|
24
|
+
- Out-of-vocabulary synthesis (no missing tokens)
|
|
25
|
+
- Unicode-aware (text, emoji, symbols, ZWJ)
|
|
26
|
+
- Configurable vector size (1–1568D)
|
|
27
|
+
- Offline-first, zero dependencies
|
|
28
|
+
- Vanilla JavaScript, edge-ready
|
|
29
|
+
- No GPU. No cloud. No API.
|
|
30
|
+
- Self-extracting runtime
|
|
31
|
+
- Neuro-symbolic core
|
|
32
|
+
- A digital familiar
|
|
28
33
|
|
|
29
34
|
## Installation
|
|
30
35
|
|
|
@@ -52,8 +57,6 @@ import emlet from 'emlet'
|
|
|
52
57
|
import { emlet, Emlet } from 'emlet'
|
|
53
58
|
```
|
|
54
59
|
|
|
55
|
-
Both styles are supported from the same file.
|
|
56
|
-
|
|
57
60
|
## Basic Usage
|
|
58
61
|
|
|
59
62
|
```js
|
|
@@ -69,8 +72,9 @@ The default export is a ready-to-use model instance.
|
|
|
69
72
|
You can create your own instance with a different output size:
|
|
70
73
|
|
|
71
74
|
```js
|
|
72
|
-
const
|
|
73
|
-
const
|
|
75
|
+
const modelA = new Emlet() // 96D default
|
|
76
|
+
const modelB = new Emlet(128) // 128D output
|
|
77
|
+
const modelC = new Emlet(256, true) // 256D head + 32D tail = 288D
|
|
74
78
|
```
|
|
75
79
|
|
|
76
80
|
### Constructor
|
|
@@ -120,56 +124,6 @@ emlet.embed('[')
|
|
|
120
124
|
|
|
121
125
|
This allows punctuation-level modeling when needed without polluting normal text embeddings.
|
|
122
126
|
|
|
123
|
-
## Common Patterns
|
|
124
|
-
|
|
125
|
-
### Text Chunking
|
|
126
|
-
|
|
127
|
-
```js
|
|
128
|
-
function chunkText(text, maxLen = 80) {
|
|
129
|
-
const words = text.split(/\s+/)
|
|
130
|
-
const chunks = []
|
|
131
|
-
let chunk = ''
|
|
132
|
-
|
|
133
|
-
for (let word of words) {
|
|
134
|
-
if ((chunk + ' ' + word).trim().length > maxLen) {
|
|
135
|
-
chunks.push(chunk.trim())
|
|
136
|
-
chunk = word
|
|
137
|
-
} else {
|
|
138
|
-
chunk += ' ' + word
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
if (chunk) chunks.push(chunk.trim())
|
|
143
|
-
return chunks
|
|
144
|
-
}
|
|
145
|
-
```
|
|
146
|
-
|
|
147
|
-
### Cosine Similarity
|
|
148
|
-
|
|
149
|
-
```js
|
|
150
|
-
function cosineSim(a, b) {
|
|
151
|
-
const dot = a.reduce((s, v, i) => s + v * b[i], 0)
|
|
152
|
-
const normA = Math.sqrt(a.reduce((s, v) => s + v * v, 0))
|
|
153
|
-
const normB = Math.sqrt(b.reduce((s, v) => s + v * v, 0))
|
|
154
|
-
return dot / (normA * normB + 1e-8)
|
|
155
|
-
}
|
|
156
|
-
```
|
|
157
|
-
|
|
158
|
-
### Top-K Similarity Search
|
|
159
|
-
|
|
160
|
-
```js
|
|
161
|
-
function topKSimilar(input, options, k = 5) {
|
|
162
|
-
const base = emlet.embed(input)
|
|
163
|
-
return options
|
|
164
|
-
.map(text => ({
|
|
165
|
-
text,
|
|
166
|
-
score: cosineSim(base, emlet.embed(text))
|
|
167
|
-
}))
|
|
168
|
-
.sort((a, b) => b.score - a.score)
|
|
169
|
-
.slice(0, k)
|
|
170
|
-
}
|
|
171
|
-
```
|
|
172
|
-
|
|
173
127
|
## API Surface
|
|
174
128
|
|
|
175
129
|
Emlet intentionally exposes a minimal API:
|
|
@@ -179,17 +133,22 @@ Emlet intentionally exposes a minimal API:
|
|
|
179
133
|
|
|
180
134
|
Everything else—chunking, similarity, indexing, clustering—is left to userland.
|
|
181
135
|
|
|
136
|
+
## Examples
|
|
137
|
+
|
|
138
|
+
See [`test.js`](./test.js) for example usage including batch encoding, similarity math, and vector inspection.
|
|
139
|
+
|
|
140
|
+
|
|
182
141
|
## Testing
|
|
183
142
|
|
|
184
143
|
Emlet includes a test suite built with [testr](https://npmjs.com/package/@basd/testr).
|
|
185
144
|
|
|
186
|
-
To run the test, first clone the
|
|
145
|
+
To run the tests, first clone the repository:
|
|
187
146
|
|
|
188
147
|
```sh
|
|
189
148
|
git clone https://github.com/basedwon/emlet.git
|
|
190
149
|
```
|
|
191
150
|
|
|
192
|
-
Install the
|
|
151
|
+
Install the dependencies, then run `npm test`:
|
|
193
152
|
|
|
194
153
|
```bash
|
|
195
154
|
npm install
|
|
@@ -198,7 +157,7 @@ npm test
|
|
|
198
157
|
|
|
199
158
|
## Donations
|
|
200
159
|
|
|
201
|
-
If
|
|
160
|
+
If Emlet sparks something useful in your work, consider sending some coin to support further development.
|
|
202
161
|
|
|
203
162
|
**Bitcoin (BTC):**
|
|
204
163
|
```
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "emlet",
|
|
3
|
-
"version": "0.1.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.1.1",
|
|
4
|
+
"description": "An embedding engine built for the sovereign web.",
|
|
5
5
|
"main": "emlet.js",
|
|
6
6
|
"exports": {
|
|
7
7
|
".": "./emlet.js"
|
|
@@ -32,6 +32,7 @@
|
|
|
32
32
|
"ai",
|
|
33
33
|
"browser",
|
|
34
34
|
"compact-model",
|
|
35
|
+
"deterministic",
|
|
35
36
|
"emoji",
|
|
36
37
|
"embedding",
|
|
37
38
|
"embedding-model",
|
|
@@ -39,6 +40,7 @@
|
|
|
39
40
|
"javascript",
|
|
40
41
|
"ml",
|
|
41
42
|
"model",
|
|
43
|
+
"neuro-symbolic",
|
|
42
44
|
"nlp",
|
|
43
45
|
"no-dependencies",
|
|
44
46
|
"obfuscated",
|
|
@@ -53,6 +55,7 @@
|
|
|
53
55
|
"tokenizer",
|
|
54
56
|
"vec",
|
|
55
57
|
"vector",
|
|
58
|
+
"vector-embeddings",
|
|
56
59
|
"emlet"
|
|
57
60
|
]
|
|
58
61
|
}
|