pixmap-engine 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(pkill -f \"next dev\")",
5
+ "Read(//private/tmp/**)",
6
+ "Bash(npm rebuild:*)",
7
+ "Bash(rm:*)",
8
+ "Bash(grep:*)",
9
+ "Bash(npm uninstall:*)",
10
+ "Bash(npm run:*)",
11
+ "Bash(npm link:*)",
12
+ "Bash(pixmap status:*)"
13
+ ]
14
+ }
15
+ }
package/README.md ADDED
@@ -0,0 +1,103 @@
1
+ # pixmap
2
+
3
+ Local image similarity search using CLIP embeddings, HNSW indexing, and SQLite. Everything runs on your machine — no cloud, no API keys, your images stay private.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install pixmap-engine
9
+ ```
10
+
11
+ Requires Node.js 18+. Native modules (`sharp`, `better-sqlite3`, `hnswlib-node`) compile during install.
12
+
13
+ ## Usage
14
+
15
+ ### Command Line
16
+
17
+ ```bash
18
+ pixmap add ./photo.jpg # index a single image
19
+ pixmap add ./photos/ # index a directory (recursive)
20
+ pixmap search ./query.jpg # find similar images
21
+ pixmap search ./query.jpg -k 10 # return more results
22
+ pixmap list # show indexed images
23
+ pixmap status # index stats
24
+ ```
25
+ ### Terminal View
26
+ <img src="https://github.com/user-attachments/assets/0cfc7f0f-86b5-4a80-baaa-10feb28f811e" width="580" alt="Image"/>
27
+
28
+
29
+ ### As a library
30
+
31
+ ```typescript
32
+ import { PixmapEngine } from "pixmap-engine";
33
+
34
+ const engine = new PixmapEngine({ dataDir: "./data" });
35
+ await engine.init();
36
+
37
+ await engine.add("./photos/dog.jpg");
38
+ await engine.add("./photos/cat.png");
39
+
40
+ const results = await engine.search("./query.jpg", 5);
41
+
42
+ for (const r of results) {
43
+ console.log(`${r.path} — ${(r.score * 100).toFixed(1)}%`);
44
+ }
45
+ ```
46
+
47
+ CLI options (for the `pixmap` commands shown above):
48
+
49
+ - `-d, --data-dir <path>` — where to store index and db (default: `./data`)
50
+ - `-k, --top-k <n>` — number of results (default: `5`)
51
+
52
+ ## API
53
+
54
+ ### `new PixmapEngine(options)`
55
+
56
+ ```typescript
57
+ {
58
+ dataDir: string; // where to store index.hnsw and metadata.db
59
+ indexFileName?: string; // default: "index.hnsw"
60
+ dbFileName?: string; // default: "metadata.db"
61
+ }
62
+ ```
63
+
64
+ ### Methods
65
+
66
+ - **`init()`** — loads the CLIP model, opens or creates the HNSW index and SQLite db. Must be called before anything else.
67
+ - **`add(imagePath)`** — indexes an image. Returns `{ id, skipped, record }`. If the image was already indexed, `skipped` is `true`.
68
+ - **`search(imagePath, topK?)`** — embeds the query image and returns the top-K most similar indexed images. Default K is 5.
69
+ - **`listImages(limit?)`** — returns indexed images, newest first.
70
+ - **`getImage(id)`** — looks up a single image by its id.
71
+ - **`getIndexedCount()`** — returns the total number of indexed images.
72
+
73
+ ## How it works
74
+
75
+ Each image is resized to 224x224 with Sharp, run through a CLIP vision model (ViT-B/32, via ONNX), producing a 512-dimensional embedding vector. That vector goes into an HNSW index for fast nearest-neighbor lookup. File paths, IDs, and timestamps are tracked in a SQLite database.
76
+
77
+ Searching works the same way: embed the query image, ask HNSW for the closest vectors, look up the metadata.
78
+
79
+ See [docs/HOW.md](docs/HOW.md) for the full technical breakdown.
80
+
81
+ ## What gets stored
82
+
83
+ Everything lives in your `dataDir`:
84
+
85
+ - `index.hnsw` — the vector index (binary, hnswlib format)
86
+ - `metadata.db` — SQLite database with image paths and metadata
87
+
88
+ Roughly 2 KB per indexed image; a 100K-image index comes to about 220 MB.
89
+
90
+ ## Supported formats
91
+
92
+ JPEG, PNG, WebP, BMP, GIF, AVIF, TIFF — anything Sharp can read.
93
+
94
+ ## Dependencies
95
+
96
+ - [`@xenova/transformers`](https://github.com/xenova/transformers.js) — runs the CLIP model locally via ONNX
97
+ - [`sharp`](https://github.com/lovell/sharp) — image resize and preprocessing
98
+ - [`hnswlib-node`](https://github.com/yoshoku/hnswlib-node) — approximate nearest neighbor index
99
+ - [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3) — metadata storage
100
+
101
+ ## License
102
+
103
+ MIT
@@ -0,0 +1,75 @@
1
+ import sharp from 'sharp';
2
+ import { RawImage, pipeline } from '@xenova/transformers';
3
+ import { VECTOR_DIMENSIONS } from './types.js';
4
/**
 * Converts image files into L2-normalised embedding vectors using the
 * `Xenova/clip-vit-base-patch32` feature-extraction pipeline.
 *
 * `init()` must have completed before `embed()` is called.
 */
export class ImageEmbedder {
  /** The feature-extraction pipeline; remains `null` until `init()` resolves. */
  extractor = null;

  /** Creates the `image-feature-extraction` pipeline and stores it on the instance. */
  async init() {
    const loaded = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');
    this.extractor = loaded;
  }

  /**
   * Embeds a single image.
   *
   * @param imagePath Path handed to `sharp` for decoding.
   * @returns A unit-length `Float32Array` of `VECTOR_DIMENSIONS` entries.
   * @throws Error if `init()` has not run, or if the model output does not
   *   have `VECTOR_DIMENSIONS` entries.
   */
  async embed(imagePath) {
    if (!this.extractor) {
      throw new Error('ImageEmbedder is not initialized. Call init() first.');
    }
    // Centre-crop to 224x224, drop any alpha channel and re-encode as PNG
    // before handing the bytes to the model.
    const resized = sharp(imagePath).resize(224, 224, { fit: 'cover' });
    const pngBytes = await resized.removeAlpha().toFormat('png').toBuffer();
    const rawImage = await this.bufferToRawImage(pngBytes);
    const modelOutput = await this.extractor(rawImage);
    const embedding = this.extractVector(modelOutput);
    if (embedding.length === VECTOR_DIMENSIONS) {
      return this.l2Normalize(embedding);
    }
    throw new Error(`Unexpected embedding dimensions: ${embedding.length}. Expected ${VECTOR_DIMENSIONS}.`);
  }

  /**
   * Wraps encoded PNG bytes in a `RawImage`, using whichever factory the
   * installed `@xenova/transformers` exposes: `fromBlob` first, then
   * `fromBuffer`, then `read`.
   *
   * @throws Error when none of the three factories is available.
   */
  async bufferToRawImage(buffer) {
    const factory = RawImage;
    if (typeof factory.fromBlob === 'function') {
      const bytes = Uint8Array.from(buffer);
      const blob = new Blob([bytes], { type: 'image/png' });
      return factory.fromBlob(blob);
    }
    for (const method of ['fromBuffer', 'read']) {
      if (typeof factory[method] === 'function') {
        return factory[method](buffer);
      }
    }
    throw new Error('No compatible RawImage constructor found in @xenova/transformers.');
  }

  /**
   * Pulls the numeric payload out of the model output and returns it as a
   * `Float32Array`. Accepted shapes: a `Float32Array`, an array of numbers,
   * an object with a truthy `data` field, or an array whose first element
   * has a `data` field.
   *
   * @throws Error when no payload can be located.
   */
  extractVector(output) {
    if (output instanceof Float32Array) {
      return output;
    }
    let payload;
    const isNumberList = Array.isArray(output) && output.every((item) => typeof item === 'number');
    if (isNumberList) {
      payload = output;
    }
    else if (output !== null && typeof output === 'object') {
      if (output.data) {
        payload = output.data;
      }
      else if (Array.isArray(output)) {
        payload = output[0]?.data;
      }
    }
    if (!payload) {
      throw new Error('Failed to extract vector data from model output.');
    }
    return payload instanceof Float32Array ? payload : new Float32Array(payload);
  }

  /**
   * Returns a new `Float32Array` holding `vector` scaled to unit Euclidean
   * length. A zero norm is replaced by 1, so an all-zero vector stays all-zero.
   */
  l2Normalize(vector) {
    let total = 0;
    for (let idx = 0; idx < vector.length; idx += 1) {
      const component = vector[idx];
      total += component * component;
    }
    const divisor = Math.sqrt(total) || 1;
    const unit = new Float32Array(vector.length);
    for (let idx = 0; idx < unit.length; idx += 1) {
      unit[idx] = vector[idx] / divisor;
    }
    return unit;
  }
}
package/dist/engine.js ADDED
@@ -0,0 +1,66 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { DEFAULT_TOP_K } from './types.js';
4
/**
 * Programmatic facade over the embedder, the vector index and the metadata
 * database. All three are created by `init()`; every other method throws
 * until `init()` has completed.
 */
export class PixmapEngine {
  /** Absolute directory holding the index file and the database file. */
  dataDir;
  /** Absolute path of the vector index file. */
  indexPath;
  /** Absolute path of the metadata database file. */
  dbPath;
  embedder = null;
  store = null;
  db = null;

  /**
   * @param options `dataDir` is required; `indexFileName` defaults to
   *   `index.hnsw` and `dbFileName` to `metadata.db`.
   */
  constructor(options) {
    const { dataDir, indexFileName, dbFileName } = options;
    this.dataDir = path.resolve(dataDir);
    this.indexPath = path.join(this.dataDir, indexFileName ?? 'index.hnsw');
    this.dbPath = path.join(this.dataDir, dbFileName ?? 'metadata.db');
  }

  /**
   * Creates the data directory if needed, lazily loads the embedder, vector
   * store and metadata modules, then initialises each of them in that order.
   */
  async init() {
    fs.mkdirSync(this.dataDir, { recursive: true });
    const [embedderModule, storeModule, dbModule] = await Promise.all([
      import('./embedder.js'),
      import('./vectorStore.js'),
      import('./metadataDb.js'),
    ]);
    const embedder = new embedderModule.ImageEmbedder();
    this.embedder = embedder;
    await embedder.init();
    const store = new storeModule.VectorStore();
    this.store = store;
    store.initOrLoad(this.indexPath);
    this.db = new dbModule.MetadataDb(this.dbPath);
  }

  /**
   * Indexes one image and writes the vector index back to `indexPath`.
   *
   * @returns The indexer result (`id`, `skipped`) plus the stored `record`.
   */
  async add(imagePath) {
    this.assertInitialized();
    const target = path.resolve(imagePath);
    const { addImage: indexImage } = await import('./indexer.js');
    const outcome = await indexImage(target, this.embedder, this.store, this.db);
    this.store.save(this.indexPath);
    const record = this.db.get(outcome.id);
    return { ...outcome, record };
  }

  /**
   * Finds the indexed images most similar to `queryImagePath`.
   * Resolves to an empty array when the index holds no vectors.
   */
  async search(queryImagePath, topK = DEFAULT_TOP_K) {
    this.assertInitialized();
    const target = path.resolve(queryImagePath);
    const indexIsEmpty = this.store.getCount() === 0;
    if (indexIsEmpty) {
      return [];
    }
    const { findSimilar: runQuery } = await import('./searcher.js');
    return runQuery(target, this.embedder, this.store, this.db, topK);
  }

  /** Returns up to `limit` rows from the metadata database (default 200). */
  listImages(limit = 200) {
    this.assertInitialized();
    const rows = this.db.list(limit);
    return rows;
  }

  /** Looks up one metadata row by id. */
  getImage(id) {
    this.assertInitialized();
    const row = this.db.get(id);
    return row;
  }

  /** Number of vectors currently held by the vector store. */
  getIndexedCount() {
    this.assertInitialized();
    const total = this.store.getCount();
    return total;
  }

  /** Throws unless the embedder, store and database have all been created. */
  assertInitialized() {
    const ready = Boolean(this.embedder && this.store && this.db);
    if (!ready) {
      throw new Error('PixmapEngine not initialized. Call init() first.');
    }
  }
}
package/dist/index.js ADDED
@@ -0,0 +1,259 @@
1
+ #!/usr/bin/env node
2
+ import fs from 'node:fs';
3
+ import path from 'node:path';
4
+ import { ImageEmbedder } from './embedder.js';
5
+ import { addImage } from './indexer.js';
6
+ import { MetadataDb } from './metadataDb.js';
7
+ import { renderImage, renderImageRow } from './preview.js';
8
+ import { findSimilar } from './searcher.js';
9
+ import { DEFAULT_TOP_K } from './types.js';
10
+ import { VectorStore } from './vectorStore.js';
11
/**
 * File extensions (lower case, leading dot) that a directory walk treats as
 * indexable images. Both TIFF spellings are listed so that `.tif` files are
 * not silently skipped.
 */
const IMAGE_EXTENSIONS = new Set([
  '.jpg',
  '.jpeg',
  '.png',
  '.webp',
  '.bmp',
  '.gif',
  '.avif',
  '.tiff',
  '.tif',
]);
12
/**
 * CLI entry point: reads `process.argv` and dispatches to the requested
 * command.
 *
 * `list`, `show` and `status` only open the metadata database and/or the
 * vector index. `add` and `search` (alias `similar`) additionally load the
 * CLIP embedder. Usage errors print a message and set `process.exitCode = 1`;
 * other failures are thrown to the caller.
 */
async function main() {
  const args = process.argv.slice(2);
  if (args.length === 0 || args[0] === '--help' || args[0] === '-h') {
    printUsage();
    return;
  }
  const command = args[0];
  const rest = args.slice(1);
  const options = parseOptions(rest);
  // Option flags and the values they consume must never be mistaken for
  // image paths, so positionals are extracted once, up front.
  const positionals = positionalArgs(rest);
  const dataDir = path.resolve(options.dataDir);
  fs.mkdirSync(dataDir, { recursive: true });
  const dbPath = path.join(dataDir, 'metadata.db');
  const indexPath = path.join(dataDir, 'index.hnsw');

  if (command === 'list') {
    const db = new MetadataDb(dbPath);
    const images = db.list(200);
    if (images.length === 0) {
      console.log('No indexed images.');
      return;
    }
    console.log(`\n Indexed images (${images.length}):\n`);
    for (const img of images) {
      // `created` is stored in seconds; Date expects milliseconds.
      const date = new Date(img.created * 1000).toISOString().slice(0, 19);
      const name = path.basename(img.path);
      console.log(` [${img.id}] ${date} ${name}`);
    }
    console.log();
    return;
  }

  if (command === 'show') {
    const target = positionals[0];
    if (!target) {
      console.error('Error: Provide an image path or id.\n');
      process.exitCode = 1;
      return;
    }
    let imagePath;
    // A positive numeric argument is treated as a database id, anything else as a path.
    const asId = Number(target);
    if (Number.isFinite(asId) && asId > 0) {
      const db = new MetadataDb(dbPath);
      const record = db.get(asId);
      if (!record) {
        console.error(`Error: No image with id ${asId}`);
        process.exitCode = 1;
        return;
      }
      imagePath = record.path;
    }
    else {
      imagePath = path.resolve(target);
    }
    assertImageExists(imagePath);
    console.log();
    const preview = await renderImage(imagePath);
    console.log(preview);
    console.log(` ${path.basename(imagePath)}\n`);
    return;
  }

  if (command === 'status') {
    const db = new MetadataDb(dbPath);
    const store = new VectorStore();
    store.initOrLoad(indexPath);
    const images = db.list(999999);
    console.log(`\n pixmap status`);
    console.log(` data dir: ${dataDir}`);
    console.log(` vectors: ${store.getCount()}`);
    console.log(` images: ${images.length}`);
    console.log();
    return;
  }

  // Commands below require the embedder
  console.log('Loading CLIP model...');
  const embedder = new ImageEmbedder();
  await embedder.init();
  const store = new VectorStore();
  store.initOrLoad(indexPath);
  const db = new MetadataDb(dbPath);

  if (command === 'add') {
    const targets = positionals;
    if (targets.length === 0) {
      console.error('Error: No image path provided.\n');
      printUsage();
      process.exitCode = 1;
      return;
    }
    let added = 0;
    let skipped = 0;
    let failed = 0;
    try {
      for (const target of targets) {
        const absolutePath = path.resolve(target);
        const stat = safeStat(absolutePath);
        if (!stat) {
          console.error(` skip: not found — ${absolutePath}`);
          continue;
        }
        const files = stat.isDirectory() ? walkImages(absolutePath) : [absolutePath];
        for (const file of files) {
          try {
            const result = await addImage(file, embedder, store, db);
            if (result.skipped) {
              skipped += 1;
              console.log(` skip: already indexed — ${path.basename(file)}`);
            }
            else {
              added += 1;
              console.log(` added: id=${result.id} — ${path.basename(file)}`);
            }
          }
          catch (error) {
            // One unreadable or corrupt file must not abort the whole batch.
            failed += 1;
            const reason = error instanceof Error ? error.message : String(error);
            console.error(` failed: ${reason} — ${path.basename(file)}`);
          }
        }
      }
    }
    finally {
      // addImage marks each file as indexed in the database as it goes, so the
      // vector index has to be written out even when a later step throws;
      // otherwise those files would be skipped forever without a stored vector.
      store.save(indexPath);
    }
    const failedSuffix = failed > 0 ? ` failed=${failed}` : '';
    console.log(`\nDone. added=${added} skipped=${skipped}${failedSuffix}`);
    if (failed > 0) {
      process.exitCode = 1;
    }
    return;
  }

  if (command === 'search' || command === 'similar') {
    const imagePath = positionals[0];
    if (!imagePath) {
      console.error('Error: No query image provided.\n');
      printUsage();
      process.exitCode = 1;
      return;
    }
    const absolutePath = path.resolve(imagePath);
    assertImageExists(absolutePath);
    if (store.getCount() === 0) {
      console.log('Index is empty. Add images first with: pixmap add <path>');
      return;
    }
    // Show query image preview
    console.log(`\n Query:\n`);
    const queryPreview = await renderImage(absolutePath, undefined, 20);
    console.log(queryPreview);
    console.log(` ${path.basename(absolutePath)}\n`);
    const results = await findSimilar(absolutePath, embedder, store, db, options.topK);
    if (results.length === 0) {
      console.log(' No similar images found.\n');
      return;
    }
    console.log(` ── Results (${results.length}) ──\n`);
    const PER_ROW = 3;
    for (let i = 0; i < results.length; i += PER_ROW) {
      const batch = results.slice(i, i + PER_ROW);
      // Results whose file no longer exists on disk are left out of the row.
      const panels = batch
        .map((r, j) => {
          const pct = (r.score * 100).toFixed(1);
          const name = path.basename(r.path);
          return fs.existsSync(r.path)
            ? { label: `#${i + j + 1} ${pct}% ${name}`, imagePath: r.path }
            : null;
        })
        .filter((p) => p !== null);
      if (panels.length > 0) {
        const row = await renderImageRow(panels);
        console.log(row);
        console.log();
      }
    }
    return;
  }

  console.error(`Unknown command: ${command}\n`);
  printUsage();
  process.exitCode = 1;
}

/**
 * Returns the positional (non-option) arguments of a command line.
 *
 * The value-taking flags `-d`/`--data-dir` and `-k`/`--top-k` are dropped
 * together with the argument that follows them; any other argument starting
 * with `--` is dropped on its own.
 */
function positionalArgs(args) {
  const flagsWithValue = new Set(['--data-dir', '-d', '--top-k', '-k']);
  const positionals = [];
  for (let i = 0; i < args.length; i += 1) {
    const arg = args[i];
    if (flagsWithValue.has(arg)) {
      i += 1; // also skip the flag's value
      continue;
    }
    if (arg.startsWith('--')) {
      continue;
    }
    positionals.push(arg);
  }
  return positionals;
}
172
/**
 * Extracts the `--data-dir`/`-d` and `--top-k`/`-k` options from CLI arguments.
 *
 * Each flag consumes the argument that follows it. Anything else is ignored
 * here. A missing data-dir value, or a top-k value that is not a finite
 * number of at least 1 after rounding down, leaves the default in place.
 *
 * @param args Arguments following the command name.
 * @returns `{ dataDir, topK }`, with defaults `'data'` and `DEFAULT_TOP_K`.
 */
function parseOptions(args) {
  let dataDir = 'data';
  let topK = DEFAULT_TOP_K;
  for (let i = 0; i < args.length; i += 1) {
    const arg = args[i];
    if (arg === '--data-dir' || arg === '-d') {
      dataDir = args[i + 1] ?? dataDir;
      i += 1;
      continue;
    }
    if (arg === '--top-k' || arg === '-k') {
      // Round down BEFORE validating: a value such as 0.5 would otherwise pass
      // a "> 0" check and then become 0. Infinity and NaN are rejected too.
      const requested = Math.floor(Number(args[i + 1]));
      if (Number.isFinite(requested) && requested >= 1) {
        topK = requested;
      }
      i += 1;
      continue;
    }
  }
  return { dataDir, topK };
}
193
/**
 * Collects every file under `rootDir` (searched recursively) whose
 * lower-cased extension is in `IMAGE_EXTENSIONS`.
 *
 * @param rootDir Directory to start from.
 * @returns Full paths of the matching files, sorted with the default string order.
 */
function walkImages(rootDir) {
  const found = [];
  // Explicit work list instead of recursion; directories are pushed as they are met.
  const pending = [rootDir];
  while (pending.length !== 0) {
    const dir = pending.pop();
    if (dir) {
      for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
        const entryPath = path.join(dir, entry.name);
        if (entry.isDirectory()) {
          pending.push(entryPath);
        }
        else if (IMAGE_EXTENSIONS.has(path.extname(entry.name).toLowerCase())) {
          found.push(entryPath);
        }
      }
    }
  }
  found.sort();
  return found;
}
215
/**
 * `fs.statSync` that never throws.
 *
 * @param p Path to inspect.
 * @returns The stats object, or `null` when `statSync` fails for any reason.
 */
function safeStat(p) {
  let stats = null;
  try {
    stats = fs.statSync(p);
  }
  catch {
    // Any failure is reported to the caller as `null`.
  }
  return stats;
}
223
/**
 * Ensures `imagePath` points at an existing regular file.
 *
 * @throws Error `File not found: …` when the path does not exist, or
 *   `Not a file: …` when it exists but is not a file.
 */
function assertImageExists(imagePath) {
  const present = fs.existsSync(imagePath);
  if (!present) {
    throw new Error(`File not found: ${imagePath}`);
  }
  const isRegularFile = fs.statSync(imagePath).isFile();
  if (!isRegularFile) {
    throw new Error(`Not a file: ${imagePath}`);
  }
}
231
/**
 * Writes the CLI help text to stdout with a single `console.log` call.
 * The text starts and ends with a line break and shows `DEFAULT_TOP_K` as
 * the default for `--top-k`.
 */
function printUsage() {
  const helpLines = [
    'pixmap — local image similarity search',
    '',
    'Usage:',
    'pixmap add <image|dir> [...] Add image(s) or directory to the index',
    'pixmap search <image> [-k N] Find similar indexed images',
    'pixmap show <image|id> Preview an image in the terminal',
    'pixmap list Show all indexed images',
    'pixmap status Show index stats',
    '',
    'Options:',
    '-d, --data-dir <path> Data directory (default: ./data)',
    `-k, --top-k <number> Number of results (default: ${DEFAULT_TOP_K})`,
    '-h, --help Show this help',
    '',
    'Examples:',
    'pixmap add photo.jpg',
    'pixmap add ./photos/',
    'pixmap search query.png -k 10',
    'pixmap show 3',
    'pixmap list',
  ];
  // The empty first and last entries reproduce the leading and trailing line breaks.
  console.log(['', ...helpLines, ''].join('\n'));
}
255
// Run the CLI. Any rejection from main() is printed as a one-line message and
// turned into a non-zero exit code.
main().catch((error) => {
  const detail = error instanceof Error ? error.message : String(error);
  console.error('Error: ' + detail);
  process.exitCode = 1;
});
@@ -0,0 +1,10 @@
1
/**
 * Indexes one image unless the metadata database already reports it as indexed.
 *
 * For a new image the steps run in this order: embed the file, add the vector
 * to the store under the record id, then mark the record as indexed.
 *
 * @param imagePath Path used both as the database key and as the embedder input.
 * @param embedder Object providing `embed(path)`.
 * @param store Object providing `add(id, vector)`.
 * @param db Object providing `upsert(path)` and `markIndexed(id)`.
 * @returns `{ id, skipped }`; `skipped` is `true` when nothing was embedded.
 */
export async function addImage(imagePath, embedder, store, db) {
  const { id, indexed } = db.upsert(imagePath);
  const alreadyIndexed = Boolean(indexed);
  if (!alreadyIndexed) {
    const embedding = await embedder.embed(imagePath);
    store.add(id, embedding);
    db.markIndexed(id);
  }
  return { id, skipped: alreadyIndexed };
}
@@ -0,0 +1,69 @@
1
+ import Database from 'better-sqlite3';
2
/** Column names of the current `images` schema, in declaration order. */
const IMAGES_COLUMN_NAMES = ['id', 'path', 'indexed', 'created'];

/** Column definitions shared by the live `images` table and its migration copy. */
const IMAGES_COLUMNS_SQL = `
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    path TEXT NOT NULL UNIQUE,
    indexed INTEGER NOT NULL DEFAULT 0,
    created INTEGER NOT NULL DEFAULT (strftime('%s','now'))
`;

/**
 * SQLite-backed registry of image paths.
 *
 * Each row has an auto-increment `id`, a unique `path`, an `indexed` flag
 * (0 or 1) and a `created` timestamp in seconds since the Unix epoch.
 */
export class MetadataDb {
  db;

  /**
   * Opens (or creates) the database at `path`, switches it to WAL journaling,
   * migrates a legacy `images` table if one is found, and creates the table
   * when it does not exist yet.
   */
  constructor(path) {
    this.db = new Database(path);
    this.db.pragma('journal_mode = WAL');
    this.migrateSchema();
    this.db.exec(`CREATE TABLE IF NOT EXISTS images (${IMAGES_COLUMNS_SQL})`);
  }

  /**
   * Returns the row for `path`, inserting a not-yet-indexed row when none exists.
   *
   * @returns `{ id, indexed }` where `indexed` is a boolean.
   */
  upsert(path) {
    const existing = this.findByPath(path);
    if (existing) {
      return { id: existing.id, indexed: existing.indexed === 1 };
    }
    const result = this.db.prepare('INSERT INTO images (path, indexed) VALUES (?, 0)').run(path);
    return { id: Number(result.lastInsertRowid), indexed: false };
  }

  /** Sets the `indexed` flag of row `id` to 1. */
  markIndexed(id) {
    this.db.prepare('UPDATE images SET indexed = 1 WHERE id = ?').run(id);
  }

  /** Returns the row with the given id, or `undefined` when there is none. */
  get(id) {
    return this.db.prepare('SELECT * FROM images WHERE id = ?').get(id);
  }

  /** Returns up to `limit` indexed rows, newest first (ties broken by highest id). */
  list(limit = 200) {
    return this.db
      .prepare('SELECT * FROM images WHERE indexed = 1 ORDER BY created DESC, id DESC LIMIT ?')
      .all(limit);
  }

  /** Returns the row with the given path, or `undefined` when there is none. */
  findByPath(path) {
    return this.db.prepare('SELECT * FROM images WHERE path = ?').get(path);
  }

  /**
   * Rebuilds a legacy `images` table so that it has exactly the current columns.
   *
   * Only columns that exist in both the legacy and the current schema are
   * copied; current columns the legacy table lacks take their declared
   * defaults. The rebuild runs in one transaction that is rolled back on any
   * failure, so a failed migration leaves the legacy table untouched.
   *
   * @throws Error when the legacy table has no `path` column (there is nothing
   *   that could be carried over), or when SQLite rejects the rebuild.
   */
  migrateSchema() {
    const exists = this.db
      .prepare("SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'images'")
      .get();
    if (!exists) {
      return;
    }
    const columns = this.db
      .prepare("PRAGMA table_info('images')")
      .all();
    const allowed = new Set(IMAGES_COLUMN_NAMES);
    const isCurrentSchema = columns.length === allowed.size && columns.every((c) => allowed.has(c.name));
    if (isCurrentSchema) {
      return;
    }
    const legacyNames = new Set(columns.map((c) => c.name));
    if (!legacyNames.has('path')) {
      throw new Error("Cannot migrate legacy 'images' table: required column 'path' is missing.");
    }
    // Names come from the fixed IMAGES_COLUMN_NAMES list, never from the
    // database, so interpolating them into SQL is safe.
    const shared = IMAGES_COLUMN_NAMES.filter((name) => legacyNames.has(name)).join(', ');
    this.db.exec('BEGIN');
    try {
      this.db.exec(`
        CREATE TABLE images_new (${IMAGES_COLUMNS_SQL});
        INSERT INTO images_new (${shared})
        SELECT ${shared} FROM images;
        DROP TABLE images;
        ALTER TABLE images_new RENAME TO images;
      `);
      this.db.exec('COMMIT');
    }
    catch (error) {
      try {
        this.db.exec('ROLLBACK');
      }
      catch {
        // The transaction is already closed; keep the original error.
      }
      throw error;
    }
  }
}