@classytic/mongokit 3.1.6 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -4
- package/dist/actions/index.d.ts +2 -2
- package/dist/actions/index.js +5 -3
- package/dist/ai/index.d.ts +175 -0
- package/dist/ai/index.js +206 -0
- package/dist/chunks/{chunk-M2XHQGZB.js → chunk-44KXLGPO.js} +28 -1
- package/dist/chunks/{chunk-CSLJ2PL2.js → chunk-DEVXDBRL.js} +143 -9
- package/dist/chunks/{chunk-CF6FLC2G.js → chunk-I7CWNAJB.js} +1 -1
- package/dist/chunks/chunk-JWUAVZ3L.js +8 -0
- package/dist/chunks/{chunk-IT7DCOKR.js → chunk-UE2IEXZJ.js} +15 -8
- package/dist/chunks/chunk-URLJFIR7.js +22 -0
- package/dist/chunks/{chunk-SAKSLT47.js → chunk-VWKIKZYF.js} +274 -7
- package/dist/chunks/chunk-WSFCRVEQ.js +7 -0
- package/dist/{index-BXSSv1pW.d.ts → index-BDn5fSTE.d.ts} +13 -1
- package/dist/index.d.ts +146 -41
- package/dist/index.js +294 -296
- package/dist/{mongooseToJsonSchema-Cc5AwuDu.d.ts → mongooseToJsonSchema-CaRF_bCN.d.ts} +33 -2
- package/dist/pagination/PaginationEngine.d.ts +1 -1
- package/dist/pagination/PaginationEngine.js +3 -2
- package/dist/plugins/index.d.ts +125 -2
- package/dist/plugins/index.js +5 -3
- package/dist/{types-B5Uv6Ak7.d.ts → types-Jni1KgkP.d.ts} +18 -11
- package/dist/utils/index.d.ts +2 -2
- package/dist/utils/index.js +4 -2
- package/package.json +10 -2
- package/dist/chunks/chunk-VJXDGP3C.js +0 -14
package/README.md
CHANGED
|
@@ -12,9 +12,10 @@
|
|
|
12
12
|
- **Zero dependencies** - Only Mongoose as peer dependency
|
|
13
13
|
- **Smart pagination** - Auto-detects offset vs cursor-based
|
|
14
14
|
- **Event-driven** - Pre/post hooks for all operations
|
|
15
|
-
- **
|
|
15
|
+
- **14 built-in plugins** - Caching, soft delete, validation, multi-tenant, observability, and more
|
|
16
|
+
- **Vector search** - MongoDB Atlas `$vectorSearch` with auto-embedding and multimodal support
|
|
16
17
|
- **TypeScript first** - Full type safety with discriminated unions
|
|
17
|
-
- **
|
|
18
|
+
- **547 passing tests** - Battle-tested and production-ready
|
|
18
19
|
|
|
19
20
|
## Installation
|
|
20
21
|
|
|
@@ -22,7 +23,7 @@
|
|
|
22
23
|
npm install @classytic/mongokit mongoose
|
|
23
24
|
```
|
|
24
25
|
|
|
25
|
-
> Supports Mongoose `^
|
|
26
|
+
> Supports Mongoose `^9.0.0`
|
|
26
27
|
|
|
27
28
|
## Quick Start
|
|
28
29
|
|
|
@@ -186,6 +187,8 @@ const repo = new Repository(UserModel, [
|
|
|
186
187
|
| `batchOperationsPlugin()` | Adds `updateMany`, `deleteMany` |
|
|
187
188
|
| `aggregateHelpersPlugin()` | Adds `groupBy`, `sum`, `average`, etc. |
|
|
188
189
|
| `subdocumentPlugin()` | Manage subdocument arrays |
|
|
190
|
+
| `multiTenantPlugin(opts)` | Auto-inject tenant isolation on all operations |
|
|
191
|
+
| `observabilityPlugin(opts)` | Operation timing, metrics, slow query detection |
|
|
189
192
|
|
|
190
193
|
### Soft Delete
|
|
191
194
|
|
|
@@ -297,6 +300,88 @@ const repo = new Repository(UserModel, [
|
|
|
297
300
|
]);
|
|
298
301
|
```
|
|
299
302
|
|
|
303
|
+
### Multi-Tenant
|
|
304
|
+
|
|
305
|
+
```javascript
|
|
306
|
+
import { multiTenantPlugin } from '@classytic/mongokit';
|
|
307
|
+
|
|
308
|
+
const repo = new Repository(UserModel, [
|
|
309
|
+
multiTenantPlugin({
|
|
310
|
+
tenantField: 'organizationId',
|
|
311
|
+
contextKey: 'organizationId', // reads from context
|
|
312
|
+
required: true,
|
|
313
|
+
})
|
|
314
|
+
]);
|
|
315
|
+
|
|
316
|
+
// All operations are automatically scoped to the tenant
|
|
317
|
+
const users = await repo.getAll({ organizationId: 'org_123' });
|
|
318
|
+
await repo.update(userId, { name: 'New' }, { organizationId: 'org_123' });
|
|
319
|
+
// Cross-tenant update/delete is blocked — returns "not found"
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
### Observability
|
|
323
|
+
|
|
324
|
+
```javascript
|
|
325
|
+
import { observabilityPlugin } from '@classytic/mongokit';
|
|
326
|
+
|
|
327
|
+
const repo = new Repository(UserModel, [
|
|
328
|
+
observabilityPlugin({
|
|
329
|
+
onMetric: (metric) => {
|
|
330
|
+
// Send to DataDog, New Relic, OpenTelemetry, etc.
|
|
331
|
+
statsd.histogram(`mongokit.${metric.operation}`, metric.duration);
|
|
332
|
+
},
|
|
333
|
+
slowThresholdMs: 200, // log operations slower than 200ms
|
|
334
|
+
})
|
|
335
|
+
]);
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
### Vector Search (Atlas)
|
|
339
|
+
|
|
340
|
+
```javascript
|
|
341
|
+
import { vectorPlugin } from '@classytic/mongokit/ai';
|
|
342
|
+
|
|
343
|
+
const repo = new Repository(ProductModel, [
|
|
344
|
+
methodRegistryPlugin(),
|
|
345
|
+
vectorPlugin({
|
|
346
|
+
fields: [{
|
|
347
|
+
path: 'embedding',
|
|
348
|
+
index: 'vector_index',
|
|
349
|
+
dimensions: 1536,
|
|
350
|
+
sourceFields: ['title', 'description'],
|
|
351
|
+
}],
|
|
352
|
+
embedFn: async ({ text }) =>
|
|
353
|
+
openai.embeddings.create({ input: text, model: 'text-embedding-3-small' })
|
|
354
|
+
.then(r => r.data[0].embedding),
|
|
355
|
+
autoEmbed: true,
|
|
356
|
+
onEmbedError: (err) => console.warn('Embed failed:', err.message),
|
|
357
|
+
}),
|
|
358
|
+
]);
|
|
359
|
+
|
|
360
|
+
// Search by text (auto-embeds the query)
|
|
361
|
+
const results = await repo.searchSimilar({ query: 'running shoes', limit: 10 });
|
|
362
|
+
|
|
363
|
+
// Search by vector directly
|
|
364
|
+
const byVector = await repo.searchSimilar({ query: [0.1, 0.2, /* ...remaining dimensions */], limit: 5 });
|
|
365
|
+
|
|
366
|
+
// Embed manually
|
|
367
|
+
const vector = await repo.embed('some text');
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
### Logging
|
|
371
|
+
|
|
372
|
+
```javascript
|
|
373
|
+
import { configureLogger } from '@classytic/mongokit';
|
|
374
|
+
|
|
375
|
+
// Silence all internal warnings
|
|
376
|
+
configureLogger(false);
|
|
377
|
+
|
|
378
|
+
// Custom logger
|
|
379
|
+
configureLogger({
|
|
380
|
+
warn: (msg, ...args) => myLogger.warn(msg, ...args),
|
|
381
|
+
debug: (msg, ...args) => myLogger.debug(msg, ...args),
|
|
382
|
+
});
|
|
383
|
+
```
|
|
384
|
+
|
|
300
385
|
### MongoDB Operations Plugin
|
|
301
386
|
|
|
302
387
|
The `mongoOperationsPlugin` adds MongoDB-specific atomic operations like `increment`, `upsert`, `pushToArray`, etc.
|
|
@@ -764,7 +849,7 @@ Extending Repository works exactly the same with Mongoose 8 and 9. The package:
|
|
|
764
849
|
- Uses its own event system (not Mongoose middleware)
|
|
765
850
|
- Defines its own `FilterQuery` type (unaffected by Mongoose 9 rename)
|
|
766
851
|
- Properly gates update pipelines (safe for Mongoose 9's stricter defaults)
|
|
767
|
-
- All
|
|
852
|
+
- All 547 tests pass on Mongoose 9
|
|
768
853
|
|
|
769
854
|
## License
|
|
770
855
|
|
package/dist/actions/index.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export { b as aggregate, c as create, _ as deleteActions, r as read, u as update } from '../index-
|
|
1
|
+
export { b as aggregate, c as create, _ as deleteActions, r as read, u as update } from '../index-BDn5fSTE.js';
|
|
2
2
|
import 'mongoose';
|
|
3
|
-
import '../types-
|
|
3
|
+
import '../types-Jni1KgkP.js';
|
package/dist/actions/index.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
-
export { aggregate_exports as aggregate, delete_exports as deleteActions, read_exports as read, update_exports as update } from '../chunks/chunk-
|
|
2
|
-
export { create_exports as create } from '../chunks/chunk-
|
|
3
|
-
import '../chunks/chunk-
|
|
1
|
+
export { aggregate_exports as aggregate, delete_exports as deleteActions, read_exports as read, update_exports as update } from '../chunks/chunk-VWKIKZYF.js';
|
|
2
|
+
export { create_exports as create } from '../chunks/chunk-I7CWNAJB.js';
|
|
3
|
+
import '../chunks/chunk-URLJFIR7.js';
|
|
4
|
+
import '../chunks/chunk-JWUAVZ3L.js';
|
|
5
|
+
import '../chunks/chunk-WSFCRVEQ.js';
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import { ClientSession, PipelineStage } from 'mongoose';
|
|
2
|
+
import { g as Plugin } from '../types-Jni1KgkP.js';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* AI/Vector Search Type Definitions
|
|
6
|
+
*
|
|
7
|
+
* Types for vector embedding storage, search, and similarity operations.
|
|
8
|
+
* Requires MongoDB Atlas for `$vectorSearch` aggregation.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/** Supported similarity metrics for vector search */
|
|
12
|
+
type SimilarityMetric = 'cosine' | 'euclidean' | 'dotProduct';
|
|
13
|
+
/** A single piece of content to embed — text, image, or any media */
|
|
14
|
+
interface EmbeddingInput {
|
|
15
|
+
/** Text content to embed */
|
|
16
|
+
text?: string;
|
|
17
|
+
/** Image URL or base64 data (for multimodal models like CLIP, Jina v3) */
|
|
18
|
+
image?: string;
|
|
19
|
+
/** Audio URL or base64 data */
|
|
20
|
+
audio?: string;
|
|
21
|
+
/** Arbitrary media — for custom model inputs (video frames, PDFs, etc.) */
|
|
22
|
+
media?: Record<string, unknown>;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Unified embedding function — receives structured input, returns vector.
|
|
26
|
+
* Works for text-only, multimodal, or any custom model.
|
|
27
|
+
*
|
|
28
|
+
* @example
|
|
29
|
+
* ```typescript
|
|
30
|
+
* // Text-only (OpenAI)
|
|
31
|
+
* const embed: EmbedFn = async ({ text }) =>
|
|
32
|
+
* openai.embeddings.create({ input: text!, model: 'text-embedding-3-small' })
|
|
33
|
+
* .then(r => r.data[0].embedding);
|
|
34
|
+
*
|
|
35
|
+
* // Multimodal (Jina CLIP v3)
|
|
36
|
+
* const embed: EmbedFn = async ({ text, image }) =>
|
|
37
|
+
* jina.embed({ input: [{ text, image }] }).then(r => r.data[0].embedding);
|
|
38
|
+
*
|
|
39
|
+
* // Local model
|
|
40
|
+
* const embed: EmbedFn = async ({ text }) =>
|
|
41
|
+
* fetch('http://localhost:11434/api/embeddings', {
|
|
42
|
+
* method: 'POST', body: JSON.stringify({ model: 'nomic-embed-text', prompt: text })
|
|
43
|
+
* }).then(r => r.json()).then(j => j.embedding);
|
|
44
|
+
* ```
|
|
45
|
+
*/
|
|
46
|
+
type EmbedFn = (input: EmbeddingInput) => Promise<number[]>;
|
|
47
|
+
/**
|
|
48
|
+
* Batch embedding function — same contract, multiple inputs at once.
|
|
49
|
+
* Falls back to sequential EmbedFn calls if not provided.
|
|
50
|
+
*/
|
|
51
|
+
type BatchEmbedFn = (inputs: EmbeddingInput[]) => Promise<number[][]>;
|
|
52
|
+
/** Vector field configuration for a model */
|
|
53
|
+
interface VectorFieldConfig {
|
|
54
|
+
/** Field path where the vector is stored (e.g., 'embedding') */
|
|
55
|
+
path: string;
|
|
56
|
+
/** Atlas Search index name for this field */
|
|
57
|
+
index: string;
|
|
58
|
+
/** Number of dimensions in the embedding */
|
|
59
|
+
dimensions: number;
|
|
60
|
+
/** Similarity metric used by the index (informational — the index defines this) */
|
|
61
|
+
similarity?: SimilarityMetric;
|
|
62
|
+
/** Text source fields to embed from (e.g., ['title', 'description']) */
|
|
63
|
+
sourceFields?: string[];
|
|
64
|
+
/** Image/media source fields (e.g., ['imageUrl', 'thumbnailUrl']) */
|
|
65
|
+
mediaFields?: string[];
|
|
66
|
+
}
|
|
67
|
+
/** Options for vector search operations */
|
|
68
|
+
interface VectorSearchParams {
|
|
69
|
+
/** Query — vector, text string, or structured multimodal input */
|
|
70
|
+
query: number[] | string | EmbeddingInput;
|
|
71
|
+
/** Maximum number of results */
|
|
72
|
+
limit?: number;
|
|
73
|
+
/** Candidates to consider (higher = more accurate, slower). Default: max(limit * 10, 100); the value is always clamped to the range [limit, 10000] */
|
|
74
|
+
numCandidates?: number;
|
|
75
|
+
/** Pre-filter documents before vector search */
|
|
76
|
+
filter?: Record<string, unknown>;
|
|
77
|
+
/** Use exact KNN instead of approximate (slower but precise) */
|
|
78
|
+
exact?: boolean;
|
|
79
|
+
/** Which vector field config to use (default: first configured) */
|
|
80
|
+
field?: string;
|
|
81
|
+
/** MongoDB session for transactions */
|
|
82
|
+
session?: ClientSession;
|
|
83
|
+
/** Fields to include/exclude in results */
|
|
84
|
+
project?: Record<string, 0 | 1>;
|
|
85
|
+
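/** Include similarity score in results (default: true). When false and no `minScore` is set, the score is not computed and each result reports a score of 0 */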
|
|
86
|
+
includeScore?: boolean;
|
|
87
|
+
/** Minimum score threshold (0-1 for cosine) */
|
|
88
|
+
minScore?: number;
|
|
89
|
+
/** Additional pipeline stages to append after search */
|
|
90
|
+
postPipeline?: PipelineStage[];
|
|
91
|
+
}
|
|
92
|
+
/** Vector search result with score */
|
|
93
|
+
interface ScoredResult<T = Record<string, unknown>> {
|
|
94
|
+
/** The matched document */
|
|
95
|
+
doc: T;
|
|
96
|
+
/** Similarity score from vector search */
|
|
97
|
+
score: number;
|
|
98
|
+
}
|
|
99
|
+
/** Options for the vector search plugin */
|
|
100
|
+
interface VectorPluginOptions {
|
|
101
|
+
/** Vector field configurations */
|
|
102
|
+
fields: VectorFieldConfig[];
|
|
103
|
+
/** Unified embedding function (text, image, multimodal) */
|
|
104
|
+
embedFn?: EmbedFn;
|
|
105
|
+
/** Batch embedding function for bulk operations */
|
|
106
|
+
batchEmbedFn?: BatchEmbedFn;
|
|
107
|
+
/** Auto-generate embeddings on create/update (requires embedFn) */
|
|
108
|
+
autoEmbed?: boolean;
|
|
109
|
+
/**
|
|
110
|
+
* Called when auto-embed fails (e.g., embedding service down).
|
|
111
|
+
* If provided, the write operation continues without an embedding.
|
|
112
|
+
* If not provided, the error propagates and blocks the write.
|
|
113
|
+
*/
|
|
114
|
+
onEmbedError?: (error: Error, doc: unknown) => void;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
/**
|
|
118
|
+
* Vector Search Plugin
|
|
119
|
+
*
|
|
120
|
+
* Adds semantic similarity search to any repository using MongoDB Atlas Vector Search.
|
|
121
|
+
* Supports auto-embedding on write, text-to-vector search, and scored results.
|
|
122
|
+
*
|
|
123
|
+
* **Requires MongoDB Atlas** — `$vectorSearch` is an Atlas-only aggregation stage.
|
|
124
|
+
* Running on standalone or self-hosted MongoDB will throw an
|
|
125
|
+
* `Unrecognized pipeline stage name: '$vectorSearch'` error.
|
|
126
|
+
*
|
|
127
|
+
* @example
|
|
128
|
+
* ```typescript
|
|
129
|
+
* import { vectorPlugin } from '@classytic/mongokit/ai';
|
|
130
|
+
*
|
|
131
|
+
* // Text-only (OpenAI)
|
|
132
|
+
* const repo = new Repository(Product, [
|
|
133
|
+
* methodRegistryPlugin(),
|
|
134
|
+
* vectorPlugin({
|
|
135
|
+
* fields: [{ path: 'embedding', index: 'vec_idx', dimensions: 1536, similarity: 'cosine', sourceFields: ['title', 'description'] }],
|
|
136
|
+
* embedFn: async ({ text }) => openai.embeddings.create({ input: text!, model: 'text-embedding-3-small' }).then(r => r.data[0].embedding),
|
|
137
|
+
* autoEmbed: true,
|
|
138
|
+
* }),
|
|
139
|
+
* ]);
|
|
140
|
+
*
|
|
141
|
+
* // Multimodal (Jina CLIP v3 — text + images in one call)
|
|
142
|
+
* const repo = new Repository(Product, [
|
|
143
|
+
* methodRegistryPlugin(),
|
|
144
|
+
* vectorPlugin({
|
|
145
|
+
* fields: [{ path: 'embedding', index: 'vec_idx', dimensions: 1024, similarity: 'cosine', sourceFields: ['title'], mediaFields: ['imageUrl'] }],
|
|
146
|
+
* embedFn: async ({ text, image }) => jina.embed({ input: [{ text, image }] }).then(r => r.data[0].embedding),
|
|
147
|
+
* autoEmbed: true,
|
|
148
|
+
* }),
|
|
149
|
+
* ]);
|
|
150
|
+
*
|
|
151
|
+
* // Search by text
|
|
152
|
+
* const results = await repo.searchSimilar({ query: 'running shoes', limit: 10 });
|
|
153
|
+
*
|
|
154
|
+
* // Search by image + text (multimodal)
|
|
155
|
+
* const results = await repo.searchSimilar({ query: { text: 'red sneakers', image: 'https://...' }, limit: 10 });
|
|
156
|
+
*
|
|
157
|
+
* // Search by vector directly
|
|
158
|
+
* const results = await repo.searchSimilar({ query: [0.1, 0.2, ...], limit: 5 });
|
|
159
|
+
* ```
|
|
160
|
+
*/
|
|
161
|
+
|
|
162
|
+
interface VectorMethods {
|
|
163
|
+
searchSimilar<T = Record<string, unknown>>(params: VectorSearchParams): Promise<ScoredResult<T>[]>;
|
|
164
|
+
embed(input: EmbeddingInput | string): Promise<number[]>;
|
|
165
|
+
}
|
|
166
|
+
/**
|
|
167
|
+
* Builds the $vectorSearch pipeline stage
|
|
168
|
+
*/
|
|
169
|
+
declare function buildVectorSearchPipeline(field: VectorFieldConfig, queryVector: number[], params: VectorSearchParams): PipelineStage[];
|
|
170
|
+
/**
|
|
171
|
+
* Creates the vector search plugin
|
|
172
|
+
*/
|
|
173
|
+
declare function vectorPlugin(options: VectorPluginOptions): Plugin;
|
|
174
|
+
|
|
175
|
+
export { type BatchEmbedFn, type EmbedFn, type EmbeddingInput, type ScoredResult, type SimilarityMetric, type VectorFieldConfig, type VectorMethods, type VectorPluginOptions, type VectorSearchParams, buildVectorSearchPipeline, vectorPlugin };
|
package/dist/ai/index.js
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import '../chunks/chunk-WSFCRVEQ.js';
|
|
2
|
+
|
|
3
|
+
// src/ai/vector.plugin.ts
|
|
4
|
+
var MAX_NUM_CANDIDATES = 1e4;
|
|
5
|
+
/**
 * Selects the vector field configuration to use for an operation.
 *
 * @param {Array<object>} fields - Configured vector fields (each has a `path`).
 * @param {string} [fieldPath] - Path of the requested field. When falsy, the
 *   first configured field is used.
 * @returns {object} The matching field configuration.
 * @throws {Error} When `fieldPath` is given but no configured field has that path.
 */
function resolveField(fields, fieldPath) {
  // No explicit request: fall back to the first configured field.
  if (!fieldPath) return fields[0];

  for (const candidate of fields) {
    if (candidate.path === fieldPath) return candidate;
  }
  throw new Error(`[mongokit] Vector field '${fieldPath}' not configured`);
}
|
|
13
|
+
/**
 * Normalizes a search query into the structured embedding-input shape.
 *
 * @param {string|object} query - A plain text query or an already structured input.
 * @returns {object} `{ text: query }` for strings; otherwise `query` itself, unchanged.
 */
function toEmbeddingInput(query) {
  if (typeof query === "string") {
    return { text: query };
  }
  return query;
}
|
|
16
|
+
/**
 * Reads a value from `obj` by key or by dot-separated path.
 *
 * A property literally named `path` (including one containing dots) takes
 * precedence over walking the path segment by segment.
 *
 * @param {object} obj - Object to read from.
 * @param {string} path - Property name or dot-separated path (e.g. `meta.title`).
 * @returns {*} The resolved value, or `undefined` when any intermediate
 *   segment is missing or is not an object.
 */
function getNestedValue(obj, path) {
  if (path in obj) {
    return obj[path];
  }

  let current = obj;
  for (const segment of path.split(".")) {
    // Stop as soon as there is nothing left to descend into.
    if (current == null || typeof current !== "object") {
      return undefined;
    }
    current = current[segment];
  }
  return current;
}
|
|
23
|
+
/**
 * Assembles the embedding input for one vector field from a document's data.
 *
 * - `text`: truthy values of `field.sourceFields`, joined with a single space
 *   (set only when the joined text is not blank).
 * - `image`: value of the first entry of `field.mediaFields`, when it is a string.
 * - `media`: when more than one media field is configured, a map of media
 *   field path to value for every non-nullish value.
 *
 * @param {object} data - Document data to read source values from.
 * @param {object} field - Vector field config (`sourceFields`, `mediaFields`).
 * @returns {object} Embedding input; may be empty when nothing was found.
 */
function buildInputFromDoc(data, field) {
  const { sourceFields, mediaFields } = field;
  const result = {};

  if (sourceFields?.length) {
    const parts = [];
    for (const sourcePath of sourceFields) {
      const value = getNestedValue(data, sourcePath);
      if (value) parts.push(value);
    }
    const joined = parts.join(" ");
    if (joined.trim()) result.text = joined;
  }

  if (mediaFields?.length) {
    const primary = getNestedValue(data, mediaFields[0]);
    if (typeof primary === "string") result.image = primary;

    if (mediaFields.length > 1) {
      const media = {};
      for (const mediaPath of mediaFields) {
        const value = getNestedValue(data, mediaPath);
        if (value != null) media[mediaPath] = value;
      }
      result.media = media;
    }
  }

  return result;
}
|
|
43
|
+
/**
 * Tells whether an embedding input carries anything worth embedding.
 *
 * @param {object} input - Embedding input (`text`, `image`, `audio`, `media`).
 * @returns {boolean} `true` when there is non-blank text, an image, audio,
 *   or a `media` object with at least one key.
 */
function hasContent(input) {
  if (input.text?.trim()) return true;
  if (input.image) return true;
  if (input.audio) return true;

  const { media } = input;
  return Boolean(media && Object.keys(media).length);
}
|
|
46
|
+
/**
 * Decides whether a `$project` spec is an inclusion projection.
 *
 * `_id` may be combined with either style, so the other keys decide; when
 * `_id` is the only key, its own value decides. An empty spec is treated as
 * inclusion.
 *
 * @param {object} project - Projection spec (`{ field: 0 | 1 }`).
 * @returns {boolean} `true` for inclusion-style projections.
 */
function isInclusionProjection(project) {
  const entries = Object.entries(project);
  const nonIdEntries = entries.filter(([key]) => key !== "_id");
  const deciding = nonIdEntries.length ? nonIdEntries : entries;
  return deciding.length === 0 || deciding.some(([, value]) => Boolean(value));
}

/**
 * Builds the aggregation pipeline for a `$vectorSearch` query.
 *
 * Stages, in order: `$vectorSearch`; `$addFields` exposing the search score
 * as `_score` (unless `includeScore` is `false` and no `minScore` is set);
 * `$match` on `minScore`; `$project`; then any `postPipeline` stages.
 *
 * @param {object} field - Vector field config; `index` and `path` are used.
 * @param {number[]} queryVector - Query embedding.
 * @param {object} params - Search parameters.
 * @param {number} [params.limit=10] - Maximum number of results.
 * @param {number} [params.numCandidates] - Candidates for approximate search.
 *   Defaults to `max(limit * 10, 100)` and is clamped to
 *   `[limit, MAX_NUM_CANDIDATES]`. Not emitted when `exact` is set.
 * @param {object} [params.filter] - Pre-filter passed to `$vectorSearch`.
 * @param {boolean} [params.exact] - Request exact search.
 * @param {boolean} [params.includeScore] - Set to `false` to skip `_score`.
 * @param {number} [params.minScore] - Minimum `_score` to keep.
 * @param {object} [params.project] - Projection spec. `_score: 1` is merged in
 *   only for inclusion projections; exclusion projections are passed through
 *   untouched so inclusion and exclusion are never mixed.
 * @param {object[]} [params.postPipeline] - Stages appended at the end.
 * @returns {object[]} Aggregation pipeline stages.
 */
function buildVectorSearchPipeline(field, queryVector, params) {
  const limit = params.limit ?? 10;

  const vectorSearch = {
    index: field.index,
    path: field.path,
    queryVector
  };
  if (!params.exact) {
    // numCandidates only applies to approximate search, so it is left out
    // entirely when an exact search is requested.
    const rawCandidates = params.numCandidates ?? Math.max(limit * 10, 100);
    vectorSearch.numCandidates = Math.min(Math.max(rawCandidates, limit), MAX_NUM_CANDIDATES);
  }
  vectorSearch.limit = limit;
  if (params.filter) vectorSearch.filter = params.filter;
  if (params.exact) vectorSearch.exact = true;

  const stages = [{ $vectorSearch: vectorSearch }];

  const needsScore = params.includeScore !== false || params.minScore != null;
  if (needsScore) {
    stages.push({ $addFields: { _score: { $meta: "vectorSearchScore" } } });
  }
  if (params.minScore != null) {
    stages.push({ $match: { _score: { $gte: params.minScore } } });
  }
  if (params.project) {
    // An exclusion projection already keeps `_score` (it is simply not
    // excluded); adding `_score: 1` to it would mix both projection styles.
    const projection = isInclusionProjection(params.project)
      ? { ...params.project, _score: 1 }
      : { ...params.project };
    stages.push({ $project: projection });
  }
  if (params.postPipeline?.length) {
    stages.push(...params.postPipeline);
  }
  return stages;
}
|
|
77
|
+
/**
 * Creates the vector search plugin.
 *
 * When applied to a repository it registers two methods:
 * - `searchSimilar(params)`: resolves the query vector (embedding non-array
 *   queries through `embedFn`), checks its length against the field's
 *   `dimensions`, runs the pipeline through `repo.Model.aggregate`, and
 *   returns `{ doc, score }` pairs.
 * - `embed(input)`: embeds a string or structured input through `embedFn`.
 *
 * When `autoEmbed` is enabled and `embedFn` is provided, it also hooks
 * `before:create`, `before:createMany` and `before:update` to fill in the
 * configured vector fields from their source/media fields.
 *
 * @param {object} options - Plugin options.
 * @param {object[]} options.fields - Vector field configs (at least one).
 * @param {Function} [options.embedFn] - Embeds a single input.
 * @param {Function} [options.batchEmbedFn] - Embeds many inputs at once; must
 *   return exactly one vector per input.
 * @param {boolean} [options.autoEmbed=false] - Embed automatically on writes.
 * @param {Function} [options.onEmbedError] - Called as `(error, doc)` when
 *   auto-embedding fails; when provided the write continues without an
 *   embedding, otherwise the error propagates.
 * @returns {{ name: string, apply: Function }} The plugin object.
 * @throws {Error} When `options.fields` is missing or empty.
 */
function vectorPlugin(options) {
  const { fields, autoEmbed = false } = options;
  if (!fields?.length) {
    throw new Error("[mongokit] vectorPlugin requires at least one field config");
  }
  const { embedFn, batchEmbedFn } = options;
  return {
    name: "vector",
    apply(repo) {
      if (!repo.registerMethod) {
        throw new Error("[mongokit] vectorPlugin requires methodRegistryPlugin");
      }
      repo.registerMethod("searchSimilar", async function searchSimilar(params) {
        const field = resolveField(fields, params.field);
        let queryVector;
        if (Array.isArray(params.query)) {
          // An array query is taken to be the vector itself.
          queryVector = params.query;
        } else {
          if (!embedFn) {
            throw new Error("[mongokit] Non-vector queries require embedFn in vectorPlugin options");
          }
          const input = toEmbeddingInput(params.query);
          queryVector = await embedFn(input);
        }
        if (queryVector.length !== field.dimensions) {
          throw new Error(
            `[mongokit] Query vector has ${queryVector.length} dimensions, expected ${field.dimensions}`
          );
        }
        const pipeline = buildVectorSearchPipeline(field, queryVector, params);
        const agg = repo.Model.aggregate(pipeline);
        if (params.session) agg.session(params.session);
        const results = await agg.exec();
        return results.map((doc) => {
          // Split the synthetic `_score` field from the document itself.
          const score = doc._score ?? 0;
          const { _score, ...rest } = doc;
          return { doc: rest, score };
        });
      });
      repo.registerMethod("embed", async function embed(input) {
        if (!embedFn) {
          throw new Error("[mongokit] embed requires embedFn in vectorPlugin options");
        }
        return embedFn(typeof input === "string" ? { text: input } : input);
      });
      if (autoEmbed && embedFn) {
        const { onEmbedError } = options;
        // Embeds one input; with `onEmbedError` set, failures are reported
        // and `null` is returned instead of throwing.
        const safeEmbed = async (input, doc) => {
          try {
            return await embedFn(input);
          } catch (err) {
            if (onEmbedError) {
              onEmbedError(err, doc);
              return null;
            }
            throw err;
          }
        };
        // Fills `data[field.path]` unless the caller already supplied an array.
        const embedFromSource = async (data, field) => {
          if (data[field.path] && Array.isArray(data[field.path])) return;
          const input = buildInputFromDoc(data, field);
          if (!hasContent(input)) return;
          const vector = await safeEmbed(input, data);
          if (vector) data[field.path] = vector;
        };
        const embedBatchFromSource = async (dataArray, field) => {
          const toEmbed = [];
          for (let i = 0; i < dataArray.length; i++) {
            const data = dataArray[i];
            if (data[field.path] && Array.isArray(data[field.path])) continue;
            const input = buildInputFromDoc(data, field);
            if (hasContent(input)) toEmbed.push({ idx: i, input });
          }
          if (!toEmbed.length) return;
          if (batchEmbedFn) {
            try {
              const vectors = await batchEmbedFn(toEmbed.map((e) => e.input));
              // Validate before assigning anything: a short or non-array
              // result would otherwise write `undefined` into documents.
              if (!Array.isArray(vectors) || vectors.length !== toEmbed.length) {
                const received = Array.isArray(vectors) ? vectors.length : 0;
                throw new Error(
                  `[mongokit] batchEmbedFn returned ${received} vectors for ${toEmbed.length} inputs`
                );
              }
              for (let i = 0; i < toEmbed.length; i++) {
                dataArray[toEmbed[i].idx][field.path] = vectors[i];
              }
            } catch (err) {
              if (onEmbedError) {
                onEmbedError(err, dataArray);
                return;
              }
              throw err;
            }
          } else {
            // No batch function: embed the pending documents one at a time.
            for (const entry of toEmbed) {
              const vector = await safeEmbed(entry.input, dataArray[entry.idx]);
              if (vector) dataArray[entry.idx][field.path] = vector;
            }
          }
        };
        repo.on("before:create", async (context) => {
          if (!context.data) return;
          for (const field of fields) {
            await embedFromSource(context.data, field);
          }
        });
        repo.on("before:createMany", async (context) => {
          if (!context.dataArray?.length) return;
          for (const field of fields) {
            await embedBatchFromSource(context.dataArray, field);
          }
        });
        repo.on("before:update", async (context) => {
          if (!context.data) return;
          // Only re-embed fields whose source or media fields are being updated.
          const fieldsToEmbed = fields.filter((field) => {
            const allFields = [...field.sourceFields ?? [], ...field.mediaFields ?? []];
            return allFields.length > 0 && allFields.some((f) => f in context.data);
          });
          if (!fieldsToEmbed.length) return;
          const existing = await repo.Model.findById(context.id).lean().session(context.session ?? null);
          if (!existing) return;
          for (const field of fieldsToEmbed) {
            // Embed from the stored document overlaid with the pending update,
            // leaving out the vector field itself.
            const merged = { ...existing, ...context.data };
            delete merged[field.path];
            const input = buildInputFromDoc(merged, field);
            if (!hasContent(input)) continue;
            const vector = await safeEmbed(input, merged);
            if (vector) context.data[field.path] = vector;
          }
        });
      }
    }
  };
}
|
|
205
|
+
|
|
206
|
+
export { buildVectorSearchPipeline, vectorPlugin };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { createError } from './chunk-
|
|
1
|
+
import { createError } from './chunk-JWUAVZ3L.js';
|
|
2
2
|
import mongoose from 'mongoose';
|
|
3
3
|
|
|
4
4
|
function encodeCursor(doc, primaryField, sort, version = 1) {
|
|
@@ -18,6 +18,13 @@ function decodeCursor(token) {
|
|
|
18
18
|
try {
|
|
19
19
|
const json = Buffer.from(token, "base64").toString("utf-8");
|
|
20
20
|
const payload = JSON.parse(json);
|
|
21
|
+
if (!payload || typeof payload !== "object" || !("v" in payload) || !("t" in payload) || !("id" in payload) || !("idType" in payload) || !payload.sort || typeof payload.sort !== "object" || typeof payload.ver !== "number") {
|
|
22
|
+
throw new Error("Malformed cursor payload");
|
|
23
|
+
}
|
|
24
|
+
const VALID_TYPES = ["date", "objectid", "boolean", "number", "string", "null", "unknown"];
|
|
25
|
+
if (!VALID_TYPES.includes(payload.t) || !VALID_TYPES.includes(payload.idType)) {
|
|
26
|
+
throw new Error("Invalid cursor value type");
|
|
27
|
+
}
|
|
21
28
|
return {
|
|
22
29
|
value: rehydrateValue(payload.v, payload.t),
|
|
23
30
|
id: rehydrateValue(payload.id, payload.idType),
|
|
@@ -41,11 +48,13 @@ function validateCursorVersion(cursorVersion, expectedVersion) {
|
|
|
41
48
|
}
|
|
42
49
|
}
|
|
43
50
|
/**
 * Converts a cursor value into a JSON-friendly representation.
 *
 * @param {*} value - Value taken from a document field.
 * @returns {*} `null` for `null`/`undefined`, an ISO-8601 string for `Date`,
 *   the string form for a mongoose `ObjectId`, and the value itself otherwise.
 */
function serializeValue(value) {
  if (value == null) return null;
  if (value instanceof Date) return value.toISOString();
  return value instanceof mongoose.Types.ObjectId ? value.toString() : value;
}
|
|
48
56
|
function getValueType(value) {
|
|
57
|
+
if (value === null || value === void 0) return "null";
|
|
49
58
|
if (value instanceof Date) return "date";
|
|
50
59
|
if (value instanceof mongoose.Types.ObjectId) return "objectid";
|
|
51
60
|
if (typeof value === "boolean") return "boolean";
|
|
@@ -54,6 +63,7 @@ function getValueType(value) {
|
|
|
54
63
|
return "unknown";
|
|
55
64
|
}
|
|
56
65
|
function rehydrateValue(serialized, type) {
|
|
66
|
+
if (type === "null" || serialized === null) return null;
|
|
57
67
|
switch (type) {
|
|
58
68
|
case "date":
|
|
59
69
|
return new Date(serialized);
|
|
@@ -113,6 +123,23 @@ function buildKeysetFilter(baseFilters, sort, cursorValue, cursorId) {
|
|
|
113
123
|
const primaryField = Object.keys(sort).find((k) => k !== "_id") || "_id";
|
|
114
124
|
const direction = sort[primaryField];
|
|
115
125
|
const operator = direction === 1 ? "$gt" : "$lt";
|
|
126
|
+
if (cursorValue === null || cursorValue === void 0) {
|
|
127
|
+
if (direction === 1) {
|
|
128
|
+
return {
|
|
129
|
+
...baseFilters,
|
|
130
|
+
$or: [
|
|
131
|
+
{ [primaryField]: null, _id: { $gt: cursorId } },
|
|
132
|
+
{ [primaryField]: { $ne: null } }
|
|
133
|
+
]
|
|
134
|
+
};
|
|
135
|
+
} else {
|
|
136
|
+
return {
|
|
137
|
+
...baseFilters,
|
|
138
|
+
[primaryField]: null,
|
|
139
|
+
_id: { $lt: cursorId }
|
|
140
|
+
};
|
|
141
|
+
}
|
|
142
|
+
}
|
|
116
143
|
return {
|
|
117
144
|
...baseFilters,
|
|
118
145
|
$or: [
|