@classytic/mongokit 3.2.0 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +470 -193
  2. package/dist/actions/index.d.mts +9 -0
  3. package/dist/actions/index.mjs +15 -0
  4. package/dist/aggregate-BAi4Do-X.mjs +767 -0
  5. package/dist/aggregate-CCHI7F51.d.mts +269 -0
  6. package/dist/ai/index.d.mts +125 -0
  7. package/dist/ai/index.mjs +203 -0
  8. package/dist/cache-keys-C8Z9B5sw.mjs +204 -0
  9. package/dist/chunk-DQk6qfdC.mjs +18 -0
  10. package/dist/create-BuO6xt0v.mjs +55 -0
  11. package/dist/custom-id.plugin-B_zIs6gE.mjs +1818 -0
  12. package/dist/custom-id.plugin-BzZI4gnE.d.mts +893 -0
  13. package/dist/index.d.mts +1012 -0
  14. package/dist/index.mjs +1906 -0
  15. package/dist/limits-DsNeCx4D.mjs +299 -0
  16. package/dist/logger-D8ily-PP.mjs +51 -0
  17. package/dist/mongooseToJsonSchema-COdDEkIJ.mjs +317 -0
  18. package/dist/{mongooseToJsonSchema-CaRF_bCN.d.ts → mongooseToJsonSchema-Wbvjfwkn.d.mts} +16 -89
  19. package/dist/pagination/PaginationEngine.d.mts +93 -0
  20. package/dist/pagination/PaginationEngine.mjs +196 -0
  21. package/dist/plugins/index.d.mts +3 -0
  22. package/dist/plugins/index.mjs +3 -0
  23. package/dist/types-D-gploPr.d.mts +1241 -0
  24. package/dist/utils/{index.d.ts → index.d.mts} +14 -21
  25. package/dist/utils/index.mjs +5 -0
  26. package/package.json +21 -21
  27. package/dist/actions/index.d.ts +0 -3
  28. package/dist/actions/index.js +0 -5
  29. package/dist/ai/index.d.ts +0 -175
  30. package/dist/ai/index.js +0 -206
  31. package/dist/chunks/chunk-2ZN65ZOP.js +0 -93
  32. package/dist/chunks/chunk-44KXLGPO.js +0 -388
  33. package/dist/chunks/chunk-DEVXDBRL.js +0 -1226
  34. package/dist/chunks/chunk-I7CWNAJB.js +0 -46
  35. package/dist/chunks/chunk-JWUAVZ3L.js +0 -8
  36. package/dist/chunks/chunk-UE2IEXZJ.js +0 -306
  37. package/dist/chunks/chunk-URLJFIR7.js +0 -22
  38. package/dist/chunks/chunk-VWKIKZYF.js +0 -737
  39. package/dist/chunks/chunk-WSFCRVEQ.js +0 -7
  40. package/dist/index-BDn5fSTE.d.ts +0 -516
  41. package/dist/index.d.ts +0 -1422
  42. package/dist/index.js +0 -1893
  43. package/dist/pagination/PaginationEngine.d.ts +0 -117
  44. package/dist/pagination/PaginationEngine.js +0 -3
  45. package/dist/plugins/index.d.ts +0 -922
  46. package/dist/plugins/index.js +0 -6
  47. package/dist/types-Jni1KgkP.d.ts +0 -780
  48. package/dist/utils/index.js +0 -5
@@ -0,0 +1,269 @@
1
+ import { C as GroupResult, F as ObjectId, G as PopulateSpec, K as ReadPreferenceType, N as MinMaxResult, R as OperationOptions, St as LookupOptions, _ as DeleteResult, at as SortSpec, ct as UpdateManyResult, et as SelectSpec, i as AnyDocument, lt as UpdateOptions, p as CreateOptions, ut as UpdateWithValidationResult } from "./types-D-gploPr.mjs";
2
+ import { ClientSession, Model, PipelineStage } from "mongoose";
3
+
4
//#region src/actions/create.d.ts
declare namespace create_d_exports {
  export { create, createDefault, createMany, upsert };
}
/**
 * Create single document.
 *
 * @param Model - Mongoose model to create on
 * @param data - Field values for the new document
 * @param options - Create options (session, etc.)
 * @returns The created document
 */
declare function create<TDoc = AnyDocument>(Model: Model<TDoc>, data: Record<string, unknown>, options?: CreateOptions): Promise<TDoc>;
/**
 * Create multiple documents in one call.
 *
 * @param dataArray - One field-value object per document to insert
 * @returns The created documents, in input order
 */
declare function createMany<TDoc = AnyDocument>(Model: Model<TDoc>, dataArray: Record<string, unknown>[], options?: CreateOptions): Promise<TDoc[]>;
/**
 * Create with defaults (useful for initialization).
 * `overrides` replace schema defaults for the listed fields.
 */
declare function createDefault<TDoc = AnyDocument>(Model: Model<TDoc>, overrides?: Record<string, unknown>, options?: CreateOptions): Promise<TDoc>;
/**
 * Upsert (create or update).
 *
 * @param query - Match criteria; the matched document is updated, otherwise one is created
 * @param data - Values to apply
 * @param options.updatePipeline - presumably switches `data` to an aggregation-pipeline update — confirm in implementation
 * @returns The resulting document, or null
 */
declare function upsert<TDoc = AnyDocument>(Model: Model<TDoc>, query: Record<string, unknown>, data: Record<string, unknown>, options?: {
  session?: ClientSession;
  updatePipeline?: boolean;
}): Promise<TDoc | null>;
27
declare namespace read_d_exports {
  export { count, exists, getAll, getById, getByQuery, getOrCreate, tryGetByQuery };
}
/**
 * Get document by ID
 *
 * @param Model - Mongoose model
 * @param id - Document ID
 * @param options - Query options
 * @returns Document or null
 * @throws Error if document not found and throwOnNotFound is true
 */
declare function getById<TDoc = AnyDocument>(Model: Model<TDoc>, id: string | ObjectId, options?: OperationOptions): Promise<TDoc | null>;
/**
 * Get document by query
 *
 * @param Model - Mongoose model
 * @param query - MongoDB query
 * @param options - Query options
 * @returns Document or null
 * @throws Error if document not found and throwOnNotFound is true
 */
declare function getByQuery<TDoc = AnyDocument>(Model: Model<TDoc>, query: Record<string, unknown>, options?: OperationOptions): Promise<TDoc | null>;
/**
 * Get document by query without throwing (returns null if not found).
 * The `throwOnNotFound` option is excluded from the signature on purpose.
 */
declare function tryGetByQuery<TDoc = AnyDocument>(Model: Model<TDoc>, query: Record<string, unknown>, options?: Omit<OperationOptions, "throwOnNotFound">): Promise<TDoc | null>;
/**
 * Get all documents (basic query without pagination).
 * For pagination, use Repository.paginate() or Repository.stream().
 */
declare function getAll<TDoc = AnyDocument>(Model: Model<TDoc>, query?: Record<string, unknown>, options?: {
  select?: SelectSpec;
  populate?: PopulateSpec;
  sort?: SortSpec;
  limit?: number;
  skip?: number;
  lean?: boolean;
  session?: ClientSession;
  readPreference?: ReadPreferenceType;
}): Promise<TDoc[]>;
/**
 * Get or create document (upsert).
 * `createData` is only applied when no document matches `query`.
 */
declare function getOrCreate<TDoc = AnyDocument>(Model: Model<TDoc>, query: Record<string, unknown>, createData: Record<string, unknown>, options?: {
  session?: ClientSession;
  updatePipeline?: boolean;
}): Promise<TDoc | null>;
/**
 * Count documents matching query.
 *
 * @returns Number of matching documents
 */
declare function count(Model: Model<any>, query?: Record<string, unknown>, options?: {
  session?: ClientSession;
  readPreference?: ReadPreferenceType;
}): Promise<number>;
/**
 * Check if document exists.
 *
 * @returns A projection containing only `_id` when a match exists, otherwise null
 */
declare function exists(Model: Model<any>, query: Record<string, unknown>, options?: {
  session?: ClientSession;
  readPreference?: ReadPreferenceType;
}): Promise<{
  _id: unknown;
} | null>;
91
declare namespace update_d_exports {
  export { increment, pullFromArray, pushToArray, update, updateByQuery, updateMany, updateWithConstraints, updateWithValidation };
}
/**
 * Update by ID.
 *
 * @returns The updated document
 */
declare function update<TDoc = AnyDocument>(Model: Model<TDoc>, id: string | ObjectId, data: Record<string, unknown>, options?: UpdateOptions): Promise<TDoc>;
/**
 * Update with query constraints (optimized).
 * Returns null if constraints not met (not an error).
 *
 * @param constraints - Extra match criteria combined with the id lookup
 */
declare function updateWithConstraints<TDoc = AnyDocument>(Model: Model<TDoc>, id: string | ObjectId, data: Record<string, unknown>, constraints?: Record<string, unknown>, options?: UpdateOptions): Promise<TDoc | null>;
/**
 * Validation options for smart update.
 */
interface ValidationOptions {
  // Derives match constraints from the update payload (fast path)
  buildConstraints?: (data: Record<string, unknown>) => Record<string, unknown>;
  // Full validation against the existing document (slow path, detailed errors)
  validateUpdate?: (existing: Record<string, unknown>, data: Record<string, unknown>) => {
    valid: boolean;
    message?: string;
    violations?: Array<{
      field: string;
      reason: string;
    }>;
  };
}
/**
 * Update with validation (smart optimization).
 * 1-query on success, 2-queries for detailed errors.
 */
declare function updateWithValidation<TDoc = AnyDocument>(Model: Model<TDoc>, id: string | ObjectId, data: Record<string, unknown>, validationOptions?: ValidationOptions, options?: UpdateOptions): Promise<UpdateWithValidationResult<TDoc>>;
/**
 * Update many documents.
 *
 * @returns Matched/modified counts (see UpdateManyResult)
 */
declare function updateMany(Model: Model<unknown>, query: Record<string, unknown>, data: Record<string, unknown>, options?: {
  session?: ClientSession;
  updatePipeline?: boolean;
}): Promise<UpdateManyResult>;
/**
 * Update by query (first match).
 */
declare function updateByQuery<TDoc = AnyDocument>(Model: Model<TDoc>, query: Record<string, unknown>, data: Record<string, unknown>, options?: UpdateOptions): Promise<TDoc | null>;
/**
 * Increment field.
 *
 * @param value - Amount to add; presumably defaults to 1 — confirm in implementation
 */
declare function increment<TDoc = AnyDocument>(Model: Model<TDoc>, id: string | ObjectId, field: string, value?: number, options?: UpdateOptions): Promise<TDoc>;
/**
 * Push to array field.
 */
declare function pushToArray<TDoc = AnyDocument>(Model: Model<TDoc>, id: string | ObjectId, field: string, value: unknown, options?: UpdateOptions): Promise<TDoc>;
/**
 * Pull from array field.
 */
declare function pullFromArray<TDoc = AnyDocument>(Model: Model<TDoc>, id: string | ObjectId, field: string, value: unknown, options?: UpdateOptions): Promise<TDoc>;
145
declare namespace delete_d_exports {
  export { deleteById, deleteByQuery, deleteMany, restore, softDelete };
}
/**
 * Delete by ID.
 *
 * @param options.query - Additional match criteria; presumably merged with the id filter — confirm in implementation
 */
declare function deleteById(Model: Model<any>, id: string | ObjectId, options?: {
  session?: ClientSession;
  query?: Record<string, unknown>;
}): Promise<DeleteResult>;
/**
 * Delete many documents.
 */
declare function deleteMany(Model: Model<any>, query: Record<string, unknown>, options?: {
  session?: ClientSession;
}): Promise<DeleteResult>;
/**
 * Delete by query.
 *
 * @param options.throwOnNotFound - When true, a miss raises instead of returning a zero-count result
 */
declare function deleteByQuery(Model: Model<any>, query: Record<string, unknown>, options?: {
  session?: ClientSession;
  throwOnNotFound?: boolean;
}): Promise<DeleteResult>;
/**
 * Soft delete (set deleted flag).
 *
 * @param options.userId - Recorded as the deleting actor; exact field set is implementation-defined
 */
declare function softDelete<TDoc = AnyDocument>(Model: Model<TDoc>, id: string | ObjectId, options?: {
  session?: ClientSession;
  userId?: string;
}): Promise<DeleteResult>;
/**
 * Restore soft deleted document.
 */
declare function restore<TDoc = AnyDocument>(Model: Model<TDoc>, id: string | ObjectId, options?: {
  session?: ClientSession;
}): Promise<DeleteResult>;
181
declare namespace aggregate_d_exports {
  export { aggregate, aggregatePaginate, average, countBy, distinct, facet, groupBy, lookup, minMax, sum, unwind };
}
/**
 * Execute aggregation pipeline.
 *
 * @returns Raw pipeline results, typed via TResult
 */
declare function aggregate<TResult = unknown>(Model: Model<any>, pipeline: PipelineStage[], options?: {
  session?: ClientSession;
}): Promise<TResult[]>;
/**
 * Aggregate with pagination using native MongoDB $facet.
 * WARNING: $facet results must be <16MB. For larger results (limit >1000),
 * consider using Repository.aggregatePaginate() or splitting into separate queries.
 */
declare function aggregatePaginate<TDoc = AnyDocument>(Model: Model<TDoc>, pipeline: PipelineStage[], options?: {
  page?: number;
  limit?: number;
  session?: ClientSession;
}): Promise<{
  docs: TDoc[];
  total: number;
  page: number;
  limit: number;
  pages: number;
  hasNext: boolean;
  hasPrev: boolean;
}>;
/**
 * Group documents by field value.
 */
declare function groupBy(Model: Model<any>, field: string, options?: {
  limit?: number;
  session?: ClientSession;
}): Promise<GroupResult[]>;
/**
 * Count by field values, optionally pre-filtered by `query`.
 */
declare function countBy(Model: Model<any>, field: string, query?: Record<string, unknown>, options?: {
  session?: ClientSession;
}): Promise<GroupResult[]>;
/**
 * Lookup (join) with another collection
 *
 * MongoDB $lookup has two mutually exclusive forms:
 * 1. Simple form: { from, localField, foreignField, as }
 * 2. Pipeline form: { from, let, pipeline, as }
 *
 * This function automatically selects the appropriate form based on parameters.
 */
declare function lookup<TDoc = AnyDocument>(Model: Model<TDoc>, lookupOptions: LookupOptions): Promise<TDoc[]>;
/**
 * Unwind array field.
 *
 * @param options.preserveEmpty - Keep documents whose array is null/missing/empty
 */
declare function unwind<TDoc = AnyDocument>(Model: Model<TDoc>, field: string, options?: {
  preserveEmpty?: boolean;
  session?: ClientSession;
}): Promise<TDoc[]>;
/**
 * Facet search (multiple aggregations in one query).
 */
declare function facet<TResult = Record<string, unknown[]>>(Model: Model<any>, facets: Record<string, PipelineStage[]>, options?: {
  session?: ClientSession;
}): Promise<TResult[]>;
/**
 * Get distinct values for a field.
 */
declare function distinct<T = unknown>(Model: Model<any>, field: string, query?: Record<string, unknown>, options?: {
  session?: ClientSession;
}): Promise<T[]>;
/**
 * Calculate sum of a numeric field.
 */
declare function sum(Model: Model<any>, field: string, query?: Record<string, unknown>, options?: {
  session?: ClientSession;
}): Promise<number>;
/**
 * Calculate average of a numeric field.
 */
declare function average(Model: Model<any>, field: string, query?: Record<string, unknown>, options?: {
  session?: ClientSession;
}): Promise<number>;
/**
 * Min/Max of a field in one pass.
 */
declare function minMax(Model: Model<any>, field: string, query?: Record<string, unknown>, options?: {
  session?: ClientSession;
}): Promise<MinMaxResult>;
//#endregion
269
+ export { create_d_exports as a, read_d_exports as i, delete_d_exports as n, update_d_exports as r, aggregate_d_exports as t };
@@ -0,0 +1,125 @@
1
+ import { H as Plugin } from "../types-D-gploPr.mjs";
2
+ import { ClientSession, PipelineStage } from "mongoose";
3
+
4
//#region src/ai/types.d.ts
/** Supported similarity metrics for vector search */
type SimilarityMetric = 'cosine' | 'euclidean' | 'dotProduct';
/** A single piece of content to embed — text, image, or any media */
interface EmbeddingInput {
  /** Text content to embed */
  text?: string;
  /** Image URL or base64 data (for multimodal models like CLIP, Jina v3) */
  image?: string;
  /** Audio URL or base64 data */
  audio?: string;
  /** Arbitrary media — for custom model inputs (video frames, PDFs, etc.) */
  media?: Record<string, unknown>;
}
/**
 * Unified embedding function — receives structured input, returns vector.
 * Works for text-only, multimodal, or any custom model.
 *
 * @example
 * ```typescript
 * // Text-only (OpenAI)
 * const embed: EmbedFn = async ({ text }) =>
 *   openai.embeddings.create({ input: text!, model: 'text-embedding-3-small' })
 *     .then(r => r.data[0].embedding);
 *
 * // Multimodal (Jina CLIP v3)
 * const embed: EmbedFn = async ({ text, image }) =>
 *   jina.embed({ input: [{ text, image }] }).then(r => r.data[0].embedding);
 *
 * // Local model
 * const embed: EmbedFn = async ({ text }) =>
 *   fetch('http://localhost:11434/api/embeddings', {
 *     method: 'POST', body: JSON.stringify({ model: 'nomic-embed-text', prompt: text })
 *   }).then(r => r.json()).then(j => j.embedding);
 * ```
 */
type EmbedFn = (input: EmbeddingInput) => Promise<number[]>;
/**
 * Batch embedding function — same contract, multiple inputs at once.
 * Falls back to sequential EmbedFn calls if not provided.
 */
type BatchEmbedFn = (inputs: EmbeddingInput[]) => Promise<number[][]>;
/** Vector field configuration for a model */
interface VectorFieldConfig {
  /** Field path where the vector is stored (e.g., 'embedding') */
  path: string;
  /** Atlas Search index name for this field */
  index: string;
  /** Number of dimensions in the embedding */
  dimensions: number;
  /** Similarity metric used by the index (informational — the index defines this) */
  similarity?: SimilarityMetric;
  /** Text source fields to embed from (e.g., ['title', 'description']) */
  sourceFields?: string[];
  /** Image/media source fields (e.g., ['imageUrl', 'thumbnailUrl']) */
  mediaFields?: string[];
}
/** Options for vector search operations */
interface VectorSearchParams {
  /** Query — vector, text string, or structured multimodal input */
  query: number[] | string | EmbeddingInput;
  /** Maximum number of results */
  limit?: number;
  /** Candidates to consider (higher = more accurate, slower). Default: limit * 10 */
  numCandidates?: number;
  /** Pre-filter documents before vector search */
  filter?: Record<string, unknown>;
  /** Use exact KNN instead of approximate (slower but precise) */
  exact?: boolean;
  /** Which vector field config to use (default: first configured) */
  field?: string;
  /** MongoDB session for transactions */
  session?: ClientSession;
  /** Fields to include/exclude in results */
  project?: Record<string, 0 | 1>;
  /** Include similarity score in results */
  includeScore?: boolean;
  /** Minimum score threshold (0-1 for cosine) */
  minScore?: number;
  /** Additional pipeline stages to append after search */
  postPipeline?: PipelineStage[];
}
/** Vector search result with score */
interface ScoredResult<T = Record<string, unknown>> {
  /** The matched document */
  doc: T;
  /** Similarity score from vector search */
  score: number;
}
/** Options for the vector search plugin */
interface VectorPluginOptions {
  /** Vector field configurations */
  fields: VectorFieldConfig[];
  /** Unified embedding function (text, image, multimodal) */
  embedFn?: EmbedFn;
  /** Batch embedding function for bulk operations */
  batchEmbedFn?: BatchEmbedFn;
  /** Auto-generate embeddings on create/update (requires embedFn) */
  autoEmbed?: boolean;
  /**
   * Called when auto-embed fails (e.g., embedding service down).
   * If provided, the write operation continues without an embedding.
   * If not provided, the error propagates and blocks the write.
   */
  onEmbedError?: (error: Error, doc: unknown) => void;
}
//#endregion
111
//#region src/ai/vector.plugin.d.ts
/** Repository methods contributed by the vector plugin. */
interface VectorMethods {
  /** Runs a $vectorSearch and returns documents paired with their similarity score. */
  searchSimilar<T = Record<string, unknown>>(params: VectorSearchParams): Promise<ScoredResult<T>[]>;
  /** Embeds a string (treated as text) or a structured EmbeddingInput via the configured embedFn. */
  embed(input: EmbeddingInput | string): Promise<number[]>;
}
/**
 * Builds the $vectorSearch pipeline stage (plus optional score/filter/project
 * stages) for the given field configuration and query vector.
 */
declare function buildVectorSearchPipeline(field: VectorFieldConfig, queryVector: number[], params: VectorSearchParams): PipelineStage[];
/**
 * Creates the vector search plugin.
 */
declare function vectorPlugin(options: VectorPluginOptions): Plugin;
//#endregion
125
+ export { type BatchEmbedFn, type EmbedFn, type EmbeddingInput, type ScoredResult, type SimilarityMetric, type VectorFieldConfig, type VectorMethods, type VectorPluginOptions, type VectorSearchParams, buildVectorSearchPipeline, vectorPlugin };
@@ -0,0 +1,203 @@
1
//#region src/ai/vector.plugin.ts
/** Maximum numCandidates allowed by Atlas Vector Search (1e4 = 10,000). */
const MAX_NUM_CANDIDATES = 1e4;
4
/**
 * Picks the vector field configuration matching `fieldPath`, or the first
 * configured field when no path is given.
 *
 * @param fields - Configured vector fields.
 * @param fieldPath - Optional field path to select.
 * @returns The matching field config.
 * @throws Error when `fieldPath` names a field that is not configured.
 */
function resolveField(fields, fieldPath) {
  if (!fieldPath) return fields[0];
  for (const candidate of fields) {
    if (candidate.path === fieldPath) return candidate;
  }
  throw new Error(`[mongokit] Vector field '${fieldPath}' not configured`);
}
15
/**
 * Normalizes a query argument into a structured EmbeddingInput.
 * A bare string becomes `{ text }`; anything else is assumed to already be
 * an EmbeddingInput and is returned as-is.
 */
function toEmbeddingInput(query) {
  if (typeof query === "string") return { text: query };
  return query;
}
21
/**
 * Reads a value from `obj` at a possibly dot-notated `path`
 * (e.g. 'metadata.title'). A literal key containing dots takes priority
 * over nested traversal. Yields undefined as soon as a missing segment
 * or a non-object value is reached mid-path.
 */
function getNestedValue(obj, path) {
  if (path in obj) return obj[path];
  let current = obj;
  for (const segment of path.split(".")) {
    current = current != null && typeof current === "object" ? current[segment] : undefined;
  }
  return current;
}
30
/**
 * Builds an EmbeddingInput from a document's configured source fields.
 * Truthy text sources are joined with a single space; the first media
 * field (when a string) becomes `image`; with more than one media field,
 * all non-nullish media values are also collected under `media`.
 */
function buildInputFromDoc(data, field) {
  const input = {};
  const textSources = field.sourceFields ?? [];
  if (textSources.length) {
    const parts = [];
    for (const src of textSources) {
      const value = getNestedValue(data, src);
      if (value) parts.push(value);
    }
    const text = parts.join(" ");
    if (text.trim()) input.text = text;
  }
  const mediaSources = field.mediaFields ?? [];
  if (mediaSources.length) {
    const imageValue = getNestedValue(data, mediaSources[0]);
    if (typeof imageValue === "string") input.image = imageValue;
    if (mediaSources.length > 1) {
      const media = {};
      for (const key of mediaSources) {
        const val = getNestedValue(data, key);
        if (val != null) media[key] = val;
      }
      input.media = media;
    }
  }
  return input;
}
53
/**
 * True when the input carries anything worth embedding: non-blank text,
 * an image, audio, or at least one media entry.
 */
function hasContent(input) {
  if (input.text?.trim()) return true;
  if (input.image) return true;
  if (input.audio) return true;
  return !!(input.media && Object.keys(input.media).length);
}
59
/**
 * Builds the aggregation pipeline for an Atlas $vectorSearch:
 * the $vectorSearch stage itself, then optional score projection,
 * minScore filtering, field projection, and caller-supplied post stages.
 *
 * numCandidates defaults to max(limit * 10, 100) and is clamped into
 * [limit, MAX_NUM_CANDIDATES].
 *
 * NOTE(review): when includeScore === false but minScore is set, `_score`
 * is still materialized for the $match and remains in the output — confirm
 * whether callers rely on this.
 */
function buildVectorSearchPipeline(field, queryVector, params) {
  const limit = params.limit ?? 10;
  const requested = params.numCandidates ?? Math.max(limit * 10, 100);
  const numCandidates = Math.min(Math.max(requested, limit), MAX_NUM_CANDIDATES);
  const search = {
    index: field.index,
    path: field.path,
    queryVector,
    numCandidates,
    limit
  };
  if (params.filter) search.filter = params.filter;
  if (params.exact) search.exact = true;
  const stages = [{ $vectorSearch: search }];
  const wantsScore = params.includeScore !== false || params.minScore != null;
  if (wantsScore) stages.push({ $addFields: { _score: { $meta: "vectorSearchScore" } } });
  if (params.minScore != null) stages.push({ $match: { _score: { $gte: params.minScore } } });
  if (params.project) stages.push({ $project: { ...params.project, _score: 1 } });
  if (params.postPipeline?.length) stages.push(...params.postPipeline);
  return stages;
}
85
/**
 * Creates the vector search plugin.
 *
 * Registers `searchSimilar` and `embed` repository methods, and — when
 * `autoEmbed` is on and an `embedFn` is supplied — hooks before:create,
 * before:createMany and before:update to populate vector fields from
 * their configured source fields.
 *
 * @param options - VectorPluginOptions (fields is required and non-empty).
 * @returns A plugin object with `name` and `apply(repo)`.
 * @throws Error when no field configs are given, or (at apply time) when
 *   the repository lacks `registerMethod` (methodRegistryPlugin missing).
 */
function vectorPlugin(options) {
  const { fields, autoEmbed = false } = options;
  if (!fields?.length) throw new Error("[mongokit] vectorPlugin requires at least one field config");
  const { embedFn, batchEmbedFn } = options;
  return {
    name: "vector",
    apply(repo) {
      // registerMethod is provided by methodRegistryPlugin — hard requirement.
      if (!repo.registerMethod) throw new Error("[mongokit] vectorPlugin requires methodRegistryPlugin");
      repo.registerMethod("searchSimilar", async function searchSimilar(params) {
        const field = resolveField(fields, params.field);
        let queryVector;
        // A raw vector is used as-is; any other query shape must be embedded first.
        if (Array.isArray(params.query)) queryVector = params.query;
        else {
          if (!embedFn) throw new Error("[mongokit] Non-vector queries require embedFn in vectorPlugin options");
          queryVector = await embedFn(toEmbeddingInput(params.query));
        }
        // Dimension mismatch would fail server-side; fail fast with a clear message.
        if (queryVector.length !== field.dimensions) throw new Error(`[mongokit] Query vector has ${queryVector.length} dimensions, expected ${field.dimensions}`);
        const pipeline = buildVectorSearchPipeline(field, queryVector, params);
        const agg = repo.Model.aggregate(pipeline);
        if (params.session) agg.session(params.session);
        // Strip the internal _score field and surface it as `score` (default 0).
        return (await agg.exec()).map((doc) => {
          const score = doc._score ?? 0;
          const { _score, ...rest } = doc;
          return {
            doc: rest,
            score
          };
        });
      });
      repo.registerMethod("embed", async function embed(input) {
        if (!embedFn) throw new Error("[mongokit] embed requires embedFn in vectorPlugin options");
        return embedFn(typeof input === "string" ? { text: input } : input);
      });
      if (autoEmbed && embedFn) {
        const { onEmbedError } = options;
        // Embed with optional error suppression: with onEmbedError the write
        // proceeds without a vector (returns null); otherwise the error propagates.
        const safeEmbed = async (input, doc) => {
          try {
            return await embedFn(input);
          } catch (err) {
            if (onEmbedError) {
              onEmbedError(err, doc);
              return null;
            }
            throw err;
          }
        };
        // Populate one document's vector field from its source fields,
        // skipping documents that already carry an array at field.path.
        const embedFromSource = async (data, field) => {
          if (data[field.path] && Array.isArray(data[field.path])) return;
          const input = buildInputFromDoc(data, field);
          if (!hasContent(input)) return;
          const vector = await safeEmbed(input, data);
          if (vector) data[field.path] = vector;
        };
        // Batch variant: collects embeddable entries, prefers batchEmbedFn,
        // falls back to per-document safeEmbed calls.
        const embedBatchFromSource = async (dataArray, field) => {
          const toEmbed = [];
          for (let i = 0; i < dataArray.length; i++) {
            const data = dataArray[i];
            if (data[field.path] && Array.isArray(data[field.path])) continue;
            const input = buildInputFromDoc(data, field);
            if (hasContent(input)) toEmbed.push({
              idx: i,
              input
            });
          }
          if (!toEmbed.length) return;
          // Note: the `else` below pairs with this `if`, not the try/catch.
          if (batchEmbedFn) try {
            const vectors = await batchEmbedFn(toEmbed.map((e) => e.input));
            for (let i = 0; i < toEmbed.length; i++) dataArray[toEmbed[i].idx][field.path] = vectors[i];
          } catch (err) {
            if (onEmbedError) {
              onEmbedError(err, dataArray);
              return;
            }
            throw err;
          }
          else for (const entry of toEmbed) {
            const vector = await safeEmbed(entry.input, dataArray[entry.idx]);
            if (vector) dataArray[entry.idx][field.path] = vector;
          }
        };
        repo.on("before:create", async (context) => {
          if (!context.data) return;
          for (const field of fields) await embedFromSource(context.data, field);
        });
        repo.on("before:createMany", async (context) => {
          if (!context.dataArray?.length) return;
          for (const field of fields) await embedBatchFromSource(context.dataArray, field);
        });
        repo.on("before:update", async (context) => {
          if (!context.data) return;
          // Re-embed only fields whose source keys appear in the update payload.
          // Change detection is a shallow `in` check on top-level keys —
          // TODO confirm dot-notation update keys are matched as intended.
          const fieldsToEmbed = fields.filter((field) => {
            const allFields = [...field.sourceFields ?? [], ...field.mediaFields ?? []];
            return allFields.length > 0 && allFields.some((f) => f in context.data);
          });
          if (!fieldsToEmbed.length) return;
          // Needs the stored document to merge with the partial update payload.
          const existing = await repo.Model.findById(context.id).lean().session(context.session ?? null);
          if (!existing) return;
          for (const field of fieldsToEmbed) {
            const merged = {
              ...existing,
              ...context.data
            };
            // Drop the stale vector so the skip-if-present check doesn't trigger.
            delete merged[field.path];
            const input = buildInputFromDoc(merged, field);
            if (!hasContent(input)) continue;
            const vector = await safeEmbed(input, merged);
            if (vector) context.data[field.path] = vector;
          }
        });
      }
    }
  };
}
201
+
202
+ //#endregion
203
+ export { buildVectorSearchPipeline, vectorPlugin };