@mastra/pg 1.0.0-beta.11 → 1.0.0-beta.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,351 @@
1
+ # Rag API Reference
2
+
3
+ > API reference for rag - 1 entry
4
+
5
+
6
+ ---
7
+
8
+ ## Reference: Metadata Filters
9
+
10
+ > Documentation for metadata filtering capabilities in Mastra, which allow for precise querying of vector search results across different vector stores.
11
+
12
+ Mastra provides a unified metadata filtering syntax across all vector stores, based on MongoDB/Sift query syntax. Each vector store translates these filters into its native format.
13
+
14
+ ## Basic Example
15
+
16
+ ```typescript
17
+ import { PgVector } from "@mastra/pg";
18
+
19
+ const store = new PgVector({
20
+ id: 'pg-vector',
21
+ connectionString
22
+ });
23
+
24
+ const results = await store.query({
25
+ indexName: "my_index",
26
+ queryVector: queryVector,
27
+ topK: 10,
28
+ filter: {
29
+ category: "electronics", // Simple equality
30
+ price: { $gt: 100 }, // Numeric comparison
31
+ tags: { $in: ["sale", "new"] }, // Array membership
32
+ },
33
+ });
34
+ ```
35
+
36
+ ## Supported Operators
37
+
38
+ <OperatorsTable
39
+ title="Basic Comparison"
40
+ operators={[
41
+ {
42
+ name: "$eq",
43
+ description: "Matches values equal to specified value",
44
+ example: "{ age: { $eq: 25 } }",
45
+ supportedBy: ["All except Couchbase"],
46
+ },
47
+ {
48
+ name: "$ne",
49
+ description: "Matches values not equal",
50
+ example: "{ status: { $ne: 'inactive' } }",
51
+ supportedBy: ["All except Couchbase"],
52
+ },
53
+ {
54
+ name: "$gt",
55
+ description: "Greater than",
56
+ example: "{ price: { $gt: 100 } }",
57
+ supportedBy: ["All except Couchbase"],
58
+ },
59
+ {
60
+ name: "$gte",
61
+ description: "Greater than or equal",
62
+ example: "{ rating: { $gte: 4.5 } }",
63
+ supportedBy: ["All except Couchbase"],
64
+ },
65
+ {
66
+ name: "$lt",
67
+ description: "Less than",
68
+ example: "{ stock: { $lt: 20 } }",
69
+ supportedBy: ["All except Couchbase"],
70
+ },
71
+ {
72
+ name: "$lte",
73
+ description: "Less than or equal",
74
+ example: "{ priority: { $lte: 3 } }",
75
+ supportedBy: ["All except Couchbase"],
76
+ },
77
+ ]}
78
+ />
79
+
80
+ <OperatorsTable
81
+ title="Array Operators"
82
+ operators={[
83
+ {
84
+ name: "$in",
85
+ description: "Matches any value in array",
86
+ example: '{ category: { $in: ["A", "B"] } }',
87
+ supportedBy: ["All except Couchbase"],
88
+ },
89
+ {
90
+ name: "$nin",
91
+ description: "Matches none of the values",
92
+ example: '{ status: { $nin: ["deleted", "archived"] } }',
93
+ supportedBy: ["All except Couchbase"],
94
+ },
95
+ {
96
+ name: "$all",
97
+ description: "Matches arrays containing all elements",
98
+ example: '{ tags: { $all: ["urgent", "high"] } }',
99
+ supportedBy: ["Astra", "Pinecone", "Upstash", "MongoDB"],
100
+ },
101
+ {
102
+ name: "$elemMatch",
103
+ description: "Matches array elements meeting criteria",
104
+ example: "{ scores: { $elemMatch: { $gt: 80 } } }",
105
+ supportedBy: ["libSQL", "PgVector", "MongoDB"],
106
+ },
107
+ ]}
108
+ />
109
+
110
+ <OperatorsTable
111
+ title="Logical Operators"
112
+ operators={[
113
+ {
114
+ name: "$and",
115
+ description: "Logical AND",
116
+ example: "{ $and: [{ price: { $gt: 100 } }, { stock: { $gt: 0 } }] }",
117
+ supportedBy: ["All except Vectorize, Couchbase"],
118
+ },
119
+ {
120
+ name: "$or",
121
+ description: "Logical OR",
122
+ example: '{ $or: [{ status: "active" }, { priority: "high" }] }',
123
+ supportedBy: ["All except Vectorize, Couchbase"],
124
+ },
125
+ {
126
+ name: "$not",
127
+ description: "Logical NOT",
128
+ example: "{ price: { $not: { $lt: 100 } } }",
129
+ supportedBy: [
130
+ "Astra",
131
+ "Qdrant",
132
+ "Upstash",
133
+ "PgVector",
134
+ "libSQL",
135
+ "MongoDB",
136
+ ],
137
+ },
138
+ {
139
+ name: "$nor",
140
+ description: "Logical NOR",
141
+ example: '{ $nor: [{ status: "deleted" }, { archived: true }] }',
142
+ supportedBy: ["Qdrant", "Upstash", "PgVector", "libSQL", "MongoDB"],
143
+ },
144
+ ]}
145
+ />
146
+
147
+ <OperatorsTable
148
+ title="Element Operators"
149
+ operators={[
150
+ {
151
+ name: "$exists",
152
+ description: "Matches documents with field",
153
+ example: "{ rating: { $exists: true } }",
154
+ supportedBy: ["All except Vectorize, Chroma, Couchbase"],
155
+ },
156
+ ]}
157
+ />
158
+
159
+ <OperatorsTable
160
+ title="Custom Operators"
161
+ operators={[
162
+ {
163
+ name: "$contains",
164
+ description: "Text contains substring",
165
+ example: '{ description: { $contains: "sale" } }',
166
+ supportedBy: ["Upstash", "libSQL", "PgVector"],
167
+ },
168
+ {
169
+ name: "$regex",
170
+ description: "Regular expression match",
171
+ example: '{ name: { $regex: "^test" } }',
172
+ supportedBy: ["Qdrant", "PgVector", "Upstash", "MongoDB"],
173
+ },
174
+ {
175
+ name: "$size",
176
+ description: "Array length check",
177
+ example: "{ tags: { $size: { $gt: 2 } } }",
178
+ supportedBy: ["Astra", "libSQL", "PgVector", "MongoDB"],
179
+ },
180
+ {
181
+ name: "$geo",
182
+ description: "Geospatial query",
183
+ example: '{ location: { $geo: { type: "radius", ... } } }',
184
+ supportedBy: ["Qdrant"],
185
+ },
186
+ {
187
+ name: "$datetime",
188
+ description: "Datetime range query",
189
+ example: '{ created: { $datetime: { range: { gt: "2024-01-01" } } } }',
190
+ supportedBy: ["Qdrant"],
191
+ },
192
+ {
193
+ name: "$hasId",
194
+ description: "Vector ID existence check",
195
+ example: '{ $hasId: ["id1", "id2"] }',
196
+ supportedBy: ["Qdrant"],
197
+ },
198
+ {
199
+ name: "$hasVector",
200
+ description: "Vector existence check",
201
+ example: "{ $hasVector: true }",
202
+ supportedBy: ["Qdrant"],
203
+ },
204
+ ]}
205
+ />
206
+
207
+ ## Common Rules and Restrictions
208
+
209
+ 1. Field names cannot:
210
+ - Contain dots (.) unless referring to nested fields
211
+ - Start with $ or contain null characters
212
+ - Be empty strings
213
+
214
+ 2. Values must be:
215
+ - Valid JSON types (string, number, boolean, object, array)
216
+ - Not undefined
217
+ - Properly typed for the operator (e.g., numbers for numeric comparisons)
218
+
219
+ 3. Logical operators:
220
+ - Must contain valid conditions
221
+ - Cannot be empty
222
+ - Must be properly nested
223
+ - Can only be used at top level or nested within other logical operators
224
+ - Cannot be used at field level or nested inside a field
225
+ - Cannot be used inside an operator
226
+ - Valid: `{ "$and": [{ "field": { "$gt": 100 } }] }`
227
+ - Valid: `{ "$or": [{ "$and": [{ "field": { "$gt": 100 } }] }] }`
228
+ - Invalid: `{ "field": { "$and": [{ "$gt": 100 }] } }`
229
+ - Invalid: `{ "field": { "$gt": { "$and": [{...}] } } }`
230
+
231
+ 4. $not operator:
232
+ - Must be an object
233
+ - Cannot be empty
234
+ - Can be used at field level or top level
235
+ - Valid: `{ "$not": { "field": "value" } }`
236
+ - Valid: `{ "field": { "$not": { "$eq": "value" } } }`
237
+
238
+ 5. Operator nesting:
239
+ - Logical operators must contain field conditions, not direct operators
240
+ - Valid: `{ "$and": [{ "field": { "$gt": 100 } }] }`
241
+ - Invalid: `{ "$and": [{ "$gt": 100 }] }`
242
+
243
+ ## Store-Specific Notes
244
+
245
+ ### Astra
246
+
247
+ - Nested field queries are supported using dot notation
248
+ - Array fields must be explicitly defined as arrays in the metadata
249
+ - Metadata values are case-sensitive
250
+
251
+ ### ChromaDB
252
+
253
+ - Where filters only return results where the filtered field exists in metadata
254
+ - Empty metadata fields are not included in filter results
255
+ - Metadata fields must be present for negative matches (e.g., $ne won't match documents missing the field)
256
+
257
+ ### Cloudflare Vectorize
258
+
259
+ - Requires explicit metadata indexing before filtering can be used
260
+ - Use `createMetadataIndex()` to index fields you want to filter on
261
+ - Up to 10 metadata indexes per Vectorize index
262
+ - String values are indexed up to first 64 bytes (truncated on UTF-8 boundaries)
263
+ - Number values use float64 precision
264
+ - Filter JSON must be under 2048 bytes
265
+ - Field names cannot contain dots (.) or start with $
266
+ - Field names limited to 512 characters
267
+ - Vectors must be re-upserted after creating new metadata indexes to be included in filtered results
268
+ - Range queries may have reduced accuracy with very large datasets (~10M+ vectors)
269
+
270
+ ### libSQL
271
+
272
+ - Supports nested object queries with dot notation
273
+ - Array fields are validated to ensure they contain valid JSON arrays
274
+ - Numeric comparisons maintain proper type handling
275
+ - Empty arrays in conditions are handled gracefully
276
+ - Metadata is stored in a JSONB column for efficient querying
277
+
278
+ ### PgVector
279
+
280
+ - Full support for PostgreSQL's native JSON querying capabilities
281
+ - Efficient handling of array operations using native array functions
282
+ - Proper type handling for numbers, strings, and booleans
283
+ - Nested field queries use PostgreSQL's JSON path syntax internally
284
+ - Metadata is stored in a JSONB column for efficient indexing
285
+
286
+ ### Pinecone
287
+
288
+ - Metadata field names are limited to 512 characters
289
+ - Numeric values must be within the range of ±1e38
290
+ - Arrays in metadata are limited to 64KB total size
291
+ - Nested objects are flattened with dot notation
292
+ - Metadata updates replace the entire metadata object
293
+
294
+ ### Qdrant
295
+
296
+ - Supports advanced filtering with nested conditions
297
+ - Payload (metadata) fields must be explicitly indexed for filtering
298
+ - Efficient handling of geo-spatial queries
299
+ - Special handling for null and empty values
300
+ - Vector-specific filtering capabilities
301
+ - Datetime values must be in RFC 3339 format
302
+
303
+ ### Upstash
304
+
305
+ - 512-character limit for metadata field keys
306
+ - Query size is limited (avoid large IN clauses)
307
+ - No support for null/undefined values in filters
308
+ - Translates to SQL-like syntax internally
309
+ - Case-sensitive string comparisons
310
+ - Metadata updates are atomic
311
+
312
+ ### MongoDB
313
+
314
+ - Full support for MongoDB/Sift query syntax for metadata filters
315
+ - Supports all standard comparison, array, logical, and element operators
316
+ - Supports nested fields and arrays in metadata
317
+ - Filtering can be applied to both `metadata` and the original document content using the `filter` and `documentFilter` options, respectively
318
+ - `filter` applies to the metadata object; `documentFilter` applies to the original document fields
319
+ - No artificial limits on filter size or complexity (subject to MongoDB query limits)
320
+ - Indexing metadata fields is recommended for optimal performance
321
+
322
+ ### Couchbase
323
+
324
+ - Currently does not have support for metadata filters. Filtering must be done client-side after retrieving results or by using the Couchbase SDK's Search capabilities directly for more complex queries.
325
+
326
+ ### Amazon S3 Vectors
327
+
328
+ - Equality values must be primitives (string/number/boolean). `null`/`undefined`, arrays, objects, and Date are not allowed for equality. Range operators accept numbers or Date (Dates are normalized to epoch ms).
329
+ - `$in`/`$nin` require **non-empty arrays of primitives**; Date elements are allowed and normalized to epoch ms. **Array equality** is not supported.
330
+ - Implicit AND is canonicalized (`{a:1,b:2}` → `{$and:[{a:1},{b:2}]}`). Logical operators must contain field conditions, use non-empty arrays, and appear only at the root or within other logical operators (not inside field values).
331
+ - Keys listed in `nonFilterableMetadataKeys` at index creation are stored but not filterable; this setting is immutable.
332
+ - `$exists` requires a boolean value.
333
+ - `undefined`/`null`/empty filters are treated as no filter.
334
+ - Each metadata key name limited to 63 characters.
335
+ - Total metadata per vector: Up to 40 KB (filterable + non-filterable)
336
+ - Total metadata keys per vector: Up to 10
337
+ - Filterable metadata per vector: Up to 2 KB
338
+ - Non-filterable metadata keys per vector index: Up to 10
339
+
340
+ ## Related
341
+
342
+ - [Astra](https://mastra.ai/reference/v1/vectors/astra)
343
+ - [Chroma](https://mastra.ai/reference/v1/vectors/chroma)
344
+ - [Cloudflare Vectorize](https://mastra.ai/reference/v1/vectors/vectorize)
345
+ - [libSQL](https://mastra.ai/reference/v1/vectors/libsql)
346
+ - [MongoDB](https://mastra.ai/reference/v1/vectors/mongodb)
347
+ - [PgVector](https://mastra.ai/reference/v1/vectors/pg)
348
+ - [Pinecone](https://mastra.ai/reference/v1/vectors/pinecone)
349
+ - [Qdrant](https://mastra.ai/reference/v1/vectors/qdrant)
350
+ - [Upstash](https://mastra.ai/reference/v1/vectors/upstash)
351
+ - [Amazon S3 Vectors](https://mastra.ai/reference/v1/vectors/s3vectors)