chub-dev 0.1.0 → 0.1.2-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/bin/chub-mcp +2 -0
- package/dist/airtable/docs/database/javascript/DOC.md +1437 -0
- package/dist/airtable/docs/database/python/DOC.md +1735 -0
- package/dist/amplitude/docs/analytics/javascript/DOC.md +1282 -0
- package/dist/amplitude/docs/analytics/python/DOC.md +1199 -0
- package/dist/anthropic/docs/claude-api/javascript/DOC.md +503 -0
- package/dist/anthropic/docs/claude-api/python/DOC.md +389 -0
- package/dist/asana/docs/tasks/DOC.md +1396 -0
- package/dist/assemblyai/docs/transcription/DOC.md +1043 -0
- package/dist/atlassian/docs/confluence/javascript/DOC.md +1347 -0
- package/dist/atlassian/docs/confluence/python/DOC.md +1604 -0
- package/dist/auth0/docs/identity/javascript/DOC.md +968 -0
- package/dist/auth0/docs/identity/python/DOC.md +1199 -0
- package/dist/aws/docs/s3/javascript/DOC.md +1773 -0
- package/dist/aws/docs/s3/python/DOC.md +1807 -0
- package/dist/binance/docs/trading/javascript/DOC.md +1315 -0
- package/dist/binance/docs/trading/python/DOC.md +1454 -0
- package/dist/braintree/docs/gateway/javascript/DOC.md +1278 -0
- package/dist/braintree/docs/gateway/python/DOC.md +1179 -0
- package/dist/chromadb/docs/embeddings-db/javascript/DOC.md +1263 -0
- package/dist/chromadb/docs/embeddings-db/python/DOC.md +1707 -0
- package/dist/clerk/docs/auth/javascript/DOC.md +1220 -0
- package/dist/clerk/docs/auth/python/DOC.md +274 -0
- package/dist/cloudflare/docs/workers/javascript/DOC.md +918 -0
- package/dist/cloudflare/docs/workers/python/DOC.md +994 -0
- package/dist/cockroachdb/docs/distributed-db/DOC.md +1500 -0
- package/dist/cohere/docs/llm/DOC.md +1335 -0
- package/dist/datadog/docs/monitoring/javascript/DOC.md +1740 -0
- package/dist/datadog/docs/monitoring/python/DOC.md +1815 -0
- package/dist/deepgram/docs/speech/javascript/DOC.md +885 -0
- package/dist/deepgram/docs/speech/python/DOC.md +685 -0
- package/dist/deepl/docs/translation/javascript/DOC.md +887 -0
- package/dist/deepl/docs/translation/python/DOC.md +944 -0
- package/dist/deepseek/docs/llm/DOC.md +1220 -0
- package/dist/directus/docs/headless-cms/javascript/DOC.md +1128 -0
- package/dist/directus/docs/headless-cms/python/DOC.md +1276 -0
- package/dist/discord/docs/bot/javascript/DOC.md +1090 -0
- package/dist/discord/docs/bot/python/DOC.md +1130 -0
- package/dist/elasticsearch/docs/search/DOC.md +1634 -0
- package/dist/elevenlabs/docs/text-to-speech/javascript/DOC.md +336 -0
- package/dist/elevenlabs/docs/text-to-speech/python/DOC.md +552 -0
- package/dist/firebase/docs/auth/DOC.md +1015 -0
- package/dist/gemini/docs/genai/javascript/DOC.md +691 -0
- package/dist/gemini/docs/genai/python/DOC.md +555 -0
- package/dist/github/docs/octokit/DOC.md +1560 -0
- package/dist/google/docs/bigquery/javascript/DOC.md +1688 -0
- package/dist/google/docs/bigquery/python/DOC.md +1503 -0
- package/dist/hubspot/docs/crm/javascript/DOC.md +1805 -0
- package/dist/hubspot/docs/crm/python/DOC.md +2033 -0
- package/dist/huggingface/docs/transformers/DOC.md +948 -0
- package/dist/intercom/docs/messaging/javascript/DOC.md +1844 -0
- package/dist/intercom/docs/messaging/python/DOC.md +1797 -0
- package/dist/jira/docs/issues/javascript/DOC.md +1420 -0
- package/dist/jira/docs/issues/python/DOC.md +1492 -0
- package/dist/kafka/docs/streaming/javascript/DOC.md +1671 -0
- package/dist/kafka/docs/streaming/python/DOC.md +1464 -0
- package/dist/landingai-ade/docs/api/DOC.md +620 -0
- package/dist/landingai-ade/docs/sdk/python/DOC.md +489 -0
- package/dist/landingai-ade/docs/sdk/typescript/DOC.md +542 -0
- package/dist/landingai-ade/skills/SKILL.md +489 -0
- package/dist/launchdarkly/docs/feature-flags/javascript/DOC.md +1191 -0
- package/dist/launchdarkly/docs/feature-flags/python/DOC.md +1671 -0
- package/dist/linear/docs/tracker/DOC.md +1554 -0
- package/dist/livekit/docs/realtime/javascript/DOC.md +303 -0
- package/dist/livekit/docs/realtime/python/DOC.md +163 -0
- package/dist/mailchimp/docs/marketing/DOC.md +1420 -0
- package/dist/meilisearch/docs/search/DOC.md +1241 -0
- package/dist/microsoft/docs/onedrive/javascript/DOC.md +1421 -0
- package/dist/microsoft/docs/onedrive/python/DOC.md +1549 -0
- package/dist/mongodb/docs/atlas/DOC.md +2041 -0
- package/dist/notion/docs/workspace-api/javascript/DOC.md +1435 -0
- package/dist/notion/docs/workspace-api/python/DOC.md +1400 -0
- package/dist/okta/docs/identity/javascript/DOC.md +1171 -0
- package/dist/okta/docs/identity/python/DOC.md +1401 -0
- package/dist/openai/docs/chat/javascript/DOC.md +407 -0
- package/dist/openai/docs/chat/python/DOC.md +568 -0
- package/dist/paypal/docs/checkout/DOC.md +278 -0
- package/dist/pinecone/docs/sdk/javascript/DOC.md +984 -0
- package/dist/pinecone/docs/sdk/python/DOC.md +1395 -0
- package/dist/plaid/docs/banking/javascript/DOC.md +1163 -0
- package/dist/plaid/docs/banking/python/DOC.md +1203 -0
- package/dist/playwright-community/skills/login-flows/SKILL.md +108 -0
- package/dist/postmark/docs/transactional-email/DOC.md +1168 -0
- package/dist/prisma/docs/orm/javascript/DOC.md +1419 -0
- package/dist/prisma/docs/orm/python/DOC.md +1317 -0
- package/dist/qdrant/docs/vector-search/javascript/DOC.md +1221 -0
- package/dist/qdrant/docs/vector-search/python/DOC.md +1653 -0
- package/dist/rabbitmq/docs/message-queue/javascript/DOC.md +1193 -0
- package/dist/rabbitmq/docs/message-queue/python/DOC.md +1243 -0
- package/dist/razorpay/docs/payments/javascript/DOC.md +1219 -0
- package/dist/razorpay/docs/payments/python/DOC.md +1330 -0
- package/dist/redis/docs/key-value/javascript/DOC.md +1851 -0
- package/dist/redis/docs/key-value/python/DOC.md +2054 -0
- package/dist/registry.json +2817 -0
- package/dist/replicate/docs/model-hosting/DOC.md +1318 -0
- package/dist/resend/docs/email/DOC.md +1271 -0
- package/dist/salesforce/docs/crm/javascript/DOC.md +1241 -0
- package/dist/salesforce/docs/crm/python/DOC.md +1183 -0
- package/dist/search-index.json +1 -0
- package/dist/sendgrid/docs/email-api/javascript/DOC.md +371 -0
- package/dist/sendgrid/docs/email-api/python/DOC.md +656 -0
- package/dist/sentry/docs/error-tracking/javascript/DOC.md +1073 -0
- package/dist/sentry/docs/error-tracking/python/DOC.md +1309 -0
- package/dist/shopify/docs/storefront/DOC.md +457 -0
- package/dist/slack/docs/workspace/javascript/DOC.md +933 -0
- package/dist/slack/docs/workspace/python/DOC.md +271 -0
- package/dist/square/docs/payments/javascript/DOC.md +1855 -0
- package/dist/square/docs/payments/python/DOC.md +1728 -0
- package/dist/stripe/docs/api/DOC.md +1727 -0
- package/dist/stripe/docs/payments/DOC.md +1726 -0
- package/dist/stytch/docs/auth/javascript/DOC.md +1813 -0
- package/dist/stytch/docs/auth/python/DOC.md +1962 -0
- package/dist/supabase/docs/client/DOC.md +1606 -0
- package/dist/twilio/docs/messaging/python/DOC.md +469 -0
- package/dist/twilio/docs/messaging/typescript/DOC.md +946 -0
- package/dist/vercel/docs/platform/DOC.md +1940 -0
- package/dist/weaviate/docs/vector-db/javascript/DOC.md +1268 -0
- package/dist/weaviate/docs/vector-db/python/DOC.md +1388 -0
- package/dist/zendesk/docs/support/javascript/DOC.md +2150 -0
- package/dist/zendesk/docs/support/python/DOC.md +2297 -0
- package/package.json +22 -6
- package/skills/get-api-docs/SKILL.md +84 -0
- package/src/commands/annotate.js +83 -0
- package/src/commands/build.js +12 -1
- package/src/commands/feedback.js +150 -0
- package/src/commands/get.js +83 -42
- package/src/commands/search.js +7 -0
- package/src/index.js +43 -17
- package/src/lib/analytics.js +90 -0
- package/src/lib/annotations.js +57 -0
- package/src/lib/bm25.js +170 -0
- package/src/lib/cache.js +69 -6
- package/src/lib/config.js +8 -3
- package/src/lib/identity.js +99 -0
- package/src/lib/registry.js +103 -20
- package/src/lib/telemetry.js +86 -0
- package/src/mcp/server.js +177 -0
- package/src/mcp/tools.js +251 -0
|
@@ -0,0 +1,1263 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: embeddings-db
|
|
3
|
+
description: "ChromaDB JavaScript/TypeScript SDK for vector embeddings and AI-powered search"
|
|
4
|
+
metadata:
|
|
5
|
+
languages: "javascript"
|
|
6
|
+
versions: "3.0.17"
|
|
7
|
+
updated-on: "2026-03-02"
|
|
8
|
+
source: maintainer
|
|
9
|
+
tags: "chromadb,embeddings,vector-db,ai,search"
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# ChromaDB JavaScript/TypeScript SDK - v3.0.17
|
|
13
|
+
|
|
14
|
+
## Golden Rule
|
|
15
|
+
|
|
16
|
+
**ALWAYS use the official `chromadb` package (v3.0.17 or later) for JavaScript/TypeScript projects.**
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npm install chromadb
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**For default embeddings support, also install:**
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
npm install @chroma-core/default-embed
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
**DO NOT use:**
|
|
29
|
+
- Deprecated or unofficial ChromaDB packages
|
|
30
|
+
- Old client libraries like `chromadb-client`
|
|
31
|
+
- Community wrappers that may be outdated
|
|
32
|
+
|
|
33
|
+
ChromaDB is an AI-native, open-source vector database. It handles embedding, indexing, and vector similarity search for you.
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
### Using npm
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
npm install chromadb @chroma-core/default-embed
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Using yarn
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
yarn add chromadb @chroma-core/default-embed
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Using pnpm
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pnpm add chromadb @chroma-core/default-embed
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Using bun
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
bun add chromadb @chroma-core/default-embed
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Server Setup
|
|
66
|
+
|
|
67
|
+
ChromaDB requires a backend server to connect to. You have two options:
|
|
68
|
+
|
|
69
|
+
### Option 1: Run ChromaDB Server Locally
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
chroma run --path ./my-chroma-data
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
This starts a ChromaDB server at `http://localhost:8000`.
|
|
76
|
+
|
|
77
|
+
### Option 2: Run ChromaDB with Docker
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
docker pull chromadb/chroma
|
|
81
|
+
docker run -p 8000:8000 chromadb/chroma
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Initialization
|
|
87
|
+
|
|
88
|
+
### Basic Client Connection
|
|
89
|
+
|
|
90
|
+
```typescript
|
|
91
|
+
import { ChromaClient } from "chromadb";
|
|
92
|
+
|
|
93
|
+
const client = new ChromaClient();
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
This connects to `http://localhost:8000` by default.
|
|
97
|
+
|
|
98
|
+
### Custom Host/Port Configuration
|
|
99
|
+
|
|
100
|
+
```typescript
|
|
101
|
+
import { ChromaClient } from "chromadb";
|
|
102
|
+
|
|
103
|
+
const client = new ChromaClient({
|
|
104
|
+
path: "http://localhost:8000"
|
|
105
|
+
});
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### With Authentication Headers
|
|
109
|
+
|
|
110
|
+
```typescript
|
|
111
|
+
import { ChromaClient } from "chromadb";
|
|
112
|
+
|
|
113
|
+
const client = new ChromaClient({
|
|
114
|
+
path: "http://localhost:8000",
|
|
115
|
+
auth: {
|
|
116
|
+
provider: "token",
|
|
117
|
+
credentials: "your-auth-token"
|
|
118
|
+
}
|
|
119
|
+
});
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Remote Server Connection
|
|
123
|
+
|
|
124
|
+
```typescript
|
|
125
|
+
import { ChromaClient } from "chromadb";
|
|
126
|
+
|
|
127
|
+
const client = new ChromaClient({
|
|
128
|
+
path: "https://your-chroma-server.com:8000"
|
|
129
|
+
});
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## Collections
|
|
135
|
+
|
|
136
|
+
### Create a Collection
|
|
137
|
+
|
|
138
|
+
```typescript
|
|
139
|
+
const collection = await client.createCollection({
|
|
140
|
+
name: "my_collection"
|
|
141
|
+
});
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Create Collection with Distance Metric
|
|
145
|
+
|
|
146
|
+
```typescript
|
|
147
|
+
const collection = await client.createCollection({
|
|
148
|
+
name: "my_collection",
|
|
149
|
+
metadata: {
|
|
150
|
+
"hnsw:space": "cosine" // Options: "cosine", "l2", "ip"
|
|
151
|
+
}
|
|
152
|
+
});
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**Distance Metrics:**
|
|
156
|
+
- `cosine`: Cosine similarity (best for text, normalized vectors)
|
|
157
|
+
- `l2`: Euclidean/L2 distance (default, sensitive to magnitude)
|
|
158
|
+
- `ip`: Inner product (for recommendation systems)
|
|
159
|
+
|
|
160
|
+
### Get an Existing Collection
|
|
161
|
+
|
|
162
|
+
```typescript
|
|
163
|
+
const collection = await client.getCollection({
|
|
164
|
+
name: "my_collection"
|
|
165
|
+
});
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Get or Create Collection
|
|
169
|
+
|
|
170
|
+
```typescript
|
|
171
|
+
const collection = await client.getOrCreateCollection({
|
|
172
|
+
name: "my_collection"
|
|
173
|
+
});
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### List All Collections
|
|
177
|
+
|
|
178
|
+
```typescript
|
|
179
|
+
const collections = await client.listCollections();
|
|
180
|
+
console.log(collections);
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Delete a Collection
|
|
184
|
+
|
|
185
|
+
```typescript
|
|
186
|
+
await client.deleteCollection({
|
|
187
|
+
name: "my_collection"
|
|
188
|
+
});
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## Adding Data
|
|
194
|
+
|
|
195
|
+
### Add Documents (Auto-Embedding)
|
|
196
|
+
|
|
197
|
+
```typescript
|
|
198
|
+
await collection.add({
|
|
199
|
+
ids: ["id1", "id2", "id3"],
|
|
200
|
+
documents: [
|
|
201
|
+
"This is a document about pineapples",
|
|
202
|
+
"This is a document about oranges",
|
|
203
|
+
"This is a document about apples"
|
|
204
|
+
]
|
|
205
|
+
});
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
ChromaDB will automatically embed the documents using the default embedding function.
|
|
209
|
+
|
|
210
|
+
### Add with Metadata
|
|
211
|
+
|
|
212
|
+
```typescript
|
|
213
|
+
await collection.add({
|
|
214
|
+
ids: ["id1", "id2", "id3"],
|
|
215
|
+
documents: [
|
|
216
|
+
"This is a document about pineapples",
|
|
217
|
+
"This is a document about oranges",
|
|
218
|
+
"This is a document about apples"
|
|
219
|
+
],
|
|
220
|
+
metadatas: [
|
|
221
|
+
{ category: "tropical", color: "yellow" },
|
|
222
|
+
{ category: "citrus", color: "orange" },
|
|
223
|
+
{ category: "temperate", color: "red" }
|
|
224
|
+
]
|
|
225
|
+
});
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### Add with Custom Embeddings
|
|
229
|
+
|
|
230
|
+
```typescript
|
|
231
|
+
await collection.add({
|
|
232
|
+
ids: ["id1", "id2"],
|
|
233
|
+
embeddings: [
|
|
234
|
+
[1.5, 2.9, 3.4, 1.2, 0.8],
|
|
235
|
+
[9.8, 2.3, 2.9, 4.1, 3.3]
|
|
236
|
+
],
|
|
237
|
+
documents: ["Document one", "Document two"],
|
|
238
|
+
metadatas: [
|
|
239
|
+
{ source: "manual" },
|
|
240
|
+
{ source: "manual" }
|
|
241
|
+
]
|
|
242
|
+
});
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### Batch Adding (Large Datasets)
|
|
246
|
+
|
|
247
|
+
```typescript
|
|
248
|
+
const batchSize = 5000;
|
|
249
|
+
for (let i = 0; i < totalDocuments.length; i += batchSize) {
|
|
250
|
+
const batch = totalDocuments.slice(i, i + batchSize);
|
|
251
|
+
await collection.add({
|
|
252
|
+
ids: batch.map((_, idx) => `id${i + idx}`),
|
|
253
|
+
documents: batch
|
|
254
|
+
});
|
|
255
|
+
}
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
With batching like this, ChromaDB can ingest 100k+ documents.
|
|
259
|
+
|
|
260
|
+
---
|
|
261
|
+
|
|
262
|
+
## Querying Data
|
|
263
|
+
|
|
264
|
+
### Query with Text (Auto-Embedding)
|
|
265
|
+
|
|
266
|
+
```typescript
|
|
267
|
+
const results = await collection.query({
|
|
268
|
+
queryTexts: ["What fruits are tropical?"],
|
|
269
|
+
nResults: 2
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
console.log(results);
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
**Response Structure:**
|
|
276
|
+
|
|
277
|
+
```typescript
|
|
278
|
+
{
|
|
279
|
+
ids: [["id1", "id2"]],
|
|
280
|
+
distances: [[0.1234, 0.5678]],
|
|
281
|
+
documents: [["This is a document about pineapples", "This is a document..."]],
|
|
282
|
+
metadatas: [[{ category: "tropical", color: "yellow" }, { ... }]]
|
|
283
|
+
}
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Query with Multiple Texts
|
|
287
|
+
|
|
288
|
+
```typescript
|
|
289
|
+
const results = await collection.query({
|
|
290
|
+
queryTexts: [
|
|
291
|
+
"What fruits are tropical?",
|
|
292
|
+
"What fruits are citrus?"
|
|
293
|
+
],
|
|
294
|
+
nResults: 2
|
|
295
|
+
});
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
Returns up to `nResults` results for each query text.
|
|
299
|
+
|
|
300
|
+
### Query with Custom Embeddings
|
|
301
|
+
|
|
302
|
+
```typescript
|
|
303
|
+
const results = await collection.query({
|
|
304
|
+
queryEmbeddings: [[1.5, 2.9, 3.4, 1.2, 0.8]],
|
|
305
|
+
nResults: 3
|
|
306
|
+
});
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
### Query with Metadata Filters
|
|
310
|
+
|
|
311
|
+
```typescript
|
|
312
|
+
const results = await collection.query({
|
|
313
|
+
queryTexts: ["What fruits are available?"],
|
|
314
|
+
nResults: 5,
|
|
315
|
+
where: {
|
|
316
|
+
category: "tropical"
|
|
317
|
+
}
|
|
318
|
+
});
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
### Complex Metadata Filtering
|
|
322
|
+
|
|
323
|
+
```typescript
|
|
324
|
+
// Using $or operator
|
|
325
|
+
const results = await collection.query({
|
|
326
|
+
queryTexts: ["Find fruits"],
|
|
327
|
+
nResults: 5,
|
|
328
|
+
where: {
|
|
329
|
+
$or: [
|
|
330
|
+
{ category: "tropical" },
|
|
331
|
+
{ category: "citrus" }
|
|
332
|
+
]
|
|
333
|
+
}
|
|
334
|
+
});
|
|
335
|
+
|
|
336
|
+
// Using $and operator
|
|
337
|
+
const results = await collection.query({
|
|
338
|
+
queryTexts: ["Find fruits"],
|
|
339
|
+
nResults: 5,
|
|
340
|
+
where: {
|
|
341
|
+
$and: [
|
|
342
|
+
{ category: "tropical" },
|
|
343
|
+
{ color: "yellow" }
|
|
344
|
+
]
|
|
345
|
+
}
|
|
346
|
+
});
|
|
347
|
+
|
|
348
|
+
// Using comparison operators
|
|
349
|
+
const results = await collection.query({
|
|
350
|
+
queryTexts: ["Find items"],
|
|
351
|
+
nResults: 5,
|
|
352
|
+
where: {
|
|
353
|
+
price: { $gt: 10 } // $gt, $gte, $lt, $lte, $ne, $eq
|
|
354
|
+
}
|
|
355
|
+
});
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
### Query with Document Content Filters
|
|
359
|
+
|
|
360
|
+
```typescript
|
|
361
|
+
const results = await collection.query({
|
|
362
|
+
queryTexts: ["Find documents"],
|
|
363
|
+
nResults: 5,
|
|
364
|
+
whereDocument: {
|
|
365
|
+
$contains: "pineapple"
|
|
366
|
+
}
|
|
367
|
+
});
|
|
368
|
+
|
|
369
|
+
// Using $not_contains
|
|
370
|
+
const results = await collection.query({
|
|
371
|
+
queryTexts: ["Find documents"],
|
|
372
|
+
nResults: 5,
|
|
373
|
+
whereDocument: {
|
|
374
|
+
$not_contains: "apple"
|
|
375
|
+
}
|
|
376
|
+
});
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
### Query with Include Options
|
|
380
|
+
|
|
381
|
+
```typescript
|
|
382
|
+
const results = await collection.query({
|
|
383
|
+
queryTexts: ["What fruits are tropical?"],
|
|
384
|
+
nResults: 2,
|
|
385
|
+
include: ["documents", "metadatas", "distances", "embeddings"]
|
|
386
|
+
});
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
**Include Options:**
|
|
390
|
+
- `documents`: The document text (included by default)
|
|
391
|
+
- `metadatas`: Metadata for each document (included by default)
|
|
392
|
+
- `distances`: Distance/similarity scores (included by default)
|
|
393
|
+
- `embeddings`: Vector embeddings (not included by default for performance)
|
|
394
|
+
|
|
395
|
+
---
|
|
396
|
+
|
|
397
|
+
## Getting Data
|
|
398
|
+
|
|
399
|
+
### Get Documents by IDs
|
|
400
|
+
|
|
401
|
+
```typescript
|
|
402
|
+
const results = await collection.get({
|
|
403
|
+
ids: ["id1", "id2"]
|
|
404
|
+
});
|
|
405
|
+
|
|
406
|
+
console.log(results);
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
### Get All Documents
|
|
410
|
+
|
|
411
|
+
```typescript
|
|
412
|
+
const results = await collection.get();
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
Returns all documents in the collection.
|
|
416
|
+
|
|
417
|
+
### Get with Metadata Filter
|
|
418
|
+
|
|
419
|
+
```typescript
|
|
420
|
+
const results = await collection.get({
|
|
421
|
+
where: {
|
|
422
|
+
category: "tropical"
|
|
423
|
+
}
|
|
424
|
+
});
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
### Get with Document Filter
|
|
428
|
+
|
|
429
|
+
```typescript
|
|
430
|
+
const results = await collection.get({
|
|
431
|
+
whereDocument: {
|
|
432
|
+
$contains: "pineapple"
|
|
433
|
+
}
|
|
434
|
+
});
|
|
435
|
+
```
|
|
436
|
+
|
|
437
|
+
### Get with Limit and Offset
|
|
438
|
+
|
|
439
|
+
```typescript
|
|
440
|
+
const results = await collection.get({
|
|
441
|
+
limit: 10,
|
|
442
|
+
offset: 20
|
|
443
|
+
});
|
|
444
|
+
```
|
|
445
|
+
|
|
446
|
+
### Get with Include Options
|
|
447
|
+
|
|
448
|
+
```typescript
|
|
449
|
+
const results = await collection.get({
|
|
450
|
+
ids: ["id1", "id2"],
|
|
451
|
+
include: ["documents", "metadatas", "embeddings"]
|
|
452
|
+
});
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
---
|
|
456
|
+
|
|
457
|
+
## Updating Data
|
|
458
|
+
|
|
459
|
+
### Update Documents
|
|
460
|
+
|
|
461
|
+
```typescript
|
|
462
|
+
await collection.update({
|
|
463
|
+
ids: ["id1", "id2"],
|
|
464
|
+
documents: [
|
|
465
|
+
"Updated document about pineapples",
|
|
466
|
+
"Updated document about oranges"
|
|
467
|
+
],
|
|
468
|
+
metadatas: [
|
|
469
|
+
{ category: "tropical", color: "yellow", updated: true },
|
|
470
|
+
{ category: "citrus", color: "orange", updated: true }
|
|
471
|
+
]
|
|
472
|
+
});
|
|
473
|
+
```
|
|
474
|
+
|
|
475
|
+
### Update with Custom Embeddings
|
|
476
|
+
|
|
477
|
+
```typescript
|
|
478
|
+
await collection.update({
|
|
479
|
+
ids: ["id1"],
|
|
480
|
+
embeddings: [[1.1, 2.2, 3.3, 4.4, 5.5]],
|
|
481
|
+
documents: ["Updated document"],
|
|
482
|
+
metadatas: [{ source: "updated" }]
|
|
483
|
+
});
|
|
484
|
+
```
|
|
485
|
+
|
|
486
|
+
---
|
|
487
|
+
|
|
488
|
+
## Upsert (Add or Update)
|
|
489
|
+
|
|
490
|
+
### Upsert Documents
|
|
491
|
+
|
|
492
|
+
```typescript
|
|
493
|
+
await collection.upsert({
|
|
494
|
+
ids: ["id1", "id2", "id3"],
|
|
495
|
+
documents: [
|
|
496
|
+
"Document one - may be new or updated",
|
|
497
|
+
"Document two - may be new or updated",
|
|
498
|
+
"Document three - may be new or updated"
|
|
499
|
+
],
|
|
500
|
+
metadatas: [
|
|
501
|
+
{ version: 2 },
|
|
502
|
+
{ version: 2 },
|
|
503
|
+
{ version: 1 }
|
|
504
|
+
]
|
|
505
|
+
});
|
|
506
|
+
```
|
|
507
|
+
|
|
508
|
+
If an ID already exists, its document is updated; otherwise, the document is added as a new entry.
|
|
509
|
+
|
|
510
|
+
---
|
|
511
|
+
|
|
512
|
+
## Deleting Data
|
|
513
|
+
|
|
514
|
+
### Delete by IDs
|
|
515
|
+
|
|
516
|
+
```typescript
|
|
517
|
+
await collection.delete({
|
|
518
|
+
ids: ["id1", "id2"]
|
|
519
|
+
});
|
|
520
|
+
```
|
|
521
|
+
|
|
522
|
+
### Delete with Metadata Filter
|
|
523
|
+
|
|
524
|
+
```typescript
|
|
525
|
+
await collection.delete({
|
|
526
|
+
where: {
|
|
527
|
+
category: "tropical"
|
|
528
|
+
}
|
|
529
|
+
});
|
|
530
|
+
```
|
|
531
|
+
|
|
532
|
+
### Delete with Document Filter
|
|
533
|
+
|
|
534
|
+
```typescript
|
|
535
|
+
await collection.delete({
|
|
536
|
+
whereDocument: {
|
|
537
|
+
$contains: "deprecated"
|
|
538
|
+
}
|
|
539
|
+
});
|
|
540
|
+
```
|
|
541
|
+
|
|
542
|
+
### Delete All Documents (Keep Collection)
|
|
543
|
+
|
|
544
|
+
```typescript
|
|
545
|
+
await collection.delete();
|
|
546
|
+
```
|
|
547
|
+
|
|
548
|
+
---
|
|
549
|
+
|
|
550
|
+
## Collection Utilities
|
|
551
|
+
|
|
552
|
+
### Count Documents
|
|
553
|
+
|
|
554
|
+
```typescript
|
|
555
|
+
const count = await collection.count();
|
|
556
|
+
console.log(`Total documents: ${count}`);
|
|
557
|
+
```
|
|
558
|
+
|
|
559
|
+
### Peek at First Documents
|
|
560
|
+
|
|
561
|
+
```typescript
|
|
562
|
+
const firstDocs = await collection.peek({
|
|
563
|
+
limit: 5
|
|
564
|
+
});
|
|
565
|
+
|
|
566
|
+
console.log(firstDocs);
|
|
567
|
+
```
|
|
568
|
+
|
|
569
|
+
Returns the first 5 documents in the collection.
|
|
570
|
+
|
|
571
|
+
### Modify Collection Metadata
|
|
572
|
+
|
|
573
|
+
```typescript
|
|
574
|
+
await collection.modify({
|
|
575
|
+
metadata: {
|
|
576
|
+
description: "Collection of fruit documents",
|
|
577
|
+
version: "1.0"
|
|
578
|
+
}
|
|
579
|
+
});
|
|
580
|
+
```
|
|
581
|
+
|
|
582
|
+
---
|
|
583
|
+
|
|
584
|
+
## Embedding Functions
|
|
585
|
+
|
|
586
|
+
### Using Default Embedding Function
|
|
587
|
+
|
|
588
|
+
```typescript
|
|
589
|
+
import { ChromaClient } from "chromadb";
|
|
590
|
+
|
|
591
|
+
const client = new ChromaClient();
|
|
592
|
+
const collection = await client.createCollection({
|
|
593
|
+
name: "my_collection"
|
|
594
|
+
});
|
|
595
|
+
```
|
|
596
|
+
|
|
597
|
+
By default, ChromaDB uses the Sentence Transformers `all-MiniLM-L6-v2` model.
|
|
598
|
+
|
|
599
|
+
### Using OpenAI Embeddings
|
|
600
|
+
|
|
601
|
+
```typescript
|
|
602
|
+
import { ChromaClient } from "chromadb";
|
|
603
|
+
import { OpenAIEmbeddingFunction } from "chromadb";
|
|
604
|
+
|
|
605
|
+
const embedder = new OpenAIEmbeddingFunction({
|
|
606
|
+
openai_api_key: "your-openai-api-key",
|
|
607
|
+
model_name: "text-embedding-3-small"
|
|
608
|
+
});
|
|
609
|
+
|
|
610
|
+
const collection = await client.createCollection({
|
|
611
|
+
name: "openai_collection",
|
|
612
|
+
embeddingFunction: embedder
|
|
613
|
+
});
|
|
614
|
+
```
|
|
615
|
+
|
|
616
|
+
**Available OpenAI Models:**
|
|
617
|
+
- `text-embedding-3-small`
|
|
618
|
+
- `text-embedding-3-large`
|
|
619
|
+
- `text-embedding-ada-002`
|
|
620
|
+
|
|
621
|
+
### Using Cohere Embeddings
|
|
622
|
+
|
|
623
|
+
```typescript
|
|
624
|
+
import { ChromaClient } from "chromadb";
|
|
625
|
+
import { CohereEmbeddingFunction } from "chromadb";
|
|
626
|
+
|
|
627
|
+
const embedder = new CohereEmbeddingFunction({
|
|
628
|
+
cohere_api_key: "your-cohere-api-key",
|
|
629
|
+
model_name: "embed-english-v3.0"
|
|
630
|
+
});
|
|
631
|
+
|
|
632
|
+
const collection = await client.createCollection({
|
|
633
|
+
name: "cohere_collection",
|
|
634
|
+
embeddingFunction: embedder
|
|
635
|
+
});
|
|
636
|
+
```
|
|
637
|
+
|
|
638
|
+
### Using Hugging Face Embeddings
|
|
639
|
+
|
|
640
|
+
```typescript
|
|
641
|
+
import { ChromaClient } from "chromadb";
|
|
642
|
+
import { HuggingFaceEmbeddingFunction } from "chromadb";
|
|
643
|
+
|
|
644
|
+
const embedder = new HuggingFaceEmbeddingFunction({
|
|
645
|
+
api_key: "your-hf-api-key",
|
|
646
|
+
model_name: "sentence-transformers/all-MiniLM-L6-v2"
|
|
647
|
+
});
|
|
648
|
+
|
|
649
|
+
const collection = await client.createCollection({
|
|
650
|
+
name: "hf_collection",
|
|
651
|
+
embeddingFunction: embedder
|
|
652
|
+
});
|
|
653
|
+
```
|
|
654
|
+
|
|
655
|
+
### Custom Embedding Function
|
|
656
|
+
|
|
657
|
+
```typescript
|
|
658
|
+
import { IEmbeddingFunction } from "chromadb";
|
|
659
|
+
|
|
660
|
+
class CustomEmbeddingFunction implements IEmbeddingFunction {
|
|
661
|
+
async generate(texts: string[]): Promise<number[][]> {
|
|
662
|
+
// Your custom embedding logic here
|
|
663
|
+
const embeddings = texts.map(text => {
|
|
664
|
+
// Example: simple character code embedding (replace with real model)
|
|
665
|
+
return Array.from(text).slice(0, 384).map(c => c.charCodeAt(0) / 255);
|
|
666
|
+
});
|
|
667
|
+
return embeddings;
|
|
668
|
+
}
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
const embedder = new CustomEmbeddingFunction();
|
|
672
|
+
const collection = await client.createCollection({
|
|
673
|
+
name: "custom_collection",
|
|
674
|
+
embeddingFunction: embedder
|
|
675
|
+
});
|
|
676
|
+
```
|
|
677
|
+
|
|
678
|
+
---
|
|
679
|
+
|
|
680
|
+
## Advanced Client Configuration
|
|
681
|
+
|
|
682
|
+
### ChromaClient with Full Options
|
|
683
|
+
|
|
684
|
+
```typescript
|
|
685
|
+
import { ChromaClient } from "chromadb";
|
|
686
|
+
|
|
687
|
+
const client = new ChromaClient({
|
|
688
|
+
path: "https://my-chroma-server.com:8000",
|
|
689
|
+
auth: {
|
|
690
|
+
provider: "token",
|
|
691
|
+
credentials: "my-auth-token"
|
|
692
|
+
},
|
|
693
|
+
tenant: "my-tenant",
|
|
694
|
+
database: "my-database"
|
|
695
|
+
});
|
|
696
|
+
```
|
|
697
|
+
|
|
698
|
+
### Multi-Tenancy Setup
|
|
699
|
+
|
|
700
|
+
```typescript
|
|
701
|
+
import { ChromaClient } from "chromadb";
|
|
702
|
+
|
|
703
|
+
const client = new ChromaClient({
|
|
704
|
+
path: "http://localhost:8000"
|
|
705
|
+
});
|
|
706
|
+
|
|
707
|
+
// Create a new tenant and database
|
|
708
|
+
await client.createTenant({ name: "acme-corp" });
|
|
709
|
+
await client.createDatabase({ name: "production", tenant: "acme-corp" });
|
|
710
|
+
|
|
711
|
+
// Connect to specific tenant/database
|
|
712
|
+
const tenantClient = new ChromaClient({
|
|
713
|
+
path: "http://localhost:8000",
|
|
714
|
+
tenant: "acme-corp",
|
|
715
|
+
database: "production"
|
|
716
|
+
});
|
|
717
|
+
```
|
|
718
|
+
|
|
719
|
+
---
|
|
720
|
+
|
|
721
|
+
## Complete Example: Document Search System
|
|
722
|
+
|
|
723
|
+
```typescript
|
|
724
|
+
import { ChromaClient } from "chromadb";
|
|
725
|
+
|
|
726
|
+
async function main() {
|
|
727
|
+
// Initialize client
|
|
728
|
+
const client = new ChromaClient({
|
|
729
|
+
path: "http://localhost:8000"
|
|
730
|
+
});
|
|
731
|
+
|
|
732
|
+
// Create or get collection
|
|
733
|
+
const collection = await client.getOrCreateCollection({
|
|
734
|
+
name: "knowledge_base",
|
|
735
|
+
metadata: {
|
|
736
|
+
"hnsw:space": "cosine"
|
|
737
|
+
}
|
|
738
|
+
});
|
|
739
|
+
|
|
740
|
+
// Add documents
|
|
741
|
+
await collection.add({
|
|
742
|
+
ids: ["doc1", "doc2", "doc3", "doc4"],
|
|
743
|
+
documents: [
|
|
744
|
+
"The quick brown fox jumps over the lazy dog",
|
|
745
|
+
"Machine learning is a subset of artificial intelligence",
|
|
746
|
+
"Python is a popular programming language",
|
|
747
|
+
"ChromaDB is a vector database for AI applications"
|
|
748
|
+
],
|
|
749
|
+
metadatas: [
|
|
750
|
+
{ category: "phrases", language: "english" },
|
|
751
|
+
{ category: "ai", language: "english" },
|
|
752
|
+
{ category: "programming", language: "english" },
|
|
753
|
+
{ category: "database", language: "english" }
|
|
754
|
+
]
|
|
755
|
+
});
|
|
756
|
+
|
|
757
|
+
// Query the collection
|
|
758
|
+
const results = await collection.query({
|
|
759
|
+
queryTexts: ["What is AI?"],
|
|
760
|
+
nResults: 2,
|
|
761
|
+
where: {
|
|
762
|
+
category: "ai"
|
|
763
|
+
}
|
|
764
|
+
});
|
|
765
|
+
|
|
766
|
+
console.log("Search Results:");
|
|
767
|
+
console.log(results.documents[0]);
|
|
768
|
+
console.log(results.metadatas[0]);
|
|
769
|
+
console.log(results.distances[0]);
|
|
770
|
+
|
|
771
|
+
// Get document count
|
|
772
|
+
const count = await collection.count();
|
|
773
|
+
console.log(`Total documents: ${count}`);
|
|
774
|
+
|
|
775
|
+
// Update a document
|
|
776
|
+
await collection.update({
|
|
777
|
+
ids: ["doc2"],
|
|
778
|
+
documents: ["Machine learning is a powerful subset of artificial intelligence"],
|
|
779
|
+
metadatas: [{ category: "ai", language: "english", updated: true }]
|
|
780
|
+
});
|
|
781
|
+
|
|
782
|
+
// Delete documents
|
|
783
|
+
await collection.delete({
|
|
784
|
+
ids: ["doc4"]
|
|
785
|
+
});
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
main();
|
|
789
|
+
```
|
|
790
|
+
|
|
791
|
+
---
|
|
792
|
+
|
|
793
|
+
## Complete Example: Semantic Search with OpenAI
|
|
794
|
+
|
|
795
|
+
```typescript
|
|
796
|
+
import { ChromaClient, OpenAIEmbeddingFunction } from "chromadb";
|
|
797
|
+
import * as dotenv from "dotenv";
|
|
798
|
+
|
|
799
|
+
dotenv.config();
|
|
800
|
+
|
|
801
|
+
async function semanticSearch() {
|
|
802
|
+
const client = new ChromaClient();
|
|
803
|
+
|
|
804
|
+
const embedder = new OpenAIEmbeddingFunction({
|
|
805
|
+
openai_api_key: process.env.OPENAI_API_KEY!,
|
|
806
|
+
openai_model: "text-embedding-3-small"
|
|
807
|
+
});
|
|
808
|
+
|
|
809
|
+
const collection = await client.getOrCreateCollection({
|
|
810
|
+
name: "articles",
|
|
811
|
+
embeddingFunction: embedder,
|
|
812
|
+
metadata: {
|
|
813
|
+
"hnsw:space": "cosine"
|
|
814
|
+
}
|
|
815
|
+
});
|
|
816
|
+
|
|
817
|
+
// Add articles
|
|
818
|
+
await collection.add({
|
|
819
|
+
ids: ["art1", "art2", "art3"],
|
|
820
|
+
documents: [
|
|
821
|
+
"Climate change is affecting global weather patterns",
|
|
822
|
+
"New breakthrough in quantum computing announced",
|
|
823
|
+
"The future of renewable energy looks promising"
|
|
824
|
+
],
|
|
825
|
+
metadatas: [
|
|
826
|
+
{ topic: "environment", date: "2024-01-15" },
|
|
827
|
+
{ topic: "technology", date: "2024-01-16" },
|
|
828
|
+
{ topic: "energy", date: "2024-01-17" }
|
|
829
|
+
]
|
|
830
|
+
});
|
|
831
|
+
|
|
832
|
+
// Search for relevant articles
|
|
833
|
+
const results = await collection.query({
|
|
834
|
+
queryTexts: ["Tell me about environmental issues"],
|
|
835
|
+
nResults: 3
|
|
836
|
+
});
|
|
837
|
+
|
|
838
|
+
results.documents[0].forEach((doc, idx) => {
|
|
839
|
+
console.log(`Result ${idx + 1}:`);
|
|
840
|
+
console.log(`Document: ${doc}`);
|
|
841
|
+
console.log(`Metadata: ${JSON.stringify(results.metadatas[0][idx])}`);
|
|
842
|
+
console.log(`Distance: ${results.distances![0][idx]}`);
|
|
843
|
+
console.log("---");
|
|
844
|
+
});
|
|
845
|
+
}
|
|
846
|
+
|
|
847
|
+
semanticSearch();
|
|
848
|
+
```
|
|
849
|
+
|
|
850
|
+
---
|
|
851
|
+
|
|
852
|
+
## Complete Example: RAG (Retrieval-Augmented Generation)
|
|
853
|
+
|
|
854
|
+
```typescript
|
|
855
|
+
import { ChromaClient } from "chromadb";
|
|
856
|
+
import OpenAI from "openai";
|
|
857
|
+
|
|
858
|
+
async function ragExample() {
|
|
859
|
+
// Initialize ChromaDB
|
|
860
|
+
const chroma = new ChromaClient();
|
|
861
|
+
const collection = await chroma.getOrCreateCollection({
|
|
862
|
+
name: "company_docs",
|
|
863
|
+
metadata: { "hnsw:space": "cosine" }
|
|
864
|
+
});
|
|
865
|
+
|
|
866
|
+
// Add company knowledge base
|
|
867
|
+
await collection.add({
|
|
868
|
+
ids: ["policy1", "policy2", "policy3"],
|
|
869
|
+
documents: [
|
|
870
|
+
"Our company offers 20 days of paid vacation per year",
|
|
871
|
+
"Remote work is available 3 days per week",
|
|
872
|
+
"Health insurance includes dental and vision coverage"
|
|
873
|
+
],
|
|
874
|
+
metadatas: [
|
|
875
|
+
{ type: "policy", category: "time-off" },
|
|
876
|
+
{ type: "policy", category: "work-arrangement" },
|
|
877
|
+
{ type: "policy", category: "benefits" }
|
|
878
|
+
]
|
|
879
|
+
});
|
|
880
|
+
|
|
881
|
+
// User question
|
|
882
|
+
const question = "How many vacation days do I get?";
|
|
883
|
+
|
|
884
|
+
// Retrieve relevant context
|
|
885
|
+
const searchResults = await collection.query({
|
|
886
|
+
queryTexts: [question],
|
|
887
|
+
nResults: 2
|
|
888
|
+
});
|
|
889
|
+
|
|
890
|
+
const context = searchResults.documents[0].join("\n");
|
|
891
|
+
|
|
892
|
+
// Generate answer with OpenAI
|
|
893
|
+
const openai = new OpenAI({
|
|
894
|
+
apiKey: process.env.OPENAI_API_KEY
|
|
895
|
+
});
|
|
896
|
+
|
|
897
|
+
const completion = await openai.chat.completions.create({
|
|
898
|
+
model: "gpt-4",
|
|
899
|
+
messages: [
|
|
900
|
+
{
|
|
901
|
+
role: "system",
|
|
902
|
+
content: "Answer the question based on the context provided."
|
|
903
|
+
},
|
|
904
|
+
{
|
|
905
|
+
role: "user",
|
|
906
|
+
content: `Context:\n${context}\n\nQuestion: ${question}`
|
|
907
|
+
}
|
|
908
|
+
]
|
|
909
|
+
});
|
|
910
|
+
|
|
911
|
+
console.log("Answer:", completion.choices[0].message.content);
|
|
912
|
+
}
|
|
913
|
+
|
|
914
|
+
ragExample();
|
|
915
|
+
```
|
|
916
|
+
|
|
917
|
+
---
|
|
918
|
+
|
|
919
|
+
## Environment Variables
|
|
920
|
+
|
|
921
|
+
### .env File Setup
|
|
922
|
+
|
|
923
|
+
```bash
|
|
924
|
+
# ChromaDB Server
|
|
925
|
+
CHROMA_HOST=http://localhost:8000
|
|
926
|
+
|
|
927
|
+
# Authentication (if required)
|
|
928
|
+
CHROMA_AUTH_TOKEN=your-auth-token
|
|
929
|
+
|
|
930
|
+
# Embedding API Keys
|
|
931
|
+
OPENAI_API_KEY=sk-...
|
|
932
|
+
COHERE_API_KEY=...
|
|
933
|
+
HF_API_KEY=...
|
|
934
|
+
```
|
|
935
|
+
|
|
936
|
+
### Using Environment Variables
|
|
937
|
+
|
|
938
|
+
```typescript
|
|
939
|
+
import { ChromaClient } from "chromadb";
|
|
940
|
+
import * as dotenv from "dotenv";
|
|
941
|
+
|
|
942
|
+
dotenv.config();
|
|
943
|
+
|
|
944
|
+
const client = new ChromaClient({
|
|
945
|
+
path: process.env.CHROMA_HOST || "http://localhost:8000",
|
|
946
|
+
auth: process.env.CHROMA_AUTH_TOKEN ? {
|
|
947
|
+
provider: "token",
|
|
948
|
+
credentials: process.env.CHROMA_AUTH_TOKEN
|
|
949
|
+
} : undefined
|
|
950
|
+
});
|
|
951
|
+
```
|
|
952
|
+
|
|
953
|
+
---
|
|
954
|
+
|
|
955
|
+
## TypeScript Types
|
|
956
|
+
|
|
957
|
+
### Query Results Type
|
|
958
|
+
|
|
959
|
+
```typescript
|
|
960
|
+
interface QueryResults {
|
|
961
|
+
ids: string[][];
|
|
962
|
+
embeddings?: number[][][];
|
|
963
|
+
documents: (string | null)[][];
|
|
964
|
+
metadatas: (Record<string, any> | null)[][];
|
|
965
|
+
distances?: number[][];
|
|
966
|
+
}
|
|
967
|
+
```
|
|
968
|
+
|
|
969
|
+
### Get Results Type
|
|
970
|
+
|
|
971
|
+
```typescript
|
|
972
|
+
interface GetResults {
|
|
973
|
+
ids: string[];
|
|
974
|
+
embeddings?: number[][];
|
|
975
|
+
documents: (string | null)[];
|
|
976
|
+
metadatas: (Record<string, any> | null)[];
|
|
977
|
+
}
|
|
978
|
+
```
|
|
979
|
+
|
|
980
|
+
### Collection Metadata
|
|
981
|
+
|
|
982
|
+
```typescript
|
|
983
|
+
interface CollectionMetadata {
|
|
984
|
+
[key: string]: string | number | boolean;
|
|
985
|
+
"hnsw:space"?: "cosine" | "l2" | "ip";
|
|
986
|
+
}
|
|
987
|
+
```
|
|
988
|
+
|
|
989
|
+
---
|
|
990
|
+
|
|
991
|
+
## Error Handling
|
|
992
|
+
|
|
993
|
+
### Handle Collection Not Found
|
|
994
|
+
|
|
995
|
+
```typescript
|
|
996
|
+
try {
|
|
997
|
+
const collection = await client.getCollection({
|
|
998
|
+
name: "nonexistent_collection"
|
|
999
|
+
});
|
|
1000
|
+
} catch (error) {
|
|
1001
|
+
if (error instanceof Error && error.message.includes("does not exist")) {
|
|
1002
|
+
console.error("Collection not found");
|
|
1003
|
+
// Create the collection
|
|
1004
|
+
const collection = await client.createCollection({
|
|
1005
|
+
name: "nonexistent_collection"
|
|
1006
|
+
});
|
|
1007
|
+
}
|
|
1008
|
+
}
|
|
1009
|
+
```
|
|
1010
|
+
|
|
1011
|
+
### Handle Duplicate IDs
|
|
1012
|
+
|
|
1013
|
+
```typescript
|
|
1014
|
+
try {
|
|
1015
|
+
await collection.add({
|
|
1016
|
+
ids: ["id1"],
|
|
1017
|
+
documents: ["Document"]
|
|
1018
|
+
});
|
|
1019
|
+
|
|
1020
|
+
// This will fail - ID already exists
|
|
1021
|
+
await collection.add({
|
|
1022
|
+
ids: ["id1"],
|
|
1023
|
+
documents: ["Another document"]
|
|
1024
|
+
});
|
|
1025
|
+
} catch (error) {
|
|
1026
|
+
console.error("ID already exists. Use update() or upsert() instead.");
|
|
1027
|
+
|
|
1028
|
+
// Use upsert to add or update
|
|
1029
|
+
await collection.upsert({
|
|
1030
|
+
ids: ["id1"],
|
|
1031
|
+
documents: ["Another document"]
|
|
1032
|
+
});
|
|
1033
|
+
}
|
|
1034
|
+
```
|
|
1035
|
+
|
|
1036
|
+
### Handle Connection Errors
|
|
1037
|
+
|
|
1038
|
+
```typescript
|
|
1039
|
+
try {
|
|
1040
|
+
const client = new ChromaClient({
|
|
1041
|
+
path: "http://localhost:8000"
|
|
1042
|
+
});
|
|
1043
|
+
|
|
1044
|
+
const collections = await client.listCollections();
|
|
1045
|
+
} catch (error) {
|
|
1046
|
+
if (error instanceof Error && error.message.includes("ECONNREFUSED")) {
|
|
1047
|
+
console.error("Cannot connect to ChromaDB server. Make sure it's running.");
|
|
1048
|
+
console.error("Start server with: chroma run --path ./data");
|
|
1049
|
+
} else {
|
|
1050
|
+
console.error("Error:", error);
|
|
1051
|
+
}
|
|
1052
|
+
}
|
|
1053
|
+
```
|
|
1054
|
+
|
|
1055
|
+
---
|
|
1056
|
+
|
|
1057
|
+
## Performance Optimization
|
|
1058
|
+
|
|
1059
|
+
### Batch Operations
|
|
1060
|
+
|
|
1061
|
+
```typescript
|
|
1062
|
+
// Batch add for large datasets
|
|
1063
|
+
const chunkSize = 5000;
|
|
1064
|
+
for (let i = 0; i < documents.length; i += chunkSize) {
|
|
1065
|
+
await collection.add({
|
|
1066
|
+
ids: ids.slice(i, i + chunkSize),
|
|
1067
|
+
documents: documents.slice(i, i + chunkSize),
|
|
1068
|
+
metadatas: metadatas.slice(i, i + chunkSize)
|
|
1069
|
+
});
|
|
1070
|
+
}
|
|
1071
|
+
```
|
|
1072
|
+
|
|
1073
|
+
### Parallel Queries
|
|
1074
|
+
|
|
1075
|
+
```typescript
|
|
1076
|
+
// Run multiple queries in parallel
|
|
1077
|
+
const queries = [
|
|
1078
|
+
"Query about topic A",
|
|
1079
|
+
"Query about topic B",
|
|
1080
|
+
"Query about topic C"
|
|
1081
|
+
];
|
|
1082
|
+
|
|
1083
|
+
const results = await Promise.all(
|
|
1084
|
+
queries.map(query =>
|
|
1085
|
+
collection.query({
|
|
1086
|
+
queryTexts: [query],
|
|
1087
|
+
nResults: 5
|
|
1088
|
+
})
|
|
1089
|
+
)
|
|
1090
|
+
);
|
|
1091
|
+
```
|
|
1092
|
+
|
|
1093
|
+
### Limit Included Fields
|
|
1094
|
+
|
|
1095
|
+
```typescript
|
|
1096
|
+
// Exclude embeddings for better performance
|
|
1097
|
+
const results = await collection.query({
|
|
1098
|
+
queryTexts: ["Search query"],
|
|
1099
|
+
nResults: 10,
|
|
1100
|
+
include: ["documents", "metadatas", "distances"]
|
|
1101
|
+
// Don't include embeddings unless needed
|
|
1102
|
+
});
|
|
1103
|
+
```
|
|
1104
|
+
|
|
1105
|
+
---
|
|
1106
|
+
|
|
1107
|
+
## Common Patterns
|
|
1108
|
+
|
|
1109
|
+
### Incremental Updates
|
|
1110
|
+
|
|
1111
|
+
```typescript
|
|
1112
|
+
// Add new documents daily
|
|
1113
|
+
async function addDailyDocuments(newDocs: string[]) {
|
|
1114
|
+
const count = await collection.count();
|
|
1115
|
+
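const newIds = newDocs.map((_, idx) => `doc${count + idx}`); // NOTE: count-based IDs can collide with existing IDs after deletions; prefer UUIDs or another stable unique key in production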
|
|
1116
|
+
|
|
1117
|
+
await collection.add({
|
|
1118
|
+
ids: newIds,
|
|
1119
|
+
documents: newDocs,
|
|
1120
|
+
metadatas: newDocs.map(() => ({
|
|
1121
|
+
added_date: new Date().toISOString()
|
|
1122
|
+
}))
|
|
1123
|
+
});
|
|
1124
|
+
}
|
|
1125
|
+
```
|
|
1126
|
+
|
|
1127
|
+
### Search with Fallback
|
|
1128
|
+
|
|
1129
|
+
```typescript
|
|
1130
|
+
async function searchWithFallback(query: string) {
|
|
1131
|
+
// Try specific category first
|
|
1132
|
+
let results = await collection.query({
|
|
1133
|
+
queryTexts: [query],
|
|
1134
|
+
nResults: 5,
|
|
1135
|
+
where: { category: "premium" }
|
|
1136
|
+
});
|
|
1137
|
+
|
|
1138
|
+
// If no results, search all documents
|
|
1139
|
+
if (results.ids[0].length === 0) {
|
|
1140
|
+
results = await collection.query({
|
|
1141
|
+
queryTexts: [query],
|
|
1142
|
+
nResults: 5
|
|
1143
|
+
});
|
|
1144
|
+
}
|
|
1145
|
+
|
|
1146
|
+
return results;
|
|
1147
|
+
}
|
|
1148
|
+
```
|
|
1149
|
+
|
|
1150
|
+
### Deduplicate Documents
|
|
1151
|
+
|
|
1152
|
+
```typescript
|
|
1153
|
+
async function addUnique(id: string, document: string, metadata: Record<string, any>) {
|
|
1154
|
+
try {
|
|
1155
|
+
const existing = await collection.get({ ids: [id] });
|
|
1156
|
+
if (existing.ids.length > 0) {
|
|
1157
|
+
console.log("Document already exists, updating...");
|
|
1158
|
+
await collection.update({ ids: [id], documents: [document], metadatas: [metadata] });
|
|
1159
|
+
} else {
|
|
1160
|
+
await collection.add({ ids: [id], documents: [document], metadatas: [metadata] });
|
|
1161
|
+
}
|
|
1162
|
+
} catch (error) {
|
|
1163
|
+
await collection.upsert({ ids: [id], documents: [document], metadatas: [metadata] });
|
|
1164
|
+
}
|
|
1165
|
+
}
|
|
1166
|
+
```
|
|
1167
|
+
|
|
1168
|
+
---
|
|
1169
|
+
|
|
1170
|
+
## Metadata Filter Operators
|
|
1171
|
+
|
|
1172
|
+
### Comparison Operators
|
|
1173
|
+
|
|
1174
|
+
```typescript
|
|
1175
|
+
// Equal
|
|
1176
|
+
where: { category: "tech" }
|
|
1177
|
+
where: { category: { $eq: "tech" } }
|
|
1178
|
+
|
|
1179
|
+
// Not equal
|
|
1180
|
+
where: { category: { $ne: "tech" } }
|
|
1181
|
+
|
|
1182
|
+
// Greater than
|
|
1183
|
+
where: { price: { $gt: 100 } }
|
|
1184
|
+
|
|
1185
|
+
// Greater than or equal
|
|
1186
|
+
where: { price: { $gte: 100 } }
|
|
1187
|
+
|
|
1188
|
+
// Less than
|
|
1189
|
+
where: { price: { $lt: 100 } }
|
|
1190
|
+
|
|
1191
|
+
// Less than or equal
|
|
1192
|
+
where: { price: { $lte: 100 } }
|
|
1193
|
+
```
|
|
1194
|
+
|
|
1195
|
+
### Logical Operators
|
|
1196
|
+
|
|
1197
|
+
```typescript
|
|
1198
|
+
// AND
|
|
1199
|
+
where: {
|
|
1200
|
+
$and: [
|
|
1201
|
+
{ category: "tech" },
|
|
1202
|
+
{ price: { $lt: 1000 } }
|
|
1203
|
+
]
|
|
1204
|
+
}
|
|
1205
|
+
|
|
1206
|
+
// OR
|
|
1207
|
+
where: {
|
|
1208
|
+
$or: [
|
|
1209
|
+
{ category: "tech" },
|
|
1210
|
+
{ category: "science" }
|
|
1211
|
+
]
|
|
1212
|
+
}
|
|
1213
|
+
|
|
1214
|
+
// NOT
|
|
1215
|
+
where: {
|
|
1216
|
+
category: { $ne: "archived" } // Chroma filters have no $not operator; negate with $ne or $nin
|
|
1217
|
+
}
|
|
1218
|
+
```
|
|
1219
|
+
|
|
1220
|
+
### Set Operators
|
|
1221
|
+
|
|
1222
|
+
```typescript
|
|
1223
|
+
// In array
|
|
1224
|
+
where: {
|
|
1225
|
+
category: { $in: ["tech", "science", "health"] }
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1228
|
+
// Not in array
|
|
1229
|
+
where: {
|
|
1230
|
+
category: { $nin: ["archived", "deleted"] }
|
|
1231
|
+
}
|
|
1232
|
+
```
|
|
1233
|
+
|
|
1234
|
+
---
|
|
1235
|
+
|
|
1236
|
+
## Document Filter Operators
|
|
1237
|
+
|
|
1238
|
+
### Contains
|
|
1239
|
+
|
|
1240
|
+
```typescript
|
|
1241
|
+
whereDocument: {
|
|
1242
|
+
$contains: "machine learning"
|
|
1243
|
+
}
|
|
1244
|
+
```
|
|
1245
|
+
|
|
1246
|
+
### Not Contains
|
|
1247
|
+
|
|
1248
|
+
```typescript
|
|
1249
|
+
whereDocument: {
|
|
1250
|
+
$not_contains: "deprecated"
|
|
1251
|
+
}
|
|
1252
|
+
```
|
|
1253
|
+
|
|
1254
|
+
### Combined with Metadata
|
|
1255
|
+
|
|
1256
|
+
```typescript
|
|
1257
|
+
const results = await collection.query({
|
|
1258
|
+
queryTexts: ["AI research"],
|
|
1259
|
+
where: { category: "research" },
|
|
1260
|
+
whereDocument: { $contains: "neural network" },
|
|
1261
|
+
nResults: 10
|
|
1262
|
+
});
|
|
1263
|
+
```
|