llama-index-vector-stores-chroma 0.5.5 (llama_index_vector_stores_chroma-0.5.5-py3-none-any.whl)

This diff shows the contents of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package exactly as it appears in its public registry.
llama_index/py.typed ADDED
File without changes
llama_index/vector_stores/chroma/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from llama_index.vector_stores.chroma.base import ChromaVectorStore
+
+ __all__ = ["ChromaVectorStore"]
llama_index/vector_stores/chroma/base.py ADDED
@@ -0,0 +1,708 @@
+ """Chroma vector store."""
+
+ import logging
+ import math
+ from typing import Any, Dict, Generator, List, Optional, Union, cast
+
+ import chromadb
+ from chromadb.api.models.Collection import Collection
+ from llama_index.core.bridge.pydantic import Field, PrivateAttr
+ from llama_index.core.indices.query.embedding_utils import get_top_k_mmr_embeddings
+ from llama_index.core.schema import BaseNode, MetadataMode, TextNode
+ from llama_index.core.utils import truncate_text
+ from llama_index.core.vector_stores.types import (
+     BasePydanticVectorStore,
+     MetadataFilters,
+     VectorStoreQuery,
+     VectorStoreQueryMode,
+     VectorStoreQueryResult,
+ )
+ from llama_index.core.vector_stores.utils import (
+     legacy_metadata_dict_to_node,
+     metadata_dict_to_node,
+     node_to_metadata_dict,
+ )
+
+ logger = logging.getLogger(__name__)
+
+ # MMR constants
+ DEFAULT_MMR_PREFETCH_FACTOR = 4.0
+
+
+ def _transform_chroma_filter_condition(condition: str) -> str:
+     """Translate standard metadata filter op to Chroma specific spec."""
+     if condition == "and":
+         return "$and"
+     elif condition == "or":
+         return "$or"
+     else:
+         raise ValueError(f"Filter condition {condition} not supported")
+
+
+ def _transform_chroma_filter_operator(operator: str) -> str:
+     """Translate standard metadata filter operator to Chroma specific spec."""
+     if operator == "!=":
+         return "$ne"
+     elif operator == "==":
+         return "$eq"
+     elif operator == ">":
+         return "$gt"
+     elif operator == "<":
+         return "$lt"
+     elif operator == ">=":
+         return "$gte"
+     elif operator == "<=":
+         return "$lte"
+     elif operator == "in":
+         return "$in"
+     elif operator == "nin":
+         return "$nin"
+     else:
+         raise ValueError(f"Filter operator {operator} not supported")
+
+
+ def _to_chroma_filter(
+     standard_filters: MetadataFilters,
+ ) -> dict:
+     """Translate standard metadata filters to Chroma specific spec."""
+     filters = {}
+     filters_list = []
+     condition = standard_filters.condition or "and"
+     condition = _transform_chroma_filter_condition(condition)
+     if standard_filters.filters:
+         for filter in standard_filters.filters:
+             if isinstance(filter, MetadataFilters):
+                 filters_list.append(_to_chroma_filter(filter))
+             elif filter.operator:
+                 filters_list.append(
+                     {
+                         filter.key: {
+                             _transform_chroma_filter_operator(
+                                 filter.operator
+                             ): filter.value
+                         }
+                     }
+                 )
+             else:
+                 filters_list.append({filter.key: filter.value})
+
+     if len(filters_list) == 1:
+         # If there is only one filter, return it directly
+         return filters_list[0]
+     elif len(filters_list) > 1:
+         filters[condition] = filters_list
+     return filters
+
+
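For orientation, here is a sketch of what these helpers produce; this is editorial commentary, not part of the package source, and it assumes the `MetadataFilter`, `MetadataFilters`, and `FilterOperator` classes from `llama_index.core.vector_stores.types`:

```python
from llama_index.core.vector_stores.types import (
    FilterOperator,
    MetadataFilter,
    MetadataFilters,
)

# Two filters joined with the default "and" condition.
filters = MetadataFilters(
    filters=[
        MetadataFilter(key="author", value="paul", operator=FilterOperator.EQ),
        MetadataFilter(key="year", value=2020, operator=FilterOperator.GT),
    ]
)

# _to_chroma_filter(filters) would yield a Chroma `where` clause like:
# {"$and": [{"author": {"$eq": "paul"}}, {"year": {"$gt": 2020}}]}
#
# A single filter is returned unwrapped, e.g. {"author": {"$eq": "paul"}},
# and nested MetadataFilters recurse into nested $and/$or groups.
```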
+ import_err_msg = "`chromadb` package not found, please run `pip install chromadb`"
+
+ MAX_CHUNK_SIZE = 41665  # One less than the max chunk size for ChromaDB
+
+
+ def chunk_list(
+     lst: List[BaseNode], max_chunk_size: int
+ ) -> Generator[List[BaseNode], None, None]:
+     """
+     Yield successive max_chunk_size-sized chunks from lst.
+
+     Args:
+         lst (List[BaseNode]): list of nodes with embeddings
+         max_chunk_size (int): max chunk size
+
+     Yields:
+         Generator[List[BaseNode], None, None]: list of nodes with embeddings
+
+     """
+     for i in range(0, len(lst), max_chunk_size):
+         yield lst[i : i + max_chunk_size]
+
+
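As a quick sanity check on the batching constants (editorial, not package code): `add()` further down feeds nodes through `chunk_list` so that no single `collection.add` call exceeds `MAX_CHUNK_SIZE` items, which implies the following number of batches for a given ingest size. The `num_batches` helper is hypothetical and only illustrates the arithmetic:

```python
MAX_CHUNK_SIZE = 41665  # same cap as the module constant above


def num_batches(num_nodes: int) -> int:
    # Ceiling division: every started batch counts.
    return -(-num_nodes // MAX_CHUNK_SIZE)


print(num_batches(41665))   # 1
print(num_batches(41666))   # 2
print(num_batches(100000))  # 3
```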
+ class ChromaVectorStore(BasePydanticVectorStore):
+     """
+     Chroma vector store.
+
+     In this vector store, embeddings are stored within a ChromaDB collection.
+
+     During query time, the index uses ChromaDB to query for the top
+     k most similar nodes.
+
+     Supports MMR (Maximum Marginal Relevance) search mode for improved diversity
+     in search results.
+
+     Args:
+         chroma_collection (chromadb.api.models.Collection.Collection):
+             ChromaDB collection instance
+
+     Examples:
+         `uv add llama-index-vector-stores-chroma`
+
+         ```python
+         import chromadb
+         from llama_index.vector_stores.chroma import ChromaVectorStore
+
+         # Create a Chroma client and collection
+         chroma_client = chromadb.EphemeralClient()
+         chroma_collection = chroma_client.create_collection("example_collection")
+
+         # Set up the ChromaVectorStore and StorageContext
+         vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+
+         # Use MMR mode with threshold
+         query_engine = index.as_query_engine(
+             vector_store_query_mode="mmr",
+             vector_store_kwargs={"mmr_threshold": 0.5}
+         )
+         ```
+
+     """
+
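The docstring example above queries an `index` that the snippet never builds. As a point of reference (editorial commentary, not from the package), the usual wiring with llama-index-core looks roughly like this; names such as `example_collection` are illustrative:

```python
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore

chroma_client = chromadb.EphemeralClient()
chroma_collection = chroma_client.create_collection("example_collection")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# For ingestion, attach the store to a StorageContext and build the index
# from documents:
#   storage_context = StorageContext.from_defaults(vector_store=vector_store)
#   index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
# For querying an already-populated collection:
index = VectorStoreIndex.from_vector_store(vector_store)

query_engine = index.as_query_engine(
    vector_store_query_mode="mmr",
    vector_store_kwargs={"mmr_threshold": 0.5},
)
```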
+     stores_text: bool = True
+     flat_metadata: bool = True
+
+     collection_name: Optional[str]
+     host: Optional[str]
+     port: Optional[Union[str, int]]
+     ssl: bool
+     headers: Optional[Dict[str, str]]
+     persist_dir: Optional[str]
+     collection_kwargs: Dict[str, Any] = Field(default_factory=dict)
+
+     _collection: Collection = PrivateAttr()
+
+     def __init__(
+         self,
+         chroma_collection: Optional[Any] = None,
+         collection_name: Optional[str] = None,
+         host: Optional[str] = None,
+         port: Optional[Union[str, int]] = None,
+         ssl: bool = False,
+         headers: Optional[Dict[str, str]] = None,
+         persist_dir: Optional[str] = None,
+         collection_kwargs: Optional[dict] = None,
+         **kwargs: Any,
+     ) -> None:
+         """Init params."""
+         collection_kwargs = collection_kwargs or {}
+
+         super().__init__(
+             host=host,
+             port=port,
+             ssl=ssl,
+             headers=headers,
+             collection_name=collection_name,
+             persist_dir=persist_dir,
+             collection_kwargs=collection_kwargs or {},
+         )
+         if chroma_collection is None:
+             client = chromadb.HttpClient(host=host, port=port, ssl=ssl, headers=headers)
+             self._collection = client.get_or_create_collection(
+                 name=collection_name, **collection_kwargs
+             )
+         else:
+             self._collection = cast(Collection, chroma_collection)
+
+     @classmethod
+     def from_collection(cls, collection: Any) -> "ChromaVectorStore":
+         try:
+             from chromadb import Collection
+         except ImportError:
+             raise ImportError(import_err_msg)
+
+         if not isinstance(collection, Collection):
+             raise Exception("argument is not chromadb collection instance")
+
+         return cls(chroma_collection=collection)
+
+     @classmethod
+     def from_params(
+         cls,
+         collection_name: str,
+         host: Optional[str] = None,
+         port: Optional[Union[str, int]] = None,
+         ssl: bool = False,
+         headers: Optional[Dict[str, str]] = None,
+         persist_dir: Optional[str] = None,
+         collection_kwargs: dict = {},
+         **kwargs: Any,
+     ) -> "ChromaVectorStore":
+         if persist_dir:
+             client = chromadb.PersistentClient(path=persist_dir)
+             collection = client.get_or_create_collection(
+                 name=collection_name, **collection_kwargs
+             )
+         elif host and port:
+             client = chromadb.HttpClient(host=host, port=port, ssl=ssl, headers=headers)
+             collection = client.get_or_create_collection(
+                 name=collection_name, **collection_kwargs
+             )
+         else:
+             raise ValueError(
+                 "Either `persist_dir` or (`host`,`port`) must be specified"
+             )
+         return cls(
+             chroma_collection=collection,
+             host=host,
+             port=port,
+             ssl=ssl,
+             headers=headers,
+             persist_dir=persist_dir,
+             collection_kwargs=collection_kwargs,
+             **kwargs,
+         )
+
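A brief sketch of the two construction paths `from_params` supports, local persistent storage versus a remote Chroma server; the directory, host, and port values are illustrative:

```python
from llama_index.vector_stores.chroma import ChromaVectorStore

# Local, on-disk collection via chromadb.PersistentClient
local_store = ChromaVectorStore.from_params(
    collection_name="example_collection",
    persist_dir="./chroma_db",
)

# Remote Chroma server via chromadb.HttpClient
remote_store = ChromaVectorStore.from_params(
    collection_name="example_collection",
    host="localhost",
    port=8000,
)
```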
+     @classmethod
+     def class_name(cls) -> str:
+         return "ChromaVectorStore"
+
+     def get_nodes(
+         self,
+         node_ids: Optional[List[str]],
+         filters: Optional[List[MetadataFilters]] = None,
+     ) -> List[BaseNode]:
+         """
+         Get nodes from index.
+
+         Args:
+             node_ids (List[str]): list of node ids
+             filters (List[MetadataFilters]): list of metadata filters
+
+         """
+         if not self._collection:
+             raise ValueError("Collection not initialized")
+
+         node_ids = node_ids or None
+
+         if filters:
+             where = _to_chroma_filter(filters)
+         else:
+             where = None
+
+         result = self._get(None, where=where, ids=node_ids)
+
+         return result.nodes
+
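For example (illustrative IDs and metadata, reusing the `vector_store` from the earlier example), nodes can be fetched back by ID or by metadata filter. Note that although the signature annotates `filters` as `List[MetadataFilters]`, the body passes the argument straight to `_to_chroma_filter`, which expects a single `MetadataFilters` object:

```python
from llama_index.core.vector_stores.types import MetadataFilter, MetadataFilters

# By node ID
nodes = vector_store.get_nodes(node_ids=["node-1", "node-2"])

# By metadata filter (node_ids may be None)
nodes = vector_store.get_nodes(
    node_ids=None,
    filters=MetadataFilters(filters=[MetadataFilter(key="author", value="paul")]),
)
```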
+     def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]:
+         """
+         Add nodes to index.
+
+         Args:
+             nodes: List[BaseNode]: list of nodes with embeddings
+
+         """
+         if not self._collection:
+             raise ValueError("Collection not initialized")
+
+         max_chunk_size = MAX_CHUNK_SIZE
+         node_chunks = chunk_list(nodes, max_chunk_size)
+
+         all_ids = []
+         for node_chunk in node_chunks:
+             embeddings = []
+             metadatas = []
+             ids = []
+             documents = []
+             for node in node_chunk:
+                 embeddings.append(node.get_embedding())
+                 metadata_dict = node_to_metadata_dict(
+                     node, remove_text=True, flat_metadata=self.flat_metadata
+                 )
+                 for key in metadata_dict:
+                     if metadata_dict[key] is None:
+                         metadata_dict[key] = ""
+                 metadatas.append(metadata_dict)
+                 ids.append(node.node_id)
+                 documents.append(node.get_content(metadata_mode=MetadataMode.NONE))
+
+             self._collection.add(
+                 embeddings=embeddings,
+                 ids=ids,
+                 metadatas=metadatas,
+                 documents=documents,
+             )
+             all_ids.extend(ids)
+
+         return all_ids
+
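A minimal sketch of inserting pre-embedded nodes directly through the store (the embedding values are placeholders; in practice they come from an embedding model or an ingestion pipeline):

```python
from llama_index.core.schema import TextNode

nodes = [
    TextNode(text="The quick brown fox", embedding=[0.1, 0.2, 0.3]),
    TextNode(text="jumps over the lazy dog", embedding=[0.3, 0.2, 0.1]),
]

# Returns the node IDs that were written to the Chroma collection.
inserted_ids = vector_store.add(nodes)
```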
+     def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
+         """
+         Delete nodes using ref_doc_id.
+
+         Args:
+             ref_doc_id (str): The doc_id of the document to delete.
+
+         """
+         self._collection.delete(where={"document_id": ref_doc_id})
+
+     def delete_nodes(
+         self,
+         node_ids: Optional[List[str]] = None,
+         filters: Optional[List[MetadataFilters]] = None,
+     ) -> None:
+         """
+         Delete nodes from index.
+
+         Args:
+             node_ids (List[str]): list of node ids
+             filters (List[MetadataFilters]): list of metadata filters
+
+         """
+         if not self._collection:
+             raise ValueError("Collection not initialized")
+
+         node_ids = node_ids or []
+
+         if filters:
+             where = _to_chroma_filter(filters)
+             self._collection.delete(ids=node_ids, where=where)
+
+         else:
+             self._collection.delete(ids=node_ids)
+
+     def clear(self) -> None:
+         """Clear the collection."""
+         ids = self._collection.get()["ids"]
+         self._collection.delete(ids=ids)
+
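Deletion works at three granularities, sketched below with illustrative IDs. Note that `delete()` matches on the `document_id` metadata key that `node_to_metadata_dict` records for each node's source document when nodes are added:

```python
# Delete every node that came from one source document
vector_store.delete(ref_doc_id="my-source-doc")

# Delete specific nodes by ID (optionally combined with metadata filters)
vector_store.delete_nodes(node_ids=["node-1", "node-2"])

# Remove everything in the collection
vector_store.clear()
```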
+     @property
+     def client(self) -> Any:
+         """Return client."""
+         return self._collection
+
+     def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
+         """
+         Query index for top k most similar nodes.
+
+         Args:
+             query (VectorStoreQuery): Query object containing:
+                 - query_embedding (List[float]): query embedding
+                 - similarity_top_k (int): top k most similar nodes
+                 - filters (Optional[MetadataFilters]): metadata filters to apply
+                 - mode (VectorStoreQueryMode): query mode (default or MMR)
+             **kwargs: Additional keyword arguments passed to ChromaDB query method.
+                 For MMR mode, supports:
+                 - mmr_threshold (Optional[float]): MMR threshold between 0 and 1
+                 - mmr_prefetch_factor (Optional[float]): Factor to multiply similarity_top_k
+                   for prefetching candidates (default: 4.0)
+                 - mmr_prefetch_k (Optional[int]): Explicit number of candidates to prefetch
+                   (cannot be used with mmr_prefetch_factor)
+                 For ChromaDB-specific parameters:
+                 - where (dict): ChromaDB where clause (use query.filters instead for standard filtering)
+                 - include (List[str]): ChromaDB include parameter
+                 - where_document (dict): ChromaDB where_document parameter
+
+         Returns:
+             VectorStoreQueryResult: Query result containing matched nodes, similarities, and IDs.
+
+         Raises:
+             ValueError: If MMR parameters are invalid or if both query.filters and
+                 where kwargs are specified.
+
+         """
+         if query.filters is not None:
+             if "where" in kwargs:
+                 raise ValueError(
+                     "Cannot specify metadata filters via both query and kwargs. "
+                     "Use kwargs only for chroma specific items that are "
+                     "not supported via the generic query interface."
+                 )
+             where = _to_chroma_filter(query.filters)
+         else:
+             where = kwargs.pop("where", None)
+
+         if not query.query_embedding:
+             return self._get(limit=query.similarity_top_k, where=where, **kwargs)
+
+         # Handle MMR mode
+         if query.mode == VectorStoreQueryMode.MMR:
+             return self._mmr_search(query, where, **kwargs)
+
+         return self._query(
+             query_embeddings=query.query_embedding,
+             n_results=query.similarity_top_k,
+             where=where,
+             **kwargs,
+         )
+
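Called directly rather than through a query engine, the method takes a `VectorStoreQuery`; a sketch with a placeholder embedding (normally produced by an embedding model):

```python
from llama_index.core.vector_stores.types import (
    VectorStoreQuery,
    VectorStoreQueryMode,
)

query_embedding = [0.1, 0.2, 0.3]  # placeholder

# Default (pure similarity) search
result = vector_store.query(
    VectorStoreQuery(query_embedding=query_embedding, similarity_top_k=5)
)

# MMR search: prefetches 4x candidates by default, then re-ranks for diversity
mmr_result = vector_store.query(
    VectorStoreQuery(
        query_embedding=query_embedding,
        similarity_top_k=5,
        mode=VectorStoreQueryMode.MMR,
    ),
    mmr_threshold=0.5,
)
```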
+     def _query(
+         self, query_embeddings: List["float"], n_results: int, where: dict, **kwargs
+     ) -> VectorStoreQueryResult:
+         if where:
+             results = self._collection.query(
+                 query_embeddings=query_embeddings,
+                 n_results=n_results,
+                 where=where,
+                 **kwargs,
+             )
+         else:
+             results = self._collection.query(
+                 query_embeddings=query_embeddings,
+                 n_results=n_results,
+                 **kwargs,
+             )
+
+         logger.debug(f"> Top {len(results['documents'][0])} nodes:")
+         nodes = []
+         similarities = []
+         ids = []
+         for node_id, text, metadata, distance in zip(
+             results["ids"][0],
+             results["documents"][0],
+             results["metadatas"][0],
+             results["distances"][0],
+         ):
+             try:
+                 node = metadata_dict_to_node(metadata, text=text)
+             except Exception:
+                 # NOTE: deprecated legacy logic for backward compatibility
+                 metadata, node_info, relationships = legacy_metadata_dict_to_node(
+                     metadata
+                 )
+
+                 node = TextNode(
+                     text=text or "",
+                     id_=node_id,
+                     metadata=metadata,
+                     start_char_idx=node_info.get("start", None),
+                     end_char_idx=node_info.get("end", None),
+                     relationships=relationships,
+                 )
+
+             nodes.append(node)
+
+             similarity_score = math.exp(-distance)
+             similarities.append(similarity_score)
+
+             logger.debug(
+                 f"> [Node {node_id}] [Similarity score: {similarity_score}] "
+                 f"{truncate_text(str(text), 100)}"
+             )
+             ids.append(node_id)
+
+         return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)
+
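Worth noting: Chroma returns distances, and `_query` converts them to similarity scores with `exp(-distance)`, so a distance of 0 maps to a score of 1.0 and larger distances decay toward 0. A tiny check of that mapping:

```python
import math

for distance in (0.0, 0.25, 1.0, 4.0):
    print(distance, round(math.exp(-distance), 3))
# 0.0 1.0
# 0.25 0.779
# 1.0 0.368
# 4.0 0.018
```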
+     def _mmr_search(
+         self, query: VectorStoreQuery, where: dict, **kwargs
+     ) -> VectorStoreQueryResult:
+         """
+         Perform MMR search using ChromaDB.
+
+         Args:
+             query: VectorStoreQuery object containing the query parameters
+             where: ChromaDB filter conditions
+             **kwargs: Additional keyword arguments including mmr_threshold
+
+         Returns:
+             VectorStoreQueryResult: Query result with MMR-applied nodes
+
+         """
+         # Extract MMR parameters
+         mmr_threshold = kwargs.get("mmr_threshold")
+
+         # Validate MMR parameters
+         if mmr_threshold is not None and (
+             not isinstance(mmr_threshold, (int, float))
+             or mmr_threshold < 0
+             or mmr_threshold > 1
+         ):
+             raise ValueError("mmr_threshold must be a float between 0 and 1")
+
+         # Validate prefetch parameters (check before popping)
+         raw_prefetch_factor = kwargs.get("mmr_prefetch_factor")
+         raw_prefetch_k = kwargs.get("mmr_prefetch_k")
+         if raw_prefetch_factor is not None and raw_prefetch_k is not None:
+             raise ValueError(
+                 "'mmr_prefetch_factor' and 'mmr_prefetch_k' "
+                 "cannot coexist in a call to query()"
+             )
+
+         # Strip MMR-only kwargs so they aren't forwarded to Chroma
+         mmr_threshold = kwargs.pop("mmr_threshold", None)
+         prefetch_k_override = kwargs.pop("mmr_prefetch_k", None)
+         prefetch_factor = kwargs.pop("mmr_prefetch_factor", DEFAULT_MMR_PREFETCH_FACTOR)
+
+         # Calculate prefetch size (get more candidates than needed for MMR)
+         if prefetch_k_override is not None:
+             prefetch_k = int(prefetch_k_override)
+         else:
+             prefetch_k = int(query.similarity_top_k * prefetch_factor)
+
+         # Ensure prefetch_k is at least as large as similarity_top_k
+         prefetch_k = max(prefetch_k, query.similarity_top_k)
+
+         logger.debug(
+             f"MMR search: prefetching {prefetch_k} candidates for {query.similarity_top_k} final results"
+         )
+
+         # Query ChromaDB for more candidates than needed (kwargs now safe)
+         if where:
+             prefetch_results = self._collection.query(
+                 query_embeddings=query.query_embedding,
+                 n_results=prefetch_k,
+                 where=where,
+                 include=["embeddings", "documents", "metadatas", "distances"],
+                 **kwargs,
+             )
+         else:
+             prefetch_results = self._collection.query(
+                 query_embeddings=query.query_embedding,
+                 n_results=prefetch_k,
+                 include=["embeddings", "documents", "metadatas", "distances"],
+                 **kwargs,
+             )
+
+         # Extract embeddings and metadata for MMR processing
+         prefetch_embeddings = []
+         prefetch_ids = []
+         prefetch_metadata = []
+         prefetch_documents = []
+         prefetch_distances = []
+
+         # Process prefetch results
+         for i in range(len(prefetch_results["ids"][0])):
+             node_id = prefetch_results["ids"][0][i]
+             text = prefetch_results["documents"][0][i]
+             metadata = prefetch_results["metadatas"][0][i]
+             distance = prefetch_results["distances"][0][i]
+
+             # Get the actual embedding from ChromaDB results
+             if "embeddings" in prefetch_results and prefetch_results["embeddings"]:
+                 embedding = prefetch_results["embeddings"][0][i]
+             else:
+                 # Fallback: if embeddings not available, we'll use distance-based approach
+                 embedding = None
+
+             # Store for MMR processing
+             prefetch_embeddings.append(embedding)
+             prefetch_ids.append(node_id)
+             prefetch_metadata.append(metadata)
+             prefetch_documents.append(text)
+             prefetch_distances.append(distance)
+
+         if not prefetch_embeddings:
+             logger.warning("No results found during MMR prefetch")
+             return VectorStoreQueryResult(nodes=[], similarities=[], ids=[])
+
+         # Check if we have valid embeddings for MMR
+         valid_embeddings = [emb for emb in prefetch_embeddings if emb is not None]
+
+         if len(valid_embeddings) < query.similarity_top_k:
+             logger.warning(
+                 f"Not enough valid embeddings for MMR: {len(valid_embeddings)} < {query.similarity_top_k}"
+             )
+             # Fallback to regular similarity search
+             return self._query(
+                 query_embeddings=query.query_embedding,
+                 n_results=query.similarity_top_k,
+                 where=where,
+                 **kwargs,
+             )
+
+         # Apply MMR algorithm using the core utility function
+         mmr_similarities, mmr_indices = get_top_k_mmr_embeddings(
+             query_embedding=query.query_embedding,
+             embeddings=valid_embeddings,
+             similarity_top_k=query.similarity_top_k,
+             embedding_ids=list(range(len(valid_embeddings))),
+             mmr_threshold=mmr_threshold,
+         )
+
+         # Build final results based on MMR selection
+         final_nodes = []
+         final_similarities = []
+         final_ids = []
+
+         # Create a mapping from valid embedding indices to original prefetch indices
+         valid_indices = [
+             i for i, emb in enumerate(prefetch_embeddings) if emb is not None
+         ]
+
+         for mmr_index in mmr_indices:
+             if mmr_index < len(valid_indices):
+                 original_index = valid_indices[mmr_index]
+                 if original_index < len(prefetch_ids):
+                     node_id = prefetch_ids[original_index]
+                     text = prefetch_documents[original_index]
+                     metadata = prefetch_metadata[original_index]
+                     distance = prefetch_distances[original_index]
+
+                     # Create node (reusing logic from _query method)
+                     try:
+                         node = metadata_dict_to_node(metadata, text=text)
+                     except Exception:
+                         # NOTE: deprecated legacy logic for backward compatibility
+                         metadata, node_info, relationships = (
+                             legacy_metadata_dict_to_node(metadata)
+                         )
+
+                         node = TextNode(
+                             text=text or "",
+                             id_=node_id,
+                             metadata=metadata,
+                             start_char_idx=node_info.get("start", None),
+                             end_char_idx=node_info.get("end", None),
+                             relationships=relationships,
+                         )
+
+                     final_nodes.append(node)
+                     final_similarities.append(math.exp(-distance))
+                     final_ids.append(node_id)
+
+         logger.debug(
+             f"MMR search completed: {len(final_nodes)} results selected from {len(prefetch_embeddings)} candidates"
+         )
+
+         return VectorStoreQueryResult(
+             nodes=final_nodes, similarities=final_similarities, ids=final_ids
+         )
+
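The prefetch sizing above reduces to a few lines of arithmetic: the default factor is 4.0, an explicit `mmr_prefetch_k` takes precedence when given, and the candidate count never drops below `similarity_top_k`. A standalone sketch of the same logic (the `mmr_prefetch_size` helper is hypothetical, written only to illustrate):

```python
DEFAULT_MMR_PREFETCH_FACTOR = 4.0


def mmr_prefetch_size(similarity_top_k, prefetch_factor=None, prefetch_k=None):
    if prefetch_factor is not None and prefetch_k is not None:
        raise ValueError("'mmr_prefetch_factor' and 'mmr_prefetch_k' cannot coexist")
    if prefetch_k is not None:
        candidates = int(prefetch_k)
    else:
        candidates = int(similarity_top_k * (prefetch_factor or DEFAULT_MMR_PREFETCH_FACTOR))
    return max(candidates, similarity_top_k)


print(mmr_prefetch_size(5))                # 20
print(mmr_prefetch_size(5, prefetch_k=8))  # 8
print(mmr_prefetch_size(5, prefetch_k=2))  # 5 (never below similarity_top_k)
```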
+     def _get(
+         self, limit: Optional[int], where: dict, **kwargs
+     ) -> VectorStoreQueryResult:
+         if where:
+             results = self._collection.get(
+                 limit=limit,
+                 where=where,
+                 **kwargs,
+             )
+         else:
+             results = self._collection.get(
+                 limit=limit,
+                 **kwargs,
+             )
+
+         logger.debug(f"> Top {len(results['documents'])} nodes:")
+         nodes = []
+         ids = []
+
+         if not results["ids"]:
+             results["ids"] = [[]]
+
+         for node_id, text, metadata in zip(
+             results["ids"], results["documents"], results["metadatas"]
+         ):
+             try:
+                 node = metadata_dict_to_node(metadata, text=text)
+             except Exception:
+                 # NOTE: deprecated legacy logic for backward compatibility
+                 metadata, node_info, relationships = legacy_metadata_dict_to_node(
+                     metadata
+                 )
+
+                 node = TextNode(
+                     text=text or "",
+                     id_=node_id,
+                     metadata=metadata,
+                     start_char_idx=node_info.get("start", None),
+                     end_char_idx=node_info.get("end", None),
+                     relationships=relationships,
+                 )
+
+             nodes.append(node)
+
+             logger.debug(
+                 f"> [Node {node_id}] [Similarity score: N/A - using get()] "
+                 f"{truncate_text(str(text), 100)}"
+             )
+             ids.append(node_id)
+
+         return VectorStoreQueryResult(nodes=nodes, ids=ids)
llama_index_vector_stores_chroma-0.5.5.dist-info/METADATA ADDED
@@ -0,0 +1,13 @@
+ Metadata-Version: 2.4
+ Name: llama-index-vector-stores-chroma
+ Version: 0.5.5
+ Summary: llama-index vector_stores chroma integration
+ Author-email: Your Name <you@example.com>
+ License-Expression: MIT
+ License-File: LICENSE
+ Requires-Python: <4.0,>=3.10
+ Requires-Dist: chromadb>=0.5.17
+ Requires-Dist: llama-index-core<0.15,>=0.13.0
+ Description-Content-Type: text/markdown
+
+ # LlamaIndex Vector_Stores Integration: Chroma
llama_index_vector_stores_chroma-0.5.5.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ llama_index/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llama_index/vector_stores/chroma/__init__.py,sha256=QNMK-nHKEt-wmks5mhWfdOKDybpmsqrL4neV-HCA6N4,101
+ llama_index/vector_stores/chroma/base.py,sha256=a1vzhTREGAM-CaEGpbki6u3rnKqIQQzNQonVOozTgyQ,24473
+ llama_index_vector_stores_chroma-0.5.5.dist-info/METADATA,sha256=Jrl0l3LuHI6TSgN4m8gDuYdbyUp6OggpP4dmSoof_Yc,413
+ llama_index_vector_stores_chroma-0.5.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ llama_index_vector_stores_chroma-0.5.5.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
+ llama_index_vector_stores_chroma-0.5.5.dist-info/RECORD,,
llama_index_vector_stores_chroma-0.5.5.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.28.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
llama_index_vector_stores_chroma-0.5.5.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,21 @@
+ The MIT License
+
+ Copyright (c) Jerry Liu
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.