endee-llamaindex 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
1
+ Metadata-Version: 2.4
2
+ Name: endee-llamaindex
3
+ Version: 0.1.2
4
+ Summary: Vector Database for Fast ANN Searches
5
+ Home-page: https://endee.io
6
+ Author: Endee Labs
7
+ Author-email: vineet@endee.io
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.6
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: llama-index>=0.12.34
14
+ Requires-Dist: endee>=0.1.2
15
+ Dynamic: author
16
+ Dynamic: author-email
17
+ Dynamic: classifier
18
+ Dynamic: description
19
+ Dynamic: description-content-type
20
+ Dynamic: home-page
21
+ Dynamic: requires-dist
22
+ Dynamic: requires-python
23
+ Dynamic: summary
24
+
25
+ # Endee LlamaIndex Integration
26
+
27
+ This package provides an integration between [Endee](https://endeedb.ai) (a vector database) and [LlamaIndex](https://www.llamaindex.ai/), allowing you to use Endee as a vector store backend for LlamaIndex.
28
+
29
+ ## Features
30
+
31
+ - **Vector Storage**: Use Endee for your LlamaIndex embeddings
32
+ - **Multiple Distance Metrics**: Support for cosine, L2, and inner product distance metrics
33
+ - **Metadata Filtering**: Filter search results based on metadata
34
+ - **High Performance**: Optimized for speed and efficiency
35
+
36
+ ## Installation
37
+
38
+ ```bash
39
+ pip install endee-llamaindex
40
+ ```
41
+
42
+ This installs the `endee-llamaindex` package together with its dependencies (`endee` and `llama-index`).
43
+
44
+ ## Quick Start
45
+
46
+ ```python
47
+ import os
48
+ from llama_index.core.schema import TextNode
49
+ from llama_index.core.vector_stores.types import VectorStoreQuery
50
+ from endee_llamaindex import EndeeVectorStore
51
+
52
+ # Configure your Endee credentials
53
+ api_token = os.environ.get("ENDEE_API_TOKEN")
54
+ index_name = "my_llamaindex_vectors"
55
+ dimension = 1536 # OpenAI ada-002 embedding dimension
56
+
57
+ # Initialize the vector store
58
+ vector_store = EndeeVectorStore.from_params(
59
+ api_token=api_token,
60
+ index_name=index_name,
61
+ dimension=dimension,
62
+ space_type="cosine"
63
+ )
64
+
65
+ # Create a node with embedding
66
+ node = TextNode(
67
+ text="This is a sample document",
68
+ id_="doc1",
69
+ embedding=[0.1, 0.2, 0.3, ...], # Your embedding vector
70
+ metadata={
71
+ "doc_id": "doc1",
72
+ "source": "example",
73
+ "author": "Endee"
74
+ }
75
+ )
76
+
77
+ # Add the node to the vector store
78
+ vector_store.add([node])
79
+
80
+ # Query the vector store
81
+ query = VectorStoreQuery(
82
+ query_embedding=[0.2, 0.3, 0.4, ...], # Your query vector
83
+ similarity_top_k=5
84
+ )
85
+
86
+ results = vector_store.query(query)
87
+
88
+ # Process results
89
+ for node, score in zip(results.nodes, results.similarities):
90
+ print(f"Node ID: {node.node_id}, Similarity: {score}")
91
+ print(f"Text: {node.text}")
92
+ print(f"Metadata: {node.metadata}")
93
+ ```
94
+
95
+ ## Using with LlamaIndex
96
+
97
+ ```python
98
+ from llama_index.core import VectorStoreIndex, StorageContext
99
+ from llama_index.embeddings.openai import OpenAIEmbedding
100
+
101
+ # Initialize your nodes or documents
102
+ nodes = [...] # Your nodes with text but no embeddings yet
103
+
104
+ # Setup embedding function
105
+ embed_model = OpenAIEmbedding() # Or any other embedding model
106
+
107
+ # Initialize Endee vector store
108
+ vector_store = EndeeVectorStore.from_params(
109
+ api_token=api_token,
110
+ index_name=index_name,
111
+ dimension=1536, # Make sure this matches your embedding dimension
112
+ )
113
+
114
+ # Create storage context
115
+ storage_context = StorageContext.from_defaults(vector_store=vector_store)
116
+
117
+ # Create vector index
118
+ index = VectorStoreIndex(
119
+ nodes,
120
+ storage_context=storage_context,
121
+ embed_model=embed_model
122
+ )
123
+
124
+ # Query the index
125
+ query_engine = index.as_query_engine()
126
+ response = query_engine.query("Your query here")
127
+ print(response)
128
+ ```
129
+
130
+ ## Configuration Options
131
+
132
+ The `EndeeVectorStore` constructor accepts the following parameters:
133
+
134
+ - `api_token`: Your Endee API token
135
+ - `index_name`: Name of the Endee index
136
+ - `dimension`: Vector dimension (required when creating a new index)
137
+ - `space_type`: Distance metric, one of "cosine", "l2", or "ip" (default: "cosine")
138
+ - `batch_size`: Number of vectors to insert in a single API call (default: 100)
139
+ - `text_key`: Key to use for storing text in metadata (default: "text")
140
+ - `remove_text_from_metadata`: Whether to remove text from metadata (default: False)
@@ -0,0 +1,116 @@
1
+ # Endee LlamaIndex Integration
2
+
3
+ This package provides an integration between [Endee](https://endeedb.ai) (a vector database) and [LlamaIndex](https://www.llamaindex.ai/), allowing you to use Endee as a vector store backend for LlamaIndex.
4
+
5
+ ## Features
6
+
7
+ - **Vector Storage**: Use Endee for your LlamaIndex embeddings
8
+ - **Multiple Distance Metrics**: Support for cosine, L2, and inner product distance metrics
9
+ - **Metadata Filtering**: Filter search results based on metadata
10
+ - **High Performance**: Optimized for speed and efficiency
11
+
12
+ ## Installation
13
+
14
+ ```bash
15
+ pip install endee-llamaindex
16
+ ```
17
+
18
+ This installs the `endee-llamaindex` package together with its dependencies (`endee` and `llama-index`).
19
+
20
+ ## Quick Start
21
+
22
+ ```python
23
+ import os
24
+ from llama_index.core.schema import TextNode
25
+ from llama_index.core.vector_stores.types import VectorStoreQuery
26
+ from endee_llamaindex import EndeeVectorStore
27
+
28
+ # Configure your Endee credentials
29
+ api_token = os.environ.get("ENDEE_API_TOKEN")
30
+ index_name = "my_llamaindex_vectors"
31
+ dimension = 1536 # OpenAI ada-002 embedding dimension
32
+
33
+ # Initialize the vector store
34
+ vector_store = EndeeVectorStore.from_params(
35
+ api_token=api_token,
36
+ index_name=index_name,
37
+ dimension=dimension,
38
+ space_type="cosine"
39
+ )
40
+
41
+ # Create a node with embedding
42
+ node = TextNode(
43
+ text="This is a sample document",
44
+ id_="doc1",
45
+ embedding=[0.1, 0.2, 0.3, ...], # Your embedding vector
46
+ metadata={
47
+ "doc_id": "doc1",
48
+ "source": "example",
49
+ "author": "Endee"
50
+ }
51
+ )
52
+
53
+ # Add the node to the vector store
54
+ vector_store.add([node])
55
+
56
+ # Query the vector store
57
+ query = VectorStoreQuery(
58
+ query_embedding=[0.2, 0.3, 0.4, ...], # Your query vector
59
+ similarity_top_k=5
60
+ )
61
+
62
+ results = vector_store.query(query)
63
+
64
+ # Process results
65
+ for node, score in zip(results.nodes, results.similarities):
66
+ print(f"Node ID: {node.node_id}, Similarity: {score}")
67
+ print(f"Text: {node.text}")
68
+ print(f"Metadata: {node.metadata}")
69
+ ```
70
+
71
+ ## Using with LlamaIndex
72
+
73
+ ```python
74
+ from llama_index.core import VectorStoreIndex, StorageContext
75
+ from llama_index.embeddings.openai import OpenAIEmbedding
76
+
77
+ # Initialize your nodes or documents
78
+ nodes = [...] # Your nodes with text but no embeddings yet
79
+
80
+ # Setup embedding function
81
+ embed_model = OpenAIEmbedding() # Or any other embedding model
82
+
83
+ # Initialize Endee vector store
84
+ vector_store = EndeeVectorStore.from_params(
85
+ api_token=api_token,
86
+ index_name=index_name,
87
+ dimension=1536, # Make sure this matches your embedding dimension
88
+ )
89
+
90
+ # Create storage context
91
+ storage_context = StorageContext.from_defaults(vector_store=vector_store)
92
+
93
+ # Create vector index
94
+ index = VectorStoreIndex(
95
+ nodes,
96
+ storage_context=storage_context,
97
+ embed_model=embed_model
98
+ )
99
+
100
+ # Query the index
101
+ query_engine = index.as_query_engine()
102
+ response = query_engine.query("Your query here")
103
+ print(response)
104
+ ```
105
+
106
+ ## Configuration Options
107
+
108
+ The `EndeeVectorStore` constructor accepts the following parameters:
109
+
110
+ - `api_token`: Your Endee API token
111
+ - `index_name`: Name of the Endee index
112
+ - `dimension`: Vector dimension (required when creating a new index)
113
+ - `space_type`: Distance metric, one of "cosine", "l2", or "ip" (default: "cosine")
114
+ - `batch_size`: Number of vectors to insert in a single API call (default: 100)
115
+ - `text_key`: Key to use for storing text in metadata (default: "text")
116
+ - `remove_text_from_metadata`: Whether to remove text from metadata (default: False)
@@ -0,0 +1,3 @@
1
+ from endee_llamaindex.base import EndeeVectorStore
2
+
3
+ __all__ = ["EndeeVectorStore"]
@@ -0,0 +1,416 @@
1
+ import logging
2
+ from collections import Counter
3
+ from functools import partial
4
+ import json
5
+ from typing import Any, Callable, Dict, List, Optional, cast
6
+
7
+ from llama_index.core.bridge.pydantic import PrivateAttr
8
+ from llama_index.core.schema import BaseNode, MetadataMode, TextNode
9
+ from llama_index.core.vector_stores.types import (
10
+ BasePydanticVectorStore,
11
+ MetadataFilters,
12
+ VectorStoreQuery,
13
+ VectorStoreQueryMode,
14
+ VectorStoreQueryResult,
15
+ )
16
+ from llama_index.core.vector_stores.utils import (
17
+ DEFAULT_TEXT_KEY,
18
+ legacy_metadata_dict_to_node,
19
+ metadata_dict_to_node,
20
+ node_to_metadata_dict,
21
+ )
22
+
23
+ from datetime import datetime
24
+
25
+ def _import_endee() -> Any:
26
+ """
27
+ Try to import endee module. If it's not already installed, instruct user how to install.
28
+ """
29
+ try:
30
+ import endee
31
+ from endee.endee_client import Endee
32
+ except ImportError as e:
33
+ raise ImportError(
34
+ "Could not import endee python package. "
35
+ "Please install it with `pip install endee`."
36
+ ) from e
37
+ return endee
38
+
39
# Key names for the fields of a vector record.
# NOTE(review): none of these four keys is referenced by the code visible in
# this module (``EndeeVectorStore.add`` builds its entries with the literal
# keys "id", "vector", "meta" and "filter") - confirm whether they are still
# needed.
ID_KEY = "id"
VECTOR_KEY = "values"
SPARSE_VECTOR_KEY = "sparse_values"
METADATA_KEY = "metadata"

# Default value of the ``batch_size`` parameter, i.e. how many entries are
# sent to the index per upsert call.
DEFAULT_BATCH_SIZE = 100

# Module-level logger named after this module.
_logger = logging.getLogger(__name__)
47
+
48
+ from llama_index.core.vector_stores.types import MetadataFilter, FilterOperator
49
+
50
# Maps LlamaIndex ``FilterOperator`` members to the "$"-prefixed operator
# names used when building the filter dict passed to the index query.
# Operators missing from this map are rejected by ``EndeeVectorStore.query``
# with a ``ValueError``.
reverse_operator_map = {
    FilterOperator.EQ: "$eq",
    FilterOperator.NE: "$ne",
    FilterOperator.GT: "$gt",
    FilterOperator.GTE: "$gte",
    FilterOperator.LT: "$lt",
    FilterOperator.LTE: "$lte",
    FilterOperator.IN: "$in",
    FilterOperator.NIN: "$nin",
}
60
+
61
+
62
+
63
def build_dict(input_batch: List[List[int]]) -> List[Dict[str, Any]]:
    """
    Turn a batch of token-id lists into sparse frequency dictionaries.

    For every list in ``input_batch`` one dictionary is produced with two
    parallel lists: ``"indices"`` holds each distinct token id in order of
    first appearance, and ``"values"`` holds how often that id occurred, as
    a float.

    NOTE: taken from https://www.pinecone.io/learn/hybrid-search-intro/.

    """
    sparse_batch = []
    for token_ids in input_batch:
        # Counter keeps first-seen order, so keys and values stay aligned.
        frequencies = Counter(token_ids)
        sparse_batch.append(
            {
                "indices": list(frequencies.keys()),
                "values": [float(count) for count in frequencies.values()],
            }
        )
    return sparse_batch
84
+
85
+
86
def generate_sparse_vectors(
    context_batch: List[str], tokenizer: Callable
) -> List[Dict[str, Any]]:
    """
    Tokenize a batch of texts and convert the token ids to sparse vectors.

    ``tokenizer`` is called once with the whole batch and its result must be
    indexable with ``"input_ids"``; those ids are handed to ``build_dict``.

    NOTE: taken from https://www.pinecone.io/learn/hybrid-search-intro/.

    """
    encoded_batch = tokenizer(context_batch)
    return build_dict(encoded_batch["input_ids"])
99
+
100
+
101
# Generic "package missing" message.
# NOTE(review): not referenced by the code visible in this module
# (``_import_endee`` builds its own message) - confirm whether it is still
# needed. The stray trailing backtick of the original text has been removed.
import_err_msg = (
    "`endee` package not found, please run `pip install endee` to install it."
)
104
+
105
+
106
class EndeeVectorStore(BasePydanticVectorStore):
    """
    LlamaIndex vector store backed by an Endee index.

    Nodes are upserted into the index as entries with the keys ``id``,
    ``vector``, ``meta`` (the full LlamaIndex metadata dict) and ``filter``
    (a small subset of metadata used for filtering). Queries are forwarded to
    the index and the returned entries are turned back into ``TextNode``
    objects.
    """

    # Text is kept inside the stored metadata, not in a separate doc store.
    stores_text: bool = True
    # When False, ``query`` rebuilds nodes via the legacy metadata path.
    flat_metadata: bool = False

    # Token passed to the Endee client.
    api_token: Optional[str]
    # Name of the Endee index to open or create.
    index_name: Optional[str]
    # Distance metric used when the index has to be created.
    space_type: Optional[str]
    # Vector dimension; only required when the index has to be created.
    dimension: Optional[int]
    # NOTE(review): stored but not read by any method of this class.
    insert_kwargs: Optional[Dict]
    # NOTE(review): stored but not read by any method of this class.
    add_sparse_vector: bool
    # Key under which the node text is looked up when rebuilding nodes.
    text_key: str
    # Number of entries sent per upsert call in ``add``.
    batch_size: int
    # NOTE(review): stored but not read by any method of this class.
    remove_text_from_metadata: bool

    _endee_index: Any = PrivateAttr()

    def __init__(
        self,
        endee_index: Optional[Any] = None,
        api_token: Optional[str] = None,
        index_name: Optional[str] = None,
        space_type: Optional[str] = "cosine",
        dimension: Optional[int] = None,
        insert_kwargs: Optional[Dict] = None,
        add_sparse_vector: bool = False,
        text_key: str = DEFAULT_TEXT_KEY,
        batch_size: int = DEFAULT_BATCH_SIZE,
        remove_text_from_metadata: bool = False,
        **kwargs: Any,
    ) -> None:
        """
        Create the store around an existing index, or open/create one.

        Args:
            endee_index: An already initialised Endee index. When ``None``
                the index is obtained via ``_initialize_endee_index``.
            api_token: Endee API token.
            index_name: Name of the Endee index.
            space_type: Distance metric used if the index must be created.
            dimension: Vector dimension, needed if the index must be created.
            insert_kwargs: Extra insert options (defaults to an empty dict).
            add_sparse_vector: Stored on the model; unused by this class.
            text_key: Key used to look up node text.
            batch_size: Entries per upsert call.
            remove_text_from_metadata: Stored on the model; unused here.
            **kwargs: Accepted for compatibility and ignored.
        """
        insert_kwargs = insert_kwargs or {}

        super().__init__(
            index_name=index_name,
            api_token=api_token,
            space_type=space_type,
            dimension=dimension,
            insert_kwargs=insert_kwargs,
            add_sparse_vector=add_sparse_vector,
            text_key=text_key,
            batch_size=batch_size,
            remove_text_from_metadata=remove_text_from_metadata,
        )

        # Test against None explicitly: an index object that happens to be
        # falsy (e.g. defines __len__ and is empty) must still be reused.
        if endee_index is None:
            endee_index = self._initialize_endee_index(
                api_token, index_name, dimension, space_type
            )
        self._endee_index = endee_index

    @classmethod
    def _initialize_endee_index(
        cls,
        api_token: Optional[str],
        index_name: Optional[str],
        dimension: Optional[int] = None,
        space_type: Optional[str] = "cosine",
    ) -> Any:
        """
        Return the Endee index ``index_name``, creating it if necessary.

        Raises:
            ImportError: if the ``endee`` package cannot be imported.
            ValueError: if the index cannot be retrieved and ``dimension``
                was not supplied, so a new index cannot be created.
        """
        # Called for its side effect: raises a helpful ImportError when the
        # package is missing.
        _import_endee()
        from endee.endee_client import Endee

        nd = Endee(token=api_token)

        try:
            index = nd.get_index(name=index_name)
        except Exception as e:
            # NOTE(review): every failure of get_index (not only "index does
            # not exist") leads to an attempt to create the index - narrow
            # this once the client's exception types are known.
            if dimension is None:
                raise ValueError(
                    "Must provide dimension when creating a new index"
                ) from e

            _logger.info(f"Creating new index: {index_name}")
            nd.create_index(
                name=index_name,
                dimension=dimension,
                space_type=space_type,
            )
            return nd.get_index(name=index_name)

        _logger.info(f"Retrieved existing index: {index_name}")
        return index

    @classmethod
    def from_params(
        cls,
        api_token: Optional[str] = None,
        index_name: Optional[str] = None,
        dimension: Optional[int] = None,
        space_type: str = "cosine",
        batch_size: int = DEFAULT_BATCH_SIZE,
    ) -> "EndeeVectorStore":
        """
        Create an ``EndeeVectorStore`` from connection parameters.

        The index is opened (or created) first and then handed to the
        constructor together with the same parameters.
        """
        endee_index = cls._initialize_endee_index(
            api_token, index_name, dimension, space_type
        )

        return cls(
            endee_index=endee_index,
            api_token=api_token,
            index_name=index_name,
            dimension=dimension,
            space_type=space_type,
            batch_size=batch_size,
        )

    @classmethod
    def class_name(cls) -> str:
        """Return the class name used by LlamaIndex for this component."""
        return "EndeeVectorStore"

    def add(
        self,
        nodes: List[BaseNode],
        **add_kwargs: Any,
    ) -> List[str]:
        """
        Add nodes to index.

        Args:
            nodes: List[BaseNode]: list of nodes with embeddings
            **add_kwargs: accepted for interface compatibility and ignored.

        Returns:
            The ids of the nodes, in input order.
        """
        # Only these metadata keys are copied into the entry's "filter" dict
        # (filter values must be simple key-value pairs).
        filterable_keys = (
            "file_name",
            "doc_id",
            "category",
            "difficulty",
            "language",
            "field",
            "type",
            "feature",
        )

        ids = []
        entries = []

        for node in nodes:
            node_id = node.node_id
            metadata = node_to_metadata_dict(node)
            filter_data = {
                key: metadata[key] for key in filterable_keys if key in metadata
            }

            entries.append(
                {
                    "id": node_id,
                    "vector": node.get_embedding(),
                    "meta": metadata,
                    "filter": filter_data,
                }
            )
            ids.append(node_id)

        # Batch insert to avoid hitting API limits
        batch_size = self.batch_size
        for start in range(0, len(entries), batch_size):
            self._endee_index.upsert(entries[start : start + batch_size])

        return ids

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """
        Delete the nodes that belong to ``ref_doc_id``.

        Deletion is best-effort: a failure is logged and not re-raised.

        Args:
            ref_doc_id (str): The id of the document to delete.
        """
        try:
            self._endee_index.delete_with_filter({"doc_id": ref_doc_id})
        except Exception as e:
            _logger.error(f"Error deleting vectors for doc_id {ref_doc_id}: {e}")

    @property
    def client(self) -> Any:
        """Return Endee index client."""
        return self._endee_index

    def _index_dimension(self) -> int:
        """
        Return the vector dimension of the underlying index.

        Uses the index's ``dimension`` attribute when present, otherwise the
        ``"dimension"`` entry of ``describe()``.

        Raises:
            ValueError: if the dimension cannot be obtained from the index.
        """
        index = self._endee_index
        if hasattr(index, "dimension"):
            return index.dimension
        try:
            return index.describe()["dimension"]
        except Exception as e:
            raise ValueError("Could not determine vector dimension") from e

    def _build_filters(
        self, query_filters: Optional[MetadataFilters]
    ) -> Dict[str, Dict[str, Any]]:
        """
        Convert LlamaIndex filters to ``{key: {"$op": value}}`` form.

        Raises:
            ValueError: for an operator missing from ``reverse_operator_map``
                or a filter item that is neither filter-like nor a dict.
        """
        filters: Dict[str, Dict[str, Any]] = {}
        if query_filters is None:
            return filters

        for filter_item in query_filters.filters:
            # Case 1: MetadataFilter-like object (duck-typed).
            if (
                hasattr(filter_item, "key")
                and hasattr(filter_item, "value")
                and hasattr(filter_item, "operator")
            ):
                op_symbol = reverse_operator_map.get(filter_item.operator)
                if not op_symbol:
                    raise ValueError(
                        f"Unsupported filter operator: {filter_item.operator}"
                    )
                filters.setdefault(filter_item.key, {})[op_symbol] = filter_item.value

            # Case 2: Raw dict, e.g. {"category": {"$eq": "programming"}}
            elif isinstance(filter_item, dict):
                for key, op_dict in filter_item.items():
                    # Values that are not operator dicts are skipped.
                    if isinstance(op_dict, dict):
                        for op, val in op_dict.items():
                            filters.setdefault(key, {})[op] = val
            else:
                raise ValueError(f"Unsupported filter format: {filter_item}")

        _logger.info(f"Final structured filters: {filters}")
        return filters

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """
        Query index for top k most similar nodes.

        If the index query itself fails, the error is logged and an empty
        result is returned.

        Args:
            query: VectorStoreQuery object containing query parameters
            **kwargs: accepted for interface compatibility and ignored.

        Raises:
            ValueError: for unsupported filters, or when no query embedding
                is given and the index dimension cannot be determined.
        """
        filters = self._build_filters(query.filters)

        if query.query_embedding is not None:
            query_embedding = cast(List[float], query.query_embedding)
            if query.alpha is not None and query.mode == VectorStoreQueryMode.HYBRID:
                # Apply alpha scaling in hybrid mode
                query_embedding = [v * query.alpha for v in query_embedding]
        else:
            # No embedding supplied: fall back to a zero vector. The index
            # dimension is only needed (and only looked up) in this case.
            query_embedding = [0.0] * self._index_dimension()

        try:
            results = self._endee_index.query(
                vector=query_embedding,
                top_k=query.similarity_top_k,
                filter=filters if filters else None,
                include_vectors=True,
            )
        except Exception as e:
            _logger.error(f"Error querying Endee: {e}")
            return VectorStoreQueryResult(nodes=[], similarities=[], ids=[])

        nodes = []
        similarities = []
        ids = []

        for result in results:
            node_id = result["id"]
            score = result["similarity"]
            metadata = result.get("meta", {})

            if self.flat_metadata:
                # NOTE(review): confirm that metadata_dict_to_node accepts an
                # ``id_`` keyword in the supported llama-index versions.
                node = metadata_dict_to_node(
                    metadata=metadata,
                    text=metadata.pop(self.text_key, None),
                    id_=node_id,
                )
            else:
                metadata_dict, node_info, relationships = legacy_metadata_dict_to_node(
                    metadata=metadata,
                    text_key=self.text_key,
                )

                # The node text lives inside the JSON stored under
                # "_node_content"; missing or malformed content yields "".
                _node_content_str = metadata.get("_node_content", "{}")
                try:
                    node_content = json.loads(_node_content_str)
                except (json.JSONDecodeError, TypeError):
                    node_content = {}

                text = node_content.get(self.text_key, "")
                # ``id_`` is the keyword the README's TextNode example uses;
                # it keeps the returned node's id equal to the stored id.
                node = TextNode(
                    text=text,
                    metadata=metadata_dict,
                    relationships=relationships,
                    id_=node_id,
                )

                # Copy over only node_info entries the node already defines.
                for key, val in node_info.items():
                    if hasattr(node, key):
                        setattr(node, key, val)

            # If embedding was returned in the results, add it to the node
            if "vector" in result:
                node.embedding = result["vector"]

            nodes.append(node)
            similarities.append(score)
            ids.append(node_id)

        return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)
@@ -0,0 +1,140 @@
1
+ Metadata-Version: 2.4
2
+ Name: endee-llamaindex
3
+ Version: 0.1.2
4
+ Summary: Vector Database for Fast ANN Searches
5
+ Home-page: https://endee.io
6
+ Author: Endee Labs
7
+ Author-email: vineet@endee.io
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.6
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: llama-index>=0.12.34
14
+ Requires-Dist: endee>=0.1.2
15
+ Dynamic: author
16
+ Dynamic: author-email
17
+ Dynamic: classifier
18
+ Dynamic: description
19
+ Dynamic: description-content-type
20
+ Dynamic: home-page
21
+ Dynamic: requires-dist
22
+ Dynamic: requires-python
23
+ Dynamic: summary
24
+
25
+ # Endee LlamaIndex Integration
26
+
27
+ This package provides an integration between [Endee](https://endeedb.ai) (a vector database) and [LlamaIndex](https://www.llamaindex.ai/), allowing you to use Endee as a vector store backend for LlamaIndex.
28
+
29
+ ## Features
30
+
31
+ - **Vector Storage**: Use Endee for your LlamaIndex embeddings
32
+ - **Multiple Distance Metrics**: Support for cosine, L2, and inner product distance metrics
33
+ - **Metadata Filtering**: Filter search results based on metadata
34
+ - **High Performance**: Optimized for speed and efficiency
35
+
36
+ ## Installation
37
+
38
+ ```bash
39
+ pip install endee-llamaindex
40
+ ```
41
+
42
+ This installs the `endee-llamaindex` package together with its dependencies (`endee` and `llama-index`).
43
+
44
+ ## Quick Start
45
+
46
+ ```python
47
+ import os
48
+ from llama_index.core.schema import TextNode
49
+ from llama_index.core.vector_stores.types import VectorStoreQuery
50
+ from endee_llamaindex import EndeeVectorStore
51
+
52
+ # Configure your Endee credentials
53
+ api_token = os.environ.get("ENDEE_API_TOKEN")
54
+ index_name = "my_llamaindex_vectors"
55
+ dimension = 1536 # OpenAI ada-002 embedding dimension
56
+
57
+ # Initialize the vector store
58
+ vector_store = EndeeVectorStore.from_params(
59
+ api_token=api_token,
60
+ index_name=index_name,
61
+ dimension=dimension,
62
+ space_type="cosine"
63
+ )
64
+
65
+ # Create a node with embedding
66
+ node = TextNode(
67
+ text="This is a sample document",
68
+ id_="doc1",
69
+ embedding=[0.1, 0.2, 0.3, ...], # Your embedding vector
70
+ metadata={
71
+ "doc_id": "doc1",
72
+ "source": "example",
73
+ "author": "Endee"
74
+ }
75
+ )
76
+
77
+ # Add the node to the vector store
78
+ vector_store.add([node])
79
+
80
+ # Query the vector store
81
+ query = VectorStoreQuery(
82
+ query_embedding=[0.2, 0.3, 0.4, ...], # Your query vector
83
+ similarity_top_k=5
84
+ )
85
+
86
+ results = vector_store.query(query)
87
+
88
+ # Process results
89
+ for node, score in zip(results.nodes, results.similarities):
90
+ print(f"Node ID: {node.node_id}, Similarity: {score}")
91
+ print(f"Text: {node.text}")
92
+ print(f"Metadata: {node.metadata}")
93
+ ```
94
+
95
+ ## Using with LlamaIndex
96
+
97
+ ```python
98
+ from llama_index.core import VectorStoreIndex, StorageContext
99
+ from llama_index.embeddings.openai import OpenAIEmbedding
100
+
101
+ # Initialize your nodes or documents
102
+ nodes = [...] # Your nodes with text but no embeddings yet
103
+
104
+ # Setup embedding function
105
+ embed_model = OpenAIEmbedding() # Or any other embedding model
106
+
107
+ # Initialize Endee vector store
108
+ vector_store = EndeeVectorStore.from_params(
109
+ api_token=api_token,
110
+ index_name=index_name,
111
+ dimension=1536, # Make sure this matches your embedding dimension
112
+ )
113
+
114
+ # Create storage context
115
+ storage_context = StorageContext.from_defaults(vector_store=vector_store)
116
+
117
+ # Create vector index
118
+ index = VectorStoreIndex(
119
+ nodes,
120
+ storage_context=storage_context,
121
+ embed_model=embed_model
122
+ )
123
+
124
+ # Query the index
125
+ query_engine = index.as_query_engine()
126
+ response = query_engine.query("Your query here")
127
+ print(response)
128
+ ```
129
+
130
+ ## Configuration Options
131
+
132
+ The `EndeeVectorStore` constructor accepts the following parameters:
133
+
134
+ - `api_token`: Your Endee API token
135
+ - `index_name`: Name of the Endee index
136
+ - `dimension`: Vector dimension (required when creating a new index)
137
+ - `space_type`: Distance metric, one of "cosine", "l2", or "ip" (default: "cosine")
138
+ - `batch_size`: Number of vectors to insert in a single API call (default: 100)
139
+ - `text_key`: Key to use for storing text in metadata (default: "text")
140
+ - `remove_text_from_metadata`: Whether to remove text from metadata (default: False)
@@ -0,0 +1,9 @@
1
+ README.md
2
+ setup.py
3
+ endee_llamaindex/__init__.py
4
+ endee_llamaindex/base.py
5
+ endee_llamaindex.egg-info/PKG-INFO
6
+ endee_llamaindex.egg-info/SOURCES.txt
7
+ endee_llamaindex.egg-info/dependency_links.txt
8
+ endee_llamaindex.egg-info/requires.txt
9
+ endee_llamaindex.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ llama-index>=0.12.34
2
+ endee>=0.1.2
@@ -0,0 +1 @@
1
+ endee_llamaindex
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,26 @@
1
# setup.py - packaging configuration for the endee-llamaindex distribution.

from setuptools import setup, find_packages

# Read the long description with an explicit encoding and close the file
# right away, instead of relying on the locale default and on garbage
# collection to release the handle.
with open("README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

setup(
    name="endee-llamaindex",
    version="0.1.2",
    packages=find_packages(include=['endee_llamaindex', 'endee_llamaindex.*']),
    install_requires=[
        # Runtime dependencies
        "llama-index>=0.12.34",
        "endee>=0.1.2",
    ],
    author="Endee Labs",
    author_email="vineet@endee.io",
    description="Vector Database for Fast ANN Searches",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://endee.io",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    # NOTE(review): presumably the pinned llama-index release needs a newer
    # Python than 3.6 - confirm and tighten this bound if so.
    python_requires='>=3.6',
)