endee-llamaindex 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
endee_llamaindex/base.py CHANGED
@@ -28,7 +28,7 @@ def _import_endee() -> Any:
28
28
  """
29
29
  try:
30
30
  import endee
31
- from endee.endee_client import Endee
31
+ from endee.endee import Endee
32
32
  except ImportError as e:
33
33
  raise ImportError(
34
34
  "Could not import endee python package. "
@@ -98,6 +98,181 @@ def generate_sparse_vectors(
98
98
  return build_dict(inputs)
99
99
 
100
100
 
101
# Short aliases for sparse embedding models, mapped to the model identifier
# each alias resolves to.
# NOTE(review): several targets (e.g. "bert-base-uncased", "roberta-base") are
# general-purpose checkpoints rather than SPLADE models -- confirm each one can
# actually be loaded by the encoder backend it is used with.
SUPPORTED_SPARSE_MODELS = dict(
    splade_pp="prithivida/Splade_PP_en_v1",
    splade_cocondenser="naver/splade-cocondenser-ensembledistil",
    bert_base="bert-base-uncased",
    distilbert="distilbert-base-uncased",
    minilm="sentence-transformers/all-MiniLM-L6-v2",
    mpnet="sentence-transformers/all-mpnet-base-v2",
    roberta="roberta-base",
    xlm_roberta="xlm-roberta-base",
)
112
+
113
+
114
def _initialize_sparse_encoder_fastembed(
    model_name: str,
    batch_size: int = 256,
    cache_dir: Optional[str] = None,
    threads: Optional[int] = None,
) -> Callable:
    """
    Initialize a sparse encoder using FastEmbed (recommended for SPLADE models).

    The model is first constructed with the CUDA execution provider. If that
    construction raises for any reason, the model is constructed again without
    an explicit provider list.

    Args:
        model_name: Model identifier or alias (aliases are looked up in
            SUPPORTED_SPARSE_MODELS; unknown names are used as given)
        batch_size: Batch size for encoding
        cache_dir: Directory to cache model files
        threads: Number of threads to use

    Returns:
        Callable function that generates sparse vectors from text. It takes a
        list of texts and returns ``(indices, values)``, two lists with one
        entry per input text.

    Raises:
        ImportError: If FastEmbed cannot be imported.
    """
    try:
        from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
    except ImportError as e:
        raise ImportError(
            "Could not import FastEmbed. "
            "Please install it with `pip install fastembed` or "
            "`pip install fastembed-gpu` for GPU support."
        ) from e

    # Resolve model name from alias if needed
    resolved_model_name = SUPPORTED_SPARSE_MODELS.get(model_name, model_name)

    # Try GPU first, fallback to CPU
    try:
        model = SparseTextEmbedding(
            resolved_model_name,
            cache_dir=cache_dir,
            threads=threads,
            providers=["CUDAExecutionProvider"],
        )
        _logger.info(f"Initialized sparse encoder '{resolved_model_name}' on GPU")
    except Exception as gpu_error:
        # The fallback is deliberate best-effort, but keep the reason visible:
        # an unknown model name or a bad cache_dir also lands here, and without
        # this record the only symptom would be the retry below failing.
        _logger.debug(
            "GPU initialization of sparse encoder '%s' failed (%s); "
            "retrying without an explicit provider",
            resolved_model_name,
            gpu_error,
        )
        model = SparseTextEmbedding(
            resolved_model_name,
            cache_dir=cache_dir,
            threads=threads,
        )
        _logger.info(f"Initialized sparse encoder '{resolved_model_name}' on CPU")

    def compute_vectors(texts: List[str]) -> tuple:
        """Compute sparse vectors (indices, values) for a list of texts."""
        # Nothing to encode: skip the model call entirely.
        if not texts:
            return [], []

        indices = []
        values = []
        # model.embed() is consumed exactly once; both lists are filled in the
        # same pass so they stay aligned with the input order.
        for embedding in model.embed(texts, batch_size=batch_size):
            indices.append(embedding.indices.tolist())
            values.append(embedding.values.tolist())
        return indices, values

    return compute_vectors
172
+
173
+
174
def _initialize_sparse_encoder_transformers(
    model_name: str,
) -> Callable:
    """
    Initialize a sparse encoder using Transformers library.

    Loads a tokenizer and a masked-language-model head for the resolved model
    name and moves the model to CUDA when ``torch.cuda.is_available()``.

    Args:
        model_name: Model identifier or alias (aliases are looked up in
            SUPPORTED_SPARSE_MODELS; unknown names are used as given)

    Returns:
        Callable function that generates sparse vectors from text. It takes a
        list of texts and returns ``(indices, values)``, two lists with one
        entry per input text.

    Raises:
        ImportError: If torch or transformers cannot be imported.
    """
    try:
        import torch
        from transformers import AutoModelForMaskedLM, AutoTokenizer
    except ImportError as e:
        raise ImportError(
            "Could not import transformers library. "
            'Please install transformers with `pip install "transformers[torch]"`'
        ) from e

    # Resolve model name from alias if needed
    resolved_model_name = SUPPORTED_SPARSE_MODELS.get(model_name, model_name)

    tokenizer = AutoTokenizer.from_pretrained(resolved_model_name)
    model = AutoModelForMaskedLM.from_pretrained(resolved_model_name)

    # Pick the device once. The encoder closure reuses this decision so the
    # inputs always land on the device the model was actually moved to.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.to("cuda")
        _logger.info(f"Initialized sparse encoder '{resolved_model_name}' on GPU")
    else:
        _logger.info(f"Initialized sparse encoder '{resolved_model_name}' on CPU")

    def compute_vectors(texts: List[str]) -> tuple:
        """
        Compute sparse vectors from logits using ReLU, log, and max operations.
        """
        # Nothing to encode: skip tokenization and the forward pass.
        if not texts:
            return [], []

        tokens = tokenizer(
            texts,
            truncation=True,
            padding=True,
            max_length=512,
            return_tensors="pt",
        )

        if use_cuda:
            tokens = tokens.to("cuda")

        with torch.no_grad():
            output = model(**tokens)
            logits, attention_mask = output.logits, tokens.attention_mask
            relu_log = torch.log(1 + torch.relu(logits))
            # Multiplying by the mask zeroes the rows of padded positions, so
            # they cannot win the max below (all activations are >= 0).
            weighted_log = relu_log * attention_mask.unsqueeze(-1)
            # Collapse dim 1 -- presumably the token axis of the MLM logits --
            # leaving one weight vector per input text.
            tvecs, _ = torch.max(weighted_log, dim=1)

        # Extract non-zero vectors and their indices
        indices = []
        values = []
        for row in tvecs:
            nz = row.nonzero(as_tuple=True)[0]
            indices.append(nz.tolist())
            # Index with the tensor itself rather than a Python list copy.
            values.append(row[nz].tolist())

        return indices, values

    return compute_vectors
240
+
241
+
242
def get_sparse_encoder(
    model_name: Optional[str] = None,
    use_fastembed: bool = True,
    batch_size: int = 256,
    cache_dir: Optional[str] = None,
    threads: Optional[int] = None,
) -> Optional[Callable]:
    """
    Get a sparse encoder function for the specified model.

    Args:
        model_name: Model name or alias (e.g., 'splade_pp', 'bert_base', or full model ID)
        use_fastembed: If True, use FastEmbed (recommended for SPLADE models), else use Transformers
        batch_size: Batch size for encoding (forwarded to the FastEmbed encoder only)
        cache_dir: Directory to cache model files (forwarded to the FastEmbed encoder only)
        threads: Number of threads to use (forwarded to the FastEmbed encoder only)

    Returns:
        Callable function that generates sparse vectors, or None if model_name
        is not provided (None or an empty string)
    """
    # Treat an empty string like None: there is no model to load, and the
    # caller gets the documented "no encoder" result instead of a backend
    # error for a blank model name.
    if not model_name:
        return None

    if use_fastembed:
        return _initialize_sparse_encoder_fastembed(
            model_name=model_name,
            batch_size=batch_size,
            cache_dir=cache_dir,
            threads=threads,
        )

    return _initialize_sparse_encoder_transformers(model_name=model_name)
274
+
275
+
101
276
# Message shown when the `endee` package is missing. The unmatched trailing
# backtick that used to follow the final period has been removed.
import_err_msg = (
    "`endee` package not found, please run `pip install endee` to install it."
)
@@ -117,8 +292,14 @@ class EndeeVectorStore(BasePydanticVectorStore):
117
292
  text_key: str
118
293
  batch_size: int
119
294
  remove_text_from_metadata: bool
295
+ hybrid: bool
296
+ vocab_size: Optional[int]
297
+ model_name: Optional[str]
298
+ precision: Optional[str]
299
+ key: Optional[str]
120
300
 
121
301
  _endee_index: Any = PrivateAttr()
302
+ _sparse_encoder: Optional[Callable] = PrivateAttr(default=None)
122
303
 
123
304
  def __init__(
124
305
  self,
@@ -132,6 +313,11 @@ class EndeeVectorStore(BasePydanticVectorStore):
132
313
  text_key: str = DEFAULT_TEXT_KEY,
133
314
  batch_size: int = DEFAULT_BATCH_SIZE,
134
315
  remove_text_from_metadata: bool = False,
316
+ hybrid: bool = False,
317
+ vocab_size: Optional[int] = None,
318
+ model_name: Optional[str] = None,
319
+ precision: Optional[str] = "medium",
320
+ key: Optional[str] = None,
135
321
  **kwargs: Any,
136
322
  ) -> None:
137
323
  insert_kwargs = insert_kwargs or {}
@@ -146,12 +332,39 @@ class EndeeVectorStore(BasePydanticVectorStore):
146
332
  text_key=text_key,
147
333
  batch_size=batch_size,
148
334
  remove_text_from_metadata=remove_text_from_metadata,
335
+ vocab_size=vocab_size,
336
+ hybrid=hybrid,
337
+ model_name=model_name,
338
+ precision=precision,
339
+ key=key,
149
340
  )
150
341
 
151
- # Use existing endee_index or initialize a new one
152
- self._endee_index = endee_index or self._initialize_endee_index(
153
- api_token, index_name, dimension, space_type
154
- )
342
+ # Initialize index based on hybrid flag
343
+ if endee_index is not None:
344
+ # Use provided index
345
+ self._endee_index = endee_index
346
+ elif hybrid:
347
+ # Initialize hybrid index
348
+ self._endee_index = self._initialize_hybrid_index(
349
+ api_token, index_name, dimension, space_type, vocab_size, precision, key
350
+ )
351
+ else:
352
+ # Initialize regular index
353
+ self._endee_index = self._initialize_endee_index(
354
+ api_token, index_name, dimension, space_type, precision, key
355
+ )
356
+
357
+ # Initialize sparse encoder if model name is provided and hybrid mode is enabled
358
+ if hybrid and model_name:
359
+ _logger.info(f"Initializing sparse encoder with model: {model_name}")
360
+ self._sparse_encoder = get_sparse_encoder(
361
+ model_name=model_name,
362
+ use_fastembed=True, # Default to FastEmbed
363
+ batch_size=batch_size,
364
+ )
365
+ else:
366
+ self._sparse_encoder = None
367
+
155
368
 
156
369
  @classmethod
157
370
  def _initialize_endee_index(
@@ -160,17 +373,19 @@ class EndeeVectorStore(BasePydanticVectorStore):
160
373
  index_name: Optional[str],
161
374
  dimension: Optional[int] = None,
162
375
  space_type: Optional[str] = "cosine",
376
+ precision: Optional[str] = "medium",
377
+ key: Optional[str] = None,
163
378
  ) -> Any:
164
379
  """Initialize Endee index using the current API."""
165
380
  endee = _import_endee()
166
- from endee.endee_client import Endee
381
+ from endee.endee import Endee
167
382
 
168
383
  # Initialize Endee client
169
384
  nd = Endee(token=api_token)
170
385
 
171
386
  try:
172
387
  # Try to get existing index
173
- index = nd.get_index(name=index_name)
388
+ index = nd.get_index(name=index_name, key=key)
174
389
  _logger.info(f"Retrieved existing index: {index_name}")
175
390
  return index
176
391
  except Exception as e:
@@ -185,8 +400,55 @@ class EndeeVectorStore(BasePydanticVectorStore):
185
400
  name=index_name,
186
401
  dimension=dimension,
187
402
  space_type=space_type,
403
+ precision=precision,
404
+ key=key,
405
+ )
406
+ return nd.get_index(name=index_name, key=key)
407
+
408
+ @classmethod
409
+ def _initialize_hybrid_index(
410
+ cls,
411
+ api_token: Optional[str],
412
+ index_name: Optional[str],
413
+ dimension: Optional[int] = None,
414
+ space_type: Optional[str] = "cosine",
415
+ vocab_size: Optional[int] = None,
416
+ precision: Optional[str] = "medium",
417
+ key: Optional[str] = None,
418
+ ) -> Any:
419
+ """Initialize Endee hybrid index using the current API."""
420
+ endee = _import_endee()
421
+ from endee.endee import Endee
422
+
423
+ # Initialize Endee client
424
+ nd = Endee(token=api_token)
425
+
426
+ try:
427
+ # Try to get existing hybrid index
428
+ index = nd.get_hybrid_index(name=index_name, key=key)
429
+ _logger.info(f"Retrieved existing hybrid index: {index_name}")
430
+ return index
431
+ except Exception as e:
432
+ if dimension is None:
433
+ raise ValueError(
434
+ "Must provide dimension when creating a new hybrid index"
435
+ ) from e
436
+ if vocab_size is None:
437
+ raise ValueError(
438
+ "Must provide vocab_size when creating a new hybrid index"
439
+ ) from e
440
+
441
+ # Create a new hybrid index if it doesn't exist
442
+ _logger.info(f"Creating new hybrid index: {index_name}")
443
+ nd.create_hybrid_index(
444
+ name=index_name,
445
+ dimension=dimension,
446
+ space_type=space_type,
447
+ vocab_size=vocab_size,
448
+ precision=precision,
449
+ key=key,
188
450
  )
189
- return nd.get_index(name=index_name)
451
+ return nd.get_hybrid_index(name=index_name, key=key)
190
452
 
191
453
  @classmethod
192
454
  def from_params(
@@ -196,11 +458,44 @@ class EndeeVectorStore(BasePydanticVectorStore):
196
458
  dimension: Optional[int] = None,
197
459
  space_type: str = "cosine",
198
460
  batch_size: int = DEFAULT_BATCH_SIZE,
461
+ hybrid: bool = False,
462
+ vocab_size: Optional[int] = None,
463
+ model_name: Optional[str] = None,
464
+ precision: Optional[str] = "medium",
465
+ key: Optional[str] = None,
199
466
  ) -> "EndeeVectorStore":
200
- """Create EndeeVectorStore from parameters."""
201
- endee_index = cls._initialize_endee_index(
202
- api_token, index_name, dimension, space_type
203
- )
467
+ """Create EndeeVectorStore from parameters.
468
+
469
+ Args:
470
+ api_token: API token for Endee service
471
+ index_name: Name of the index
472
+ dimension: Vector dimension
473
+ space_type: Distance metric ("cosine", "l2", or "ip")
474
+ batch_size: Batch size for operations
475
+ hybrid: If True, create/use a hybrid index (supports both dense and sparse vectors)
476
+ vocab_size: Vocabulary size for hybrid index (required if hybrid=True)
477
+ model_name: Model name or alias for sparse embeddings (e.g., 'splade_pp', 'bert_base')
478
+ Supported models:
479
+ - 'splade_pp': prithivida/Splade_PP_en_v1 (~438 MB)
480
+ - 'splade_cocondenser': naver/splade-cocondenser-ensembledistil (~438 MB)
481
+ - 'bert_base': bert-base-uncased (~420 MB)
482
+ - 'distilbert': distilbert-base-uncased (~256 MB)
483
+ - 'minilm': sentence-transformers/all-MiniLM-L6-v2 (~90 MB)
484
+ - 'mpnet': sentence-transformers/all-mpnet-base-v2 (~420 MB)
485
+ - 'roberta': roberta-base (~501 MB)
486
+ - 'xlm_roberta': xlm-roberta-base (~1.3 GB)
487
+ precision: Precision setting for index ("low", "medium", "high", or None)
488
+ key: Encryption key for encrypting metadata (256-bit hex key, 64 hex characters)
489
+ If provided, metadata will be encrypted using AES-256. Store this key securely.
490
+ """
491
+ if hybrid:
492
+ endee_index = cls._initialize_hybrid_index(
493
+ api_token, index_name, dimension, space_type, vocab_size, precision, key
494
+ )
495
+ else:
496
+ endee_index = cls._initialize_endee_index(
497
+ api_token, index_name, dimension, space_type, precision, key
498
+ )
204
499
 
205
500
  return cls(
206
501
  endee_index=endee_index,
@@ -209,15 +504,30 @@ class EndeeVectorStore(BasePydanticVectorStore):
209
504
  dimension=dimension,
210
505
  space_type=space_type,
211
506
  batch_size=batch_size,
507
+ vocab_size=vocab_size,
508
+ hybrid=hybrid,
509
+ model_name=model_name,
510
+ precision=precision,
511
+ key=key,
212
512
  )
213
513
 
214
514
  @classmethod
215
515
  def class_name(cls) -> str:
216
516
  return "EndeeVectorStore"
217
517
 
518
+ def _compute_sparse_vectors(self, texts: List[str]) -> tuple:
519
+ """Compute sparse vectors for a list of texts."""
520
+ if self._sparse_encoder is None:
521
+ raise ValueError(
522
+ "Sparse encoder not initialized. "
523
+ "Please provide model_name when creating the store with hybrid=True."
524
+ )
525
+ return self._sparse_encoder(texts)
526
+
218
527
  def add(
219
528
  self,
220
529
  nodes: List[BaseNode],
530
+ hybrid: Optional[bool] = None,
221
531
  **add_kwargs: Any,
222
532
  ) -> List[str]:
223
533
  """
@@ -225,11 +535,30 @@ class EndeeVectorStore(BasePydanticVectorStore):
225
535
 
226
536
  Args:
227
537
  nodes: List[BaseNode]: list of nodes with embeddings
538
+ hybrid: If True, compute and include sparse vectors for hybrid search.
539
+ Defaults to self.hybrid if not specified.
228
540
  """
541
+ # Use instance hybrid setting if not explicitly provided
542
+ use_hybrid = hybrid if hybrid is not None else self.hybrid
543
+
229
544
  ids = []
230
545
  entries = []
546
+ texts = []
547
+
548
+ # Collect texts for sparse encoding if hybrid mode
549
+ if use_hybrid:
550
+ for node in nodes:
551
+ text = node.get_content()
552
+ texts.append(text)
553
+
554
+ # Compute sparse vectors in batch
555
+ if self._sparse_encoder is not None and texts:
556
+ sparse_indices, sparse_values = self._compute_sparse_vectors(texts)
557
+ else:
558
+ sparse_indices = [[] for _ in texts]
559
+ sparse_values = [[] for _ in texts]
231
560
 
232
- for node in nodes:
561
+ for i, node in enumerate(nodes):
233
562
  node_id = node.node_id
234
563
  metadata = node_to_metadata_dict(node)
235
564
 
@@ -253,12 +582,24 @@ class EndeeVectorStore(BasePydanticVectorStore):
253
582
  filter_data["feature"] = metadata["feature"]
254
583
 
255
584
 
256
- entry = {
257
- "id": node_id,
258
- "vector": node.get_embedding(),
259
- "meta": metadata,
260
- "filter": filter_data
261
- }
585
+ # Build entry based on hybrid mode
586
+ if use_hybrid:
587
+ entry = {
588
+ "id": node_id,
589
+ "dense_vector": node.get_embedding(),
590
+ "sparse_vector": {
591
+ "indices": sparse_indices[i],
592
+ "values": sparse_values[i]
593
+ },
594
+ "meta": metadata,
595
+ }
596
+ else:
597
+ entry = {
598
+ "id": node_id,
599
+ "vector": node.get_embedding(),
600
+ "meta": metadata,
601
+ "filter": filter_data
602
+ }
262
603
 
263
604
  ids.append(node_id)
264
605
  entries.append(entry)
@@ -287,14 +628,36 @@ class EndeeVectorStore(BasePydanticVectorStore):
287
628
  def client(self) -> Any:
288
629
  """Return Endee index client."""
289
630
  return self._endee_index
631
+
290
632
 
291
- def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
633
+ def query(
634
+ self,
635
+ query: VectorStoreQuery,
636
+ hybrid: Optional[bool] = None,
637
+ sparse_query_text: Optional[str] = None,
638
+ sparse_top_k: Optional[int] = None,
639
+ dense_top_k: Optional[int] = None,
640
+ rrf_k: int = 60,
641
+ **kwargs: Any,
642
+ ) -> VectorStoreQueryResult:
292
643
  """
293
644
  Query index for top k most similar nodes.
294
645
 
295
646
  Args:
296
647
  query: VectorStoreQuery object containing query parameters
648
+ hybrid: If True, perform hybrid search with sparse vectors.
649
+ Defaults to self.hybrid if not specified.
650
+ sparse_query_text: Text to compute sparse vector for query.
651
+ If not provided, uses query.query_str if available.
652
+ sparse_top_k: Top K results from sparse search (for hybrid).
653
+ Defaults to query.similarity_top_k if not specified.
654
+ dense_top_k: Top K results from dense search (for hybrid).
655
+ Defaults to query.similarity_top_k if not specified.
656
+ rrf_k: Reciprocal Rank Fusion parameter (default: 60).
297
657
  """
658
+ # Use instance hybrid setting if not explicitly provided
659
+ use_hybrid = hybrid if hybrid is not None else self.hybrid
660
+
298
661
  if not hasattr(self._endee_index, 'dimension'):
299
662
  # Get dimension from index if available, otherwise try to infer from query
300
663
  try:
@@ -344,14 +707,42 @@ class EndeeVectorStore(BasePydanticVectorStore):
344
707
  # Apply alpha scaling in hybrid mode
345
708
  query_embedding = [v * query.alpha for v in query_embedding]
346
709
 
710
+ # Compute sparse query vector if hybrid mode
711
+ sparse_vector = {"indices": [], "values": []}
712
+
713
+ if use_hybrid:
714
+ query_text = sparse_query_text or getattr(query, 'query_str', None)
715
+ if query_text and self._sparse_encoder is not None:
716
+ sparse_indices_batch, sparse_values_batch = self._compute_sparse_vectors([query_text])
717
+ sparse_vector = {
718
+ "indices": sparse_indices_batch[0],
719
+ "values": sparse_values_batch[0]
720
+ }
721
+
722
+ # Set default top_k values for hybrid search
723
+ use_sparse_top_k = sparse_top_k if sparse_top_k is not None else query.similarity_top_k
724
+ use_dense_top_k = dense_top_k if dense_top_k is not None else query.similarity_top_k
725
+
347
726
  # Execute query
348
727
  try:
349
- results = self._endee_index.query(
350
- vector=query_embedding,
351
- top_k=query.similarity_top_k,
352
- filter=filters if filters else None,
353
- include_vectors=True
354
- )
728
+ if use_hybrid:
729
+ # Hybrid search using RRF (Reciprocal Rank Fusion)
730
+ results = self._endee_index.search(
731
+ dense_vector=query_embedding,
732
+ sparse_vector=sparse_vector,
733
+ sparse_top_k=use_sparse_top_k,
734
+ dense_top_k=use_dense_top_k,
735
+ include_vectors=True,
736
+ rrf_k=rrf_k,
737
+ )
738
+ else:
739
+ # Regular dense query
740
+ results = self._endee_index.query(
741
+ vector=query_embedding,
742
+ top_k=query.similarity_top_k,
743
+ filter=filters if filters else None,
744
+ include_vectors=True
745
+ )
355
746
  except Exception as e:
356
747
  _logger.error(f"Error querying Endee: {e}")
357
748
  return VectorStoreQueryResult(nodes=[], similarities=[], ids=[])
@@ -363,7 +754,7 @@ class EndeeVectorStore(BasePydanticVectorStore):
363
754
 
364
755
  for result in results:
365
756
  node_id = result["id"]
366
- score = result["similarity"]
757
+ score = result.get("similarity", result.get("score", 0.0))
367
758
 
368
759
  # Get metadata from result
369
760
  metadata = result.get("meta", {})
@@ -414,3 +805,5 @@ class EndeeVectorStore(BasePydanticVectorStore):
414
805
  ids.append(node_id)
415
806
 
416
807
  return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)
808
+
809
+