endee-llamaindex 0.1.3__py3-none-any.whl → 0.1.5a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,70 @@
1
+ """
2
+ Constants for EndeeVectorStore.
3
+
4
+ This module contains all constants used by the Endee LlamaIndex integration.
5
+ """
6
+
7
+ from llama_index.core.vector_stores.types import FilterOperator
8
+
9
+ # Endee default constants
10
+ # These may be overridden by importing from endee.constants if available
11
+ MAX_VECTORS_PER_BATCH = 1000
12
+ DEFAULT_EF_SEARCH = 128
13
+ MAX_TOP_K_ALLOWED = 512
14
+ MAX_EF_SEARCH_ALLOWED = 1024
15
+ MAX_DIMENSION_ALLOWED = 10000
16
+ MAX_INDEX_NAME_LENGTH_ALLOWED = 48
17
+
18
+ # Try to import constants from endee package to stay in sync
19
+ try:
20
+ from endee.constants import (
21
+ DEFAULT_EF_SEARCH as _DEFAULT_EF_SEARCH,
22
+ MAX_DIMENSION_ALLOWED as _MAX_DIMENSION_ALLOWED,
23
+ MAX_EF_SEARCH_ALLOWED as _MAX_EF_SEARCH_ALLOWED,
24
+ MAX_INDEX_NAME_LENGTH_ALLOWED as _MAX_INDEX_NAME_LENGTH_ALLOWED,
25
+ MAX_TOP_K_ALLOWED as _MAX_TOP_K_ALLOWED,
26
+ MAX_VECTORS_PER_BATCH as _MAX_VECTORS_PER_BATCH,
27
+ )
28
+ # Override defaults with values from endee package
29
+ DEFAULT_EF_SEARCH = _DEFAULT_EF_SEARCH
30
+ MAX_DIMENSION_ALLOWED = _MAX_DIMENSION_ALLOWED
31
+ MAX_EF_SEARCH_ALLOWED = _MAX_EF_SEARCH_ALLOWED
32
+ MAX_INDEX_NAME_LENGTH_ALLOWED = _MAX_INDEX_NAME_LENGTH_ALLOWED
33
+ MAX_TOP_K_ALLOWED = _MAX_TOP_K_ALLOWED
34
+ MAX_VECTORS_PER_BATCH = _MAX_VECTORS_PER_BATCH
35
+ except ImportError:
36
+ pass
37
+
38
+ # Space types and precision types for index creation
39
+ SPACE_TYPES_VALID = ("cosine", "l2", "ip")
40
+ PRECISION_VALID = ("binary", "float16", "float32", "int16d", "int8d")
41
+
42
+ # Space type mapping (aliases)
43
+ SPACE_TYPE_MAP = {
44
+ "cosine": "cosine",
45
+ "l2": "l2",
46
+ "ip": "ip",
47
+ "euclidean": "l2",
48
+ "inner_product": "ip",
49
+ }
50
+
51
+ # Vector store keys
52
+ ID_KEY = "id"
53
+ VECTOR_KEY = "values"
54
+ SPARSE_VECTOR_KEY = "sparse_values"
55
+ METADATA_KEY = "metadata"
56
+
57
+ # Batch size for add(); capped by MAX_VECTORS_PER_BATCH
58
+ DEFAULT_BATCH_SIZE = 100
59
+
60
+ # Supported filter operations: currently only EQ and IN.
61
+ # Map FilterOperator -> endee/backend filter symbol.
62
+ SUPPORTED_FILTER_OPERATORS = (
63
+ FilterOperator.EQ, # eq -> $eq
64
+ FilterOperator.IN, # in -> $in
65
+ )
66
+
67
+ REVERSE_OPERATOR_MAP = {
68
+ FilterOperator.EQ: "$eq",
69
+ FilterOperator.IN: "$in",
70
+ }
endee_llamaindex/utils.py CHANGED
@@ -1,29 +1,8 @@
1
1
  import logging
2
- from collections import Counter
3
- from typing import Any, Callable, Dict, List, Optional, cast
4
- import json
5
-
6
- from llama_index.core.bridge.pydantic import PrivateAttr
7
- from llama_index.core.schema import BaseNode, TextNode
8
- from llama_index.core.vector_stores.types import (
9
- BasePydanticVectorStore,
10
- MetadataFilters,
11
- VectorStoreQuery,
12
- VectorStoreQueryMode,
13
- VectorStoreQueryResult,
14
- )
15
- from llama_index.core.vector_stores.utils import (
16
- DEFAULT_TEXT_KEY,
17
- legacy_metadata_dict_to_node,
18
- metadata_dict_to_node,
19
- node_to_metadata_dict,
20
- )
21
- from llama_index.core.vector_stores.types import MetadataFilter, FilterOperator
2
+ from typing import Any, Callable, List, Optional
22
3
 
23
4
  _logger = logging.getLogger(__name__)
24
5
 
25
- DEFAULT_BATCH_SIZE = 100
26
-
27
6
  # Supported sparse embedding models
28
7
  SUPPORTED_SPARSE_MODELS = {
29
8
  "splade_pp": "prithivida/Splade_PP_en_v1",
@@ -36,23 +15,11 @@ SUPPORTED_SPARSE_MODELS = {
36
15
  "xlm_roberta": "xlm-roberta-base",
37
16
  }
38
17
 
39
- reverse_operator_map = {
40
- FilterOperator.EQ: "$eq",
41
- FilterOperator.NE: "$ne",
42
- FilterOperator.GT: "$gt",
43
- FilterOperator.GTE: "$gte",
44
- FilterOperator.LT: "$lt",
45
- FilterOperator.LTE: "$lte",
46
- FilterOperator.IN: "$in",
47
- FilterOperator.NIN: "$nin",
48
- }
49
-
50
18
 
51
19
  def _import_endee() -> Any:
52
20
  """Import endee module."""
53
21
  try:
54
22
  import endee
55
- from endee.endee import Endee
56
23
  except ImportError as e:
57
24
  raise ImportError(
58
25
  "Could not import endee python package. "
@@ -61,30 +28,6 @@ def _import_endee() -> Any:
61
28
  return endee
62
29
 
63
30
 
64
- def build_dict(input_batch: List[List[int]]) -> List[Dict[str, Any]]:
65
- """
66
- Build a list of sparse dictionaries from a batch of input_ids.
67
- """
68
- sparse_emb = []
69
- for token_ids in input_batch:
70
- indices = []
71
- values = []
72
- d = dict(Counter(token_ids))
73
- for idx in d:
74
- indices.append(idx)
75
- values.append(float(d[idx]))
76
- sparse_emb.append({"indices": indices, "values": values})
77
- return sparse_emb
78
-
79
-
80
- def generate_sparse_vectors(
81
- context_batch: List[str], tokenizer: Callable
82
- ) -> List[Dict[str, Any]]:
83
- """Generate sparse vectors from a batch of contexts."""
84
- inputs = tokenizer(context_batch)["input_ids"]
85
- return build_dict(inputs)
86
-
87
-
88
31
  def _initialize_sparse_encoder_fastembed(
89
32
  model_name: str,
90
33
  batch_size: int = 256,
@@ -98,9 +41,12 @@ def _initialize_sparse_encoder_fastembed(
98
41
  from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
99
42
  except ImportError as e:
100
43
  raise ImportError(
101
- "Could not import FastEmbed. "
102
- "Please install it with `pip install fastembed` or "
103
- "`pip install fastembed-gpu` for GPU support."
44
+ "FastEmbed is required for hybrid search but not installed.\n"
45
+ "Install options:\n"
46
+ " - CPU: pip install endee-llamaindex[hybrid]\n"
47
+ " - GPU: pip install endee-llamaindex[hybrid-gpu]\n"
48
+ " - Or: pip install fastembed\n"
49
+ "For dense-only search, create vector store without sparse_dim/model_name."
104
50
  ) from e
105
51
 
106
52
  resolved_model_name = SUPPORTED_SPARSE_MODELS.get(model_name, model_name)
@@ -212,529 +158,3 @@ def get_sparse_encoder(
212
158
  )
213
159
  else:
214
160
  return _initialize_sparse_encoder_transformers(model_name=model_name)
215
-
216
-
217
- class EndeeHybridVectorStore(BasePydanticVectorStore):
218
- """
219
- Endee Hybrid Vector Store for combined dense and sparse vector search.
220
-
221
- This class provides hybrid search capabilities using both dense embeddings
222
- and sparse vectors (e.g., SPLADE, BM25-style) for improved retrieval.
223
- """
224
-
225
- stores_text: bool = True
226
- flat_metadata: bool = False
227
-
228
- api_token: Optional[str]
229
- index_name: Optional[str]
230
- space_type: Optional[str]
231
- dimension: Optional[int]
232
- vocab_size: int
233
- insert_kwargs: Optional[Dict]
234
- text_key: str
235
- batch_size: int
236
- remove_text_from_metadata: bool
237
- model_name: Optional[str]
238
- use_fastembed: bool
239
- alpha: float # Weight for dense vs sparse (0=sparse only, 1=dense only)
240
-
241
- _endee_index: Any = PrivateAttr()
242
- _sparse_encoder: Optional[Callable] = PrivateAttr(default=None)
243
-
244
- def __init__(
245
- self,
246
- endee_index: Optional[Any] = None,
247
- api_token: Optional[str] = None,
248
- index_name: Optional[str] = None,
249
- space_type: Optional[str] = "cosine",
250
- dimension: Optional[int] = None,
251
- vocab_size: int = 30522, # Default BERT vocab size
252
- insert_kwargs: Optional[Dict] = None,
253
- text_key: str = DEFAULT_TEXT_KEY,
254
- batch_size: int = DEFAULT_BATCH_SIZE,
255
- remove_text_from_metadata: bool = False,
256
- model_name: Optional[str] = "splade_pp",
257
- use_fastembed: bool = True,
258
- alpha: float = 0.5,
259
- **kwargs: Any,
260
- ) -> None:
261
- insert_kwargs = insert_kwargs or {}
262
-
263
- super().__init__(
264
- index_name=index_name,
265
- api_token=api_token,
266
- space_type=space_type,
267
- dimension=dimension,
268
- vocab_size=vocab_size,
269
- insert_kwargs=insert_kwargs,
270
- text_key=text_key,
271
- batch_size=batch_size,
272
- remove_text_from_metadata=remove_text_from_metadata,
273
- model_name=model_name,
274
- use_fastembed=use_fastembed,
275
- alpha=alpha,
276
- )
277
-
278
- # Initialize hybrid index
279
- if endee_index is not None:
280
- self._endee_index = endee_index
281
- else:
282
- self._endee_index = self._initialize_hybrid_index(
283
- api_token, index_name, dimension, space_type, vocab_size
284
- )
285
-
286
- # Initialize sparse encoder
287
- if model_name:
288
- _logger.info(f"Initializing sparse encoder with model: {model_name}")
289
- self._sparse_encoder = get_sparse_encoder(
290
- model_name=model_name,
291
- use_fastembed=use_fastembed,
292
- batch_size=batch_size,
293
- )
294
- else:
295
- self._sparse_encoder = None
296
-
297
- @classmethod
298
- def _initialize_hybrid_index(
299
- cls,
300
- api_token: Optional[str],
301
- index_name: Optional[str],
302
- dimension: Optional[int] = None,
303
- space_type: Optional[str] = "cosine",
304
- vocab_size: Optional[int] = None,
305
- ) -> Any:
306
- """Initialize Endee hybrid index."""
307
- _import_endee()
308
- from endee.endee import Endee
309
-
310
- nd = Endee(token=api_token)
311
-
312
- try:
313
- index = nd.get_hybrid_index(name=index_name)
314
- _logger.info(f"Retrieved existing hybrid index: {index_name}")
315
- return index
316
- except Exception as e:
317
- if dimension is None:
318
- raise ValueError(
319
- "Must provide dimension when creating a new hybrid index"
320
- ) from e
321
- if vocab_size is None:
322
- raise ValueError(
323
- "Must provide vocab_size when creating a new hybrid index"
324
- ) from e
325
-
326
- _logger.info(f"Creating new hybrid index: {index_name}")
327
- nd.create_hybrid_index(
328
- name=index_name,
329
- dimension=dimension,
330
- space_type=space_type,
331
- vocab_size=vocab_size,
332
- )
333
- return nd.get_hybrid_index(name=index_name)
334
-
335
- @classmethod
336
- def from_params(
337
- cls,
338
- api_token: Optional[str] = None,
339
- index_name: Optional[str] = None,
340
- dimension: Optional[int] = None,
341
- space_type: str = "cosine",
342
- vocab_size: int = 30522,
343
- batch_size: int = DEFAULT_BATCH_SIZE,
344
- model_name: Optional[str] = "splade_pp",
345
- use_fastembed: bool = True,
346
- alpha: float = 0.5,
347
- ) -> "EndeeHybridVectorStore":
348
- """
349
- Create EndeeHybridVectorStore from parameters.
350
-
351
- Args:
352
- api_token: API token for Endee service
353
- index_name: Name of the hybrid index
354
- dimension: Vector dimension for dense embeddings
355
- space_type: Distance metric ("cosine", "l2", or "ip")
356
- vocab_size: Vocabulary size for sparse vectors
357
- batch_size: Batch size for operations
358
- model_name: Model name or alias for sparse embeddings
359
- Supported models:
360
- - 'splade_pp': prithivida/Splade_PP_en_v1
361
- - 'splade_cocondenser': naver/splade-cocondenser-ensembledistil
362
- - 'bert_base': bert-base-uncased
363
- - 'distilbert': distilbert-base-uncased
364
- - 'minilm': sentence-transformers/all-MiniLM-L6-v2
365
- - 'mpnet': sentence-transformers/all-mpnet-base-v2
366
- - 'roberta': roberta-base
367
- - 'xlm_roberta': xlm-roberta-base
368
- use_fastembed: Use FastEmbed for sparse encoding (recommended)
369
- alpha: Weight for hybrid search (0=sparse only, 1=dense only, 0.5=balanced)
370
- """
371
- endee_index = cls._initialize_hybrid_index(
372
- api_token, index_name, dimension, space_type, vocab_size
373
- )
374
-
375
- return cls(
376
- endee_index=endee_index,
377
- api_token=api_token,
378
- index_name=index_name,
379
- dimension=dimension,
380
- space_type=space_type,
381
- vocab_size=vocab_size,
382
- batch_size=batch_size,
383
- model_name=model_name,
384
- use_fastembed=use_fastembed,
385
- alpha=alpha,
386
- )
387
-
388
- @classmethod
389
- def class_name(cls) -> str:
390
- return "EndeeHybridVectorStore"
391
-
392
- def _compute_sparse_vectors(self, texts: List[str]) -> tuple:
393
- """Compute sparse vectors for a list of texts."""
394
- if self._sparse_encoder is None:
395
- raise ValueError(
396
- "Sparse encoder not initialized. "
397
- "Please provide model_name when creating the store."
398
- )
399
- return self._sparse_encoder(texts)
400
-
401
- def add(
402
- self,
403
- nodes: List[BaseNode],
404
- **add_kwargs: Any,
405
- ) -> List[str]:
406
- """
407
- Add nodes to hybrid index with both dense and sparse vectors.
408
-
409
- Args:
410
- nodes: List[BaseNode]: list of nodes with embeddings
411
- """
412
- ids = []
413
- entries = []
414
- texts = []
415
-
416
- # Collect all texts for batch sparse encoding
417
- for node in nodes:
418
- text = node.get_content()
419
- texts.append(text)
420
-
421
- # Compute sparse vectors in batch
422
- if self._sparse_encoder is not None and texts:
423
- sparse_indices, sparse_values = self._compute_sparse_vectors(texts)
424
- else:
425
- sparse_indices = [[] for _ in texts]
426
- sparse_values = [[] for _ in texts]
427
-
428
- for i, node in enumerate(nodes):
429
- node_id = node.node_id
430
- metadata = node_to_metadata_dict(node)
431
-
432
- # Filter values for hybrid index
433
- filter_data = {}
434
- for key in ["file_name", "doc_id", "category", "difficulty",
435
- "language", "field", "type", "feature"]:
436
- if key in metadata:
437
- filter_data[key] = metadata[key]
438
-
439
- entry = {
440
- "id": node_id,
441
- "vector": node.get_embedding(),
442
- "sparse_indices": sparse_indices[i],
443
- "sparse_values": sparse_values[i],
444
- "meta": metadata,
445
- "filter": filter_data
446
- }
447
-
448
- ids.append(node_id)
449
- entries.append(entry)
450
-
451
- # Batch upsert
452
- batch_size = self.batch_size
453
- for i in range(0, len(entries), batch_size):
454
- batch = entries[i : i + batch_size]
455
- self._endee_index.upsert(batch)
456
-
457
- return ids
458
-
459
- def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
460
- """
461
- Delete nodes using ref_doc_id.
462
-
463
- Args:
464
- ref_doc_id (str): The id of the document to delete.
465
- """
466
- try:
467
- self._endee_index.delete_with_filter({"doc_id": ref_doc_id})
468
- except Exception as e:
469
- _logger.error(f"Error deleting vectors for doc_id {ref_doc_id}: {e}")
470
-
471
- def delete_by_ids(self, ids: List[str], **delete_kwargs: Any) -> None:
472
- """
473
- Delete nodes by their IDs.
474
-
475
- Args:
476
- ids: List of node IDs to delete.
477
- """
478
- try:
479
- self._endee_index.delete(ids)
480
- except Exception as e:
481
- _logger.error(f"Error deleting vectors by IDs: {e}")
482
-
483
- def delete_with_filter(self, filter_dict: Dict[str, Any], **delete_kwargs: Any) -> None:
484
- """
485
- Delete nodes matching a filter.
486
-
487
- Args:
488
- filter_dict: Filter dictionary for deletion.
489
- """
490
- try:
491
- self._endee_index.delete_with_filter(filter_dict)
492
- except Exception as e:
493
- _logger.error(f"Error deleting vectors with filter: {e}")
494
-
495
- @property
496
- def client(self) -> Any:
497
- """Return Endee hybrid index client."""
498
- return self._endee_index
499
-
500
- def query(
501
- self,
502
- query: VectorStoreQuery,
503
- sparse_query_text: Optional[str] = None,
504
- alpha: Optional[float] = None,
505
- **kwargs: Any,
506
- ) -> VectorStoreQueryResult:
507
- """
508
- Query hybrid index for top k most similar nodes.
509
-
510
- Args:
511
- query: VectorStoreQuery object containing query parameters
512
- sparse_query_text: Optional text to compute sparse vector for query.
513
- If not provided, uses query.query_str if available.
514
- alpha: Optional weight override for this query (0=sparse only, 1=dense only)
515
- """
516
- # Get dimension
517
- try:
518
- dimension = self._endee_index.describe()["dimension"]
519
- except:
520
- if query.query_embedding is not None:
521
- dimension = len(query.query_embedding)
522
- else:
523
- raise ValueError("Could not determine vector dimension")
524
-
525
- query_embedding = [0.0] * dimension
526
- filters = {}
527
- use_alpha = alpha if alpha is not None else self.alpha
528
-
529
- # Build filters
530
- if query.filters is not None:
531
- for filter_item in query.filters.filters:
532
- if hasattr(filter_item, "key") and hasattr(filter_item, "value") and hasattr(filter_item, "operator"):
533
- op_symbol = reverse_operator_map.get(filter_item.operator)
534
- if not op_symbol:
535
- raise ValueError(f"Unsupported filter operator: {filter_item.operator}")
536
-
537
- if filter_item.key not in filters:
538
- filters[filter_item.key] = {}
539
- filters[filter_item.key][op_symbol] = filter_item.value
540
-
541
- elif isinstance(filter_item, dict):
542
- for key, op_dict in filter_item.items():
543
- if isinstance(op_dict, dict):
544
- for op, val in op_dict.items():
545
- if key not in filters:
546
- filters[key] = {}
547
- filters[key][op] = val
548
- else:
549
- raise ValueError(f"Unsupported filter format: {filter_item}")
550
-
551
- _logger.info(f"Final structured filters: {filters}")
552
-
553
- # Get dense query embedding
554
- if query.query_embedding is not None:
555
- query_embedding = cast(List[float], query.query_embedding)
556
-
557
- # Compute sparse query vector
558
- sparse_indices = []
559
- sparse_values = []
560
-
561
- query_text = sparse_query_text or getattr(query, 'query_str', None)
562
- if query_text and self._sparse_encoder is not None:
563
- sparse_indices_batch, sparse_values_batch = self._compute_sparse_vectors([query_text])
564
- sparse_indices = sparse_indices_batch[0]
565
- sparse_values = sparse_values_batch[0]
566
-
567
- # Execute hybrid query
568
- try:
569
- results = self._endee_index.query(
570
- vector=query_embedding,
571
- sparse_indices=sparse_indices,
572
- sparse_values=sparse_values,
573
- top_k=query.similarity_top_k,
574
- filter=filters if filters else None,
575
- include_vectors=True,
576
- alpha=use_alpha,
577
- )
578
- except Exception as e:
579
- _logger.error(f"Error querying Endee hybrid index: {e}")
580
- return VectorStoreQueryResult(nodes=[], similarities=[], ids=[])
581
-
582
- # Process results
583
- nodes = []
584
- similarities = []
585
- ids = []
586
-
587
- for result in results:
588
- node_id = result["id"]
589
- score = result.get("similarity", result.get("score", 0.0))
590
- metadata = result.get("meta", {})
591
-
592
- if self.flat_metadata:
593
- node = metadata_dict_to_node(
594
- metadata=metadata,
595
- text=metadata.pop(self.text_key, None),
596
- id_=node_id,
597
- )
598
- else:
599
- metadata_dict, node_info, relationships = legacy_metadata_dict_to_node(
600
- metadata=metadata,
601
- text_key=self.text_key,
602
- )
603
-
604
- _node_content_str = metadata.get("_node_content", "{}")
605
- try:
606
- node_content = json.loads(_node_content_str)
607
- except json.JSONDecodeError:
608
- node_content = {}
609
-
610
- text = node_content.get(self.text_key, "")
611
- node = TextNode(
612
- text=text,
613
- metadata=metadata_dict,
614
- relationships=relationships,
615
- node_id=node_id,
616
- )
617
-
618
- for key, val in node_info.items():
619
- if hasattr(node, key):
620
- setattr(node, key, val)
621
-
622
- if "vector" in result:
623
- node.embedding = result["vector"]
624
-
625
- nodes.append(node)
626
- similarities.append(score)
627
- ids.append(node_id)
628
-
629
- return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)
630
-
631
- def hybrid_query(
632
- self,
633
- query_text: str,
634
- query_embedding: List[float],
635
- top_k: int = 10,
636
- alpha: Optional[float] = None,
637
- filters: Optional[Dict[str, Any]] = None,
638
- ) -> VectorStoreQueryResult:
639
- """
640
- Direct hybrid query method for convenience.
641
-
642
- Args:
643
- query_text: Text query for sparse vector computation
644
- query_embedding: Dense embedding vector
645
- top_k: Number of results to return
646
- alpha: Weight for hybrid search (0=sparse, 1=dense)
647
- filters: Optional filter dictionary
648
-
649
- Returns:
650
- VectorStoreQueryResult with combined results
651
- """
652
- use_alpha = alpha if alpha is not None else self.alpha
653
-
654
- # Compute sparse vector
655
- sparse_indices = []
656
- sparse_values = []
657
- if self._sparse_encoder is not None:
658
- sparse_indices_batch, sparse_values_batch = self._compute_sparse_vectors([query_text])
659
- sparse_indices = sparse_indices_batch[0]
660
- sparse_values = sparse_values_batch[0]
661
-
662
- try:
663
- results = self._endee_index.query(
664
- vector=query_embedding,
665
- sparse_indices=sparse_indices,
666
- sparse_values=sparse_values,
667
- top_k=top_k,
668
- filter=filters,
669
- include_vectors=True,
670
- alpha=use_alpha,
671
- )
672
- except Exception as e:
673
- _logger.error(f"Error in hybrid query: {e}")
674
- return VectorStoreQueryResult(nodes=[], similarities=[], ids=[])
675
-
676
- nodes = []
677
- similarities = []
678
- ids = []
679
-
680
- for result in results:
681
- node_id = result["id"]
682
- score = result.get("similarity", result.get("score", 0.0))
683
- metadata = result.get("meta", {})
684
-
685
- metadata_dict, node_info, relationships = legacy_metadata_dict_to_node(
686
- metadata=metadata,
687
- text_key=self.text_key,
688
- )
689
-
690
- _node_content_str = metadata.get("_node_content", "{}")
691
- try:
692
- node_content = json.loads(_node_content_str)
693
- except json.JSONDecodeError:
694
- node_content = {}
695
-
696
- text = node_content.get(self.text_key, "")
697
- node = TextNode(
698
- text=text,
699
- metadata=metadata_dict,
700
- relationships=relationships,
701
- node_id=node_id,
702
- )
703
-
704
- for key, val in node_info.items():
705
- if hasattr(node, key):
706
- setattr(node, key, val)
707
-
708
- if "vector" in result:
709
- node.embedding = result["vector"]
710
-
711
- nodes.append(node)
712
- similarities.append(score)
713
- ids.append(node_id)
714
-
715
- return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)
716
-
717
- def describe(self) -> Dict[str, Any]:
718
- """Get index description/stats."""
719
- try:
720
- return self._endee_index.describe()
721
- except Exception as e:
722
- _logger.error(f"Error describing index: {e}")
723
- return {}
724
-
725
- def list_ids(self, limit: int = 100) -> List[str]:
726
- """List IDs in the index."""
727
- try:
728
- return self._endee_index.list_ids(limit=limit)
729
- except Exception as e:
730
- _logger.error(f"Error listing IDs: {e}")
731
- return []
732
-
733
- def fetch(self, ids: List[str]) -> List[Dict[str, Any]]:
734
- """Fetch vectors by IDs."""
735
- try:
736
- return self._endee_index.fetch(ids)
737
- except Exception as e:
738
- _logger.error(f"Error fetching vectors: {e}")
739
- return []
740
-