endee-llamaindex 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,70 @@
+ """
+ Constants for EndeeVectorStore.
+
+ This module contains all constants used by the Endee LlamaIndex integration.
+ """
+
+ from llama_index.core.vector_stores.types import FilterOperator
+
+ # Endee default constants
+ # These may be overridden by importing from endee.constants if available
+ MAX_VECTORS_PER_BATCH = 1000
+ DEFAULT_EF_SEARCH = 128
+ MAX_TOP_K_ALLOWED = 512
+ MAX_EF_SEARCH_ALLOWED = 1024
+ MAX_DIMENSION_ALLOWED = 10000
+ MAX_INDEX_NAME_LENGTH_ALLOWED = 48
+
+ # Try to import constants from endee package to stay in sync
+ try:
+     from endee.constants import (
+         DEFAULT_EF_SEARCH as _DEFAULT_EF_SEARCH,
+         MAX_DIMENSION_ALLOWED as _MAX_DIMENSION_ALLOWED,
+         MAX_EF_SEARCH_ALLOWED as _MAX_EF_SEARCH_ALLOWED,
+         MAX_INDEX_NAME_LENGTH_ALLOWED as _MAX_INDEX_NAME_LENGTH_ALLOWED,
+         MAX_TOP_K_ALLOWED as _MAX_TOP_K_ALLOWED,
+         MAX_VECTORS_PER_BATCH as _MAX_VECTORS_PER_BATCH,
+     )
+     # Override defaults with values from endee package
+     DEFAULT_EF_SEARCH = _DEFAULT_EF_SEARCH
+     MAX_DIMENSION_ALLOWED = _MAX_DIMENSION_ALLOWED
+     MAX_EF_SEARCH_ALLOWED = _MAX_EF_SEARCH_ALLOWED
+     MAX_INDEX_NAME_LENGTH_ALLOWED = _MAX_INDEX_NAME_LENGTH_ALLOWED
+     MAX_TOP_K_ALLOWED = _MAX_TOP_K_ALLOWED
+     MAX_VECTORS_PER_BATCH = _MAX_VECTORS_PER_BATCH
+ except ImportError:
+     pass
+
+ # Space types and precision types for index creation
+ SPACE_TYPES_VALID = ("cosine", "l2", "ip")
+ PRECISION_VALID = ("binary", "float16", "float32", "int16d", "int8d")
+
+ # Space type mapping (aliases)
+ SPACE_TYPE_MAP = {
+     "cosine": "cosine",
+     "l2": "l2",
+     "ip": "ip",
+     "euclidean": "l2",
+     "inner_product": "ip",
+ }
+
+ # Vector store keys
+ ID_KEY = "id"
+ VECTOR_KEY = "values"
+ SPARSE_VECTOR_KEY = "sparse_values"
+ METADATA_KEY = "metadata"
+
+ # Batch size for add(); capped by MAX_VECTORS_PER_BATCH
+ DEFAULT_BATCH_SIZE = 100
+
+ # Supported filter operations: currently only EQ and IN.
+ # Map FilterOperator -> endee/backend filter symbol.
+ SUPPORTED_FILTER_OPERATORS = (
+     FilterOperator.EQ,  # eq -> $eq
+     FilterOperator.IN,  # in -> $in
+ )
+
+ REVERSE_OPERATOR_MAP = {
+     FilterOperator.EQ: "$eq",
+     FilterOperator.IN: "$in",
+ }
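For orientation, here is a minimal sketch of how a caller might consume these constants: normalizing a space-type alias via SPACE_TYPE_MAP, clamping top_k to MAX_TOP_K_ALLOWED, and translating a LlamaIndex MetadataFilter with REVERSE_OPERATOR_MAP. The import path endee_llamaindex.constants and the two helper functions are assumptions for illustration, not part of the released package.

from llama_index.core.vector_stores.types import FilterOperator, MetadataFilter

# Assumed import path for the constants module shown above.
from endee_llamaindex.constants import (
    MAX_TOP_K_ALLOWED,
    REVERSE_OPERATOR_MAP,
    SPACE_TYPE_MAP,
    SUPPORTED_FILTER_OPERATORS,
)


def normalize_index_params(space_type: str, top_k: int):
    # Hypothetical helper: resolve aliases ("euclidean" -> "l2") and cap top_k.
    if space_type not in SPACE_TYPE_MAP:
        raise ValueError(f"Unsupported space type: {space_type!r}")
    return SPACE_TYPE_MAP[space_type], min(top_k, MAX_TOP_K_ALLOWED)


def translate_filter(f: MetadataFilter) -> dict:
    # Hypothetical helper: map a LlamaIndex filter onto the backend's $eq/$in syntax.
    if f.operator not in SUPPORTED_FILTER_OPERATORS:
        raise ValueError(f"Operator {f.operator} not supported; only EQ and IN are.")
    return {f.key: {REVERSE_OPERATOR_MAP[f.operator]: f.value}}


print(normalize_index_params("euclidean", top_k=2000))  # -> ('l2', 512)
print(translate_filter(MetadataFilter(key="genre", value="fiction", operator=FilterOperator.EQ)))
# -> {'genre': {'$eq': 'fiction'}}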
@@ -0,0 +1,160 @@
+ import logging
+ from typing import Any, Callable, List, Optional
+
+ _logger = logging.getLogger(__name__)
+
+ # Supported sparse embedding models
+ SUPPORTED_SPARSE_MODELS = {
+     "splade_pp": "prithivida/Splade_PP_en_v1",
+     "splade_cocondenser": "naver/splade-cocondenser-ensembledistil",
+     "bert_base": "bert-base-uncased",
+     "distilbert": "distilbert-base-uncased",
+     "minilm": "sentence-transformers/all-MiniLM-L6-v2",
+     "mpnet": "sentence-transformers/all-mpnet-base-v2",
+     "roberta": "roberta-base",
+     "xlm_roberta": "xlm-roberta-base",
+ }
+
+
+ def _import_endee() -> Any:
+     """Import endee module."""
+     try:
+         import endee
+     except ImportError as e:
+         raise ImportError(
+             "Could not import endee python package. "
+             "Please install it with `pip install endee`."
+         ) from e
+     return endee
+
+
+ def _initialize_sparse_encoder_fastembed(
+     model_name: str,
+     batch_size: int = 256,
+     cache_dir: Optional[str] = None,
+     threads: Optional[int] = None,
+ ) -> Callable:
+     """
+     Initialize a sparse encoder using FastEmbed (recommended for SPLADE models).
+     """
+     try:
+         from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
+     except ImportError as e:
+         raise ImportError(
+             "FastEmbed is required for hybrid search but not installed.\n"
+             "Install options:\n"
+             " - CPU: pip install endee-llamaindex[hybrid]\n"
+             " - GPU: pip install endee-llamaindex[hybrid-gpu]\n"
+             " - Or: pip install fastembed\n"
+             "For dense-only search, create vector store without sparse_dim/model_name."
+         ) from e
+
+     resolved_model_name = SUPPORTED_SPARSE_MODELS.get(model_name, model_name)
+
+     try:
+         model = SparseTextEmbedding(
+             resolved_model_name,
+             cache_dir=cache_dir,
+             threads=threads,
+             providers=["CUDAExecutionProvider"],
+         )
+         _logger.info(f"Initialized sparse encoder '{resolved_model_name}' on GPU")
+     except Exception:
+         model = SparseTextEmbedding(
+             resolved_model_name,
+             cache_dir=cache_dir,
+             threads=threads
+         )
+         _logger.info(f"Initialized sparse encoder '{resolved_model_name}' on CPU")
+
+     def compute_vectors(texts: List[str]) -> tuple:
+         """Compute sparse vectors (indices, values) for a list of texts."""
+         embeddings = model.embed(texts, batch_size=batch_size)
+         indices = []
+         values = []
+         for embedding in embeddings:
+             indices.append(embedding.indices.tolist())
+             values.append(embedding.values.tolist())
+         return indices, values
+
+     return compute_vectors
+
+
+ def _initialize_sparse_encoder_transformers(model_name: str) -> Callable:
+     """
+     Initialize a sparse encoder using Transformers library.
+     """
+     try:
+         import torch
+         from transformers import AutoModelForMaskedLM, AutoTokenizer
+     except ImportError as e:
+         raise ImportError(
+             "Could not import transformers library. "
+             'Please install transformers with `pip install "transformers[torch]"`'
+         ) from e
+
+     resolved_model_name = SUPPORTED_SPARSE_MODELS.get(model_name, model_name)
+
+     tokenizer = AutoTokenizer.from_pretrained(resolved_model_name)
+     model = AutoModelForMaskedLM.from_pretrained(resolved_model_name)
+
+     if torch.cuda.is_available():
+         model = model.to("cuda")
+         _logger.info(f"Initialized sparse encoder '{resolved_model_name}' on GPU")
+     else:
+         _logger.info(f"Initialized sparse encoder '{resolved_model_name}' on CPU")
+
+     def compute_vectors(texts: List[str]) -> tuple:
+         """Compute sparse vectors from logits."""
+         tokens = tokenizer(
+             texts,
+             truncation=True,
+             padding=True,
+             max_length=512,
+             return_tensors="pt"
+         )
+
+         if torch.cuda.is_available():
+             tokens = tokens.to("cuda")
+
+         with torch.no_grad():
+             output = model(**tokens)
+         logits, attention_mask = output.logits, tokens.attention_mask
+         relu_log = torch.log(1 + torch.relu(logits))
+         weighted_log = relu_log * attention_mask.unsqueeze(-1)
+         tvecs, _ = torch.max(weighted_log, dim=1)
+
+         indices = []
+         values = []
+         for batch in tvecs:
+             nz_indices = batch.nonzero(as_tuple=True)[0].tolist()
+             indices.append(nz_indices)
+             values.append(batch[nz_indices].tolist())
+
+         return indices, values
+
+     return compute_vectors
+
+
+ def get_sparse_encoder(
+     model_name: Optional[str] = None,
+     use_fastembed: bool = True,
+     batch_size: int = 256,
+     cache_dir: Optional[str] = None,
+     threads: Optional[int] = None,
+ ) -> Optional[Callable]:
+     """
+     Get a sparse encoder function for the specified model.
+     """
+     if model_name is None:
+         return None
+
+     if use_fastembed:
+         return _initialize_sparse_encoder_fastembed(
+             model_name=model_name,
+             batch_size=batch_size,
+             cache_dir=cache_dir,
+             threads=threads,
+         )
+     else:
+         return _initialize_sparse_encoder_transformers(model_name=model_name)
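Taken together, get_sparse_encoder is the entry point: it returns a callable that maps a list of texts to parallel lists of sparse indices and values, which the vector store can pair with dense embeddings for hybrid search. A minimal usage sketch, assuming the module is importable as endee_llamaindex.sparse_utils and that FastEmbed is installed (for example via the [hybrid] extra):

# Assumed import path for the sparse-encoder utilities shown above.
from endee_llamaindex.sparse_utils import get_sparse_encoder

# Resolve the "splade_pp" alias and build an encoder (GPU if available, else CPU).
encode = get_sparse_encoder(model_name="splade_pp", batch_size=32)

texts = [
    "Hybrid search combines dense and sparse signals.",
    "Sparse vectors behave like weighted keyword matches.",
]
indices, values = encode(texts)

# One (indices, values) pair per input text; each pair would be stored under
# SPARSE_VECTOR_KEY ("sparse_values") alongside the dense embedding.
for idx, val in zip(indices, values):
    print(len(idx), len(val))  # matching lengths: token ids and their weights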