endee-llamaindex 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- endee_llamaindex/base.py +652 -304
- endee_llamaindex/constants.py +70 -0
- endee_llamaindex/utils.py +160 -0
- endee_llamaindex-0.1.5.dist-info/METADATA +615 -0
- endee_llamaindex-0.1.5.dist-info/RECORD +8 -0
- {endee_llamaindex-0.1.2.dist-info → endee_llamaindex-0.1.5.dist-info}/WHEEL +1 -1
- endee_llamaindex-0.1.2.dist-info/METADATA +0 -140
- endee_llamaindex-0.1.2.dist-info/RECORD +0 -6
- {endee_llamaindex-0.1.2.dist-info → endee_llamaindex-0.1.5.dist-info}/top_level.txt +0 -0
endee_llamaindex/constants.py
@@ -0,0 +1,70 @@
+"""
+Constants for EndeeVectorStore.
+
+This module contains all constants used by the Endee LlamaIndex integration.
+"""
+
+from llama_index.core.vector_stores.types import FilterOperator
+
+# Endee default constants
+# These may be overridden by importing from endee.constants if available
+MAX_VECTORS_PER_BATCH = 1000
+DEFAULT_EF_SEARCH = 128
+MAX_TOP_K_ALLOWED = 512
+MAX_EF_SEARCH_ALLOWED = 1024
+MAX_DIMENSION_ALLOWED = 10000
+MAX_INDEX_NAME_LENGTH_ALLOWED = 48
+
+# Try to import constants from endee package to stay in sync
+try:
+    from endee.constants import (
+        DEFAULT_EF_SEARCH as _DEFAULT_EF_SEARCH,
+        MAX_DIMENSION_ALLOWED as _MAX_DIMENSION_ALLOWED,
+        MAX_EF_SEARCH_ALLOWED as _MAX_EF_SEARCH_ALLOWED,
+        MAX_INDEX_NAME_LENGTH_ALLOWED as _MAX_INDEX_NAME_LENGTH_ALLOWED,
+        MAX_TOP_K_ALLOWED as _MAX_TOP_K_ALLOWED,
+        MAX_VECTORS_PER_BATCH as _MAX_VECTORS_PER_BATCH,
+    )
+    # Override defaults with values from endee package
+    DEFAULT_EF_SEARCH = _DEFAULT_EF_SEARCH
+    MAX_DIMENSION_ALLOWED = _MAX_DIMENSION_ALLOWED
+    MAX_EF_SEARCH_ALLOWED = _MAX_EF_SEARCH_ALLOWED
+    MAX_INDEX_NAME_LENGTH_ALLOWED = _MAX_INDEX_NAME_LENGTH_ALLOWED
+    MAX_TOP_K_ALLOWED = _MAX_TOP_K_ALLOWED
+    MAX_VECTORS_PER_BATCH = _MAX_VECTORS_PER_BATCH
+except ImportError:
+    pass
+
+# Space types and precision types for index creation
+SPACE_TYPES_VALID = ("cosine", "l2", "ip")
+PRECISION_VALID = ("binary", "float16", "float32", "int16d", "int8d")
+
+# Space type mapping (aliases)
+SPACE_TYPE_MAP = {
+    "cosine": "cosine",
+    "l2": "l2",
+    "ip": "ip",
+    "euclidean": "l2",
+    "inner_product": "ip",
+}
+
+# Vector store keys
+ID_KEY = "id"
+VECTOR_KEY = "values"
+SPARSE_VECTOR_KEY = "sparse_values"
+METADATA_KEY = "metadata"
+
+# Batch size for add(); capped by MAX_VECTORS_PER_BATCH
+DEFAULT_BATCH_SIZE = 100
+
+# Supported filter operations: currently only EQ and IN.
+# Map FilterOperator -> endee/backend filter symbol.
+SUPPORTED_FILTER_OPERATORS = (
+    FilterOperator.EQ,  # eq -> $eq
+    FilterOperator.IN,  # in -> $in
+)
+
+REVERSE_OPERATOR_MAP = {
+    FilterOperator.EQ: "$eq",
+    FilterOperator.IN: "$in",
+}
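Note: the alias and operator tables above are what the vector store consults when it normalizes index parameters and translates LlamaIndex metadata filters into Endee's `$eq`/`$in` syntax. A minimal illustrative sketch of that lookup follows; `normalize_space_type` and `filter_to_backend` are hypothetical helpers written here for illustration, not names exported by the package.

# Illustrative only: how the constants above could be used to validate inputs
# and translate a LlamaIndex filter into a backend filter expression.
from llama_index.core.vector_stores.types import FilterOperator, MetadataFilter

from endee_llamaindex.constants import (
    REVERSE_OPERATOR_MAP,
    SPACE_TYPE_MAP,
    SUPPORTED_FILTER_OPERATORS,
)

def normalize_space_type(space_type: str) -> str:
    # Accepts aliases such as "euclidean" -> "l2" and "inner_product" -> "ip".
    try:
        return SPACE_TYPE_MAP[space_type.lower()]
    except KeyError:
        raise ValueError(f"Unsupported space type: {space_type!r}")

def filter_to_backend(f: MetadataFilter) -> dict:
    # Only EQ and IN are supported; anything else is rejected up front.
    if f.operator not in SUPPORTED_FILTER_OPERATORS:
        raise ValueError(f"Unsupported filter operator: {f.operator}")
    return {f.key: {REVERSE_OPERATOR_MAP[f.operator]: f.value}}

print(normalize_space_type("euclidean"))  # -> "l2"
print(filter_to_backend(MetadataFilter(key="lang", value="en", operator=FilterOperator.EQ)))
# -> {"lang": {"$eq": "en"}}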
endee_llamaindex/utils.py
@@ -0,0 +1,160 @@
+import logging
+from typing import Any, Callable, List, Optional
+
+_logger = logging.getLogger(__name__)
+
+# Supported sparse embedding models
+SUPPORTED_SPARSE_MODELS = {
+    "splade_pp": "prithivida/Splade_PP_en_v1",
+    "splade_cocondenser": "naver/splade-cocondenser-ensembledistil",
+    "bert_base": "bert-base-uncased",
+    "distilbert": "distilbert-base-uncased",
+    "minilm": "sentence-transformers/all-MiniLM-L6-v2",
+    "mpnet": "sentence-transformers/all-mpnet-base-v2",
+    "roberta": "roberta-base",
+    "xlm_roberta": "xlm-roberta-base",
+}
+
+
+def _import_endee() -> Any:
+    """Import endee module."""
+    try:
+        import endee
+    except ImportError as e:
+        raise ImportError(
+            "Could not import endee python package. "
+            "Please install it with `pip install endee`."
+        ) from e
+    return endee
+
+
+def _initialize_sparse_encoder_fastembed(
+    model_name: str,
+    batch_size: int = 256,
+    cache_dir: Optional[str] = None,
+    threads: Optional[int] = None,
+) -> Callable:
+    """
+    Initialize a sparse encoder using FastEmbed (recommended for SPLADE models).
+    """
+    try:
+        from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
+    except ImportError as e:
+        raise ImportError(
+            "FastEmbed is required for hybrid search but not installed.\n"
+            "Install options:\n"
+            " - CPU: pip install endee-llamaindex[hybrid]\n"
+            " - GPU: pip install endee-llamaindex[hybrid-gpu]\n"
+            " - Or: pip install fastembed\n"
+            "For dense-only search, create vector store without sparse_dim/model_name."
+        ) from e
+
+    resolved_model_name = SUPPORTED_SPARSE_MODELS.get(model_name, model_name)
+
+    try:
+        model = SparseTextEmbedding(
+            resolved_model_name,
+            cache_dir=cache_dir,
+            threads=threads,
+            providers=["CUDAExecutionProvider"],
+        )
+        _logger.info(f"Initialized sparse encoder '{resolved_model_name}' on GPU")
+    except Exception:
+        model = SparseTextEmbedding(
+            resolved_model_name,
+            cache_dir=cache_dir,
+            threads=threads
+        )
+        _logger.info(f"Initialized sparse encoder '{resolved_model_name}' on CPU")
+
+    def compute_vectors(texts: List[str]) -> tuple:
+        """Compute sparse vectors (indices, values) for a list of texts."""
+        embeddings = model.embed(texts, batch_size=batch_size)
+        indices = []
+        values = []
+        for embedding in embeddings:
+            indices.append(embedding.indices.tolist())
+            values.append(embedding.values.tolist())
+        return indices, values
+
+    return compute_vectors
+
+
+def _initialize_sparse_encoder_transformers(model_name: str) -> Callable:
+    """
+    Initialize a sparse encoder using Transformers library.
+    """
+    try:
+        import torch
+        from transformers import AutoModelForMaskedLM, AutoTokenizer
+    except ImportError as e:
+        raise ImportError(
+            "Could not import transformers library. "
+            'Please install transformers with `pip install "transformers[torch]"`'
+        ) from e
+
+    resolved_model_name = SUPPORTED_SPARSE_MODELS.get(model_name, model_name)
+
+    tokenizer = AutoTokenizer.from_pretrained(resolved_model_name)
+    model = AutoModelForMaskedLM.from_pretrained(resolved_model_name)
+
+    if torch.cuda.is_available():
+        model = model.to("cuda")
+        _logger.info(f"Initialized sparse encoder '{resolved_model_name}' on GPU")
+    else:
+        _logger.info(f"Initialized sparse encoder '{resolved_model_name}' on CPU")
+
+    def compute_vectors(texts: List[str]) -> tuple:
+        """Compute sparse vectors from logits."""
+        tokens = tokenizer(
+            texts,
+            truncation=True,
+            padding=True,
+            max_length=512,
+            return_tensors="pt"
+        )
+
+        if torch.cuda.is_available():
+            tokens = tokens.to("cuda")
+
+        with torch.no_grad():
+            output = model(**tokens)
+        logits, attention_mask = output.logits, tokens.attention_mask
+        relu_log = torch.log(1 + torch.relu(logits))
+        weighted_log = relu_log * attention_mask.unsqueeze(-1)
+        tvecs, _ = torch.max(weighted_log, dim=1)
+
+        indices = []
+        values = []
+        for batch in tvecs:
+            nz_indices = batch.nonzero(as_tuple=True)[0].tolist()
+            indices.append(nz_indices)
+            values.append(batch[nz_indices].tolist())
+
+        return indices, values
+
+    return compute_vectors
+
+
+def get_sparse_encoder(
+    model_name: Optional[str] = None,
+    use_fastembed: bool = True,
+    batch_size: int = 256,
+    cache_dir: Optional[str] = None,
+    threads: Optional[int] = None,
+) -> Optional[Callable]:
+    """
+    Get a sparse encoder function for the specified model.
+    """
+    if model_name is None:
+        return None
+
+    if use_fastembed:
+        return _initialize_sparse_encoder_fastembed(
+            model_name=model_name,
+            batch_size=batch_size,
+            cache_dir=cache_dir,
+            threads=threads,
+        )
+    else:
+        return _initialize_sparse_encoder_transformers(model_name=model_name)
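Note: the encoder returned by get_sparse_encoder is simply a callable mapping a list of texts to parallel lists of sparse indices and values. A short illustrative sketch of how it is meant to be called follows; it assumes FastEmbed is installed (e.g. via the hybrid extra) and that the SPLADE model can be downloaded on first use. The exact sparse payload shape expected by EndeeVectorStore is not shown here.

# Illustrative only: exercise the sparse encoder defined in utils.py above.
from endee_llamaindex.utils import get_sparse_encoder

# "splade_pp" is an alias for prithivida/Splade_PP_en_v1 (see SUPPORTED_SPARSE_MODELS).
encode = get_sparse_encoder(model_name="splade_pp")

texts = ["hybrid search mixes dense and sparse signals"]
indices, values = encode(texts)

# One entry per input text: token ids with non-zero SPLADE weight and their scores.
assert len(indices) == len(values) == len(texts)
assert len(indices[0]) == len(values[0])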