headroom_ai-0.2.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
--- /dev/null
+++ b/headroom/relevance/embedding.py
@@ -0,0 +1,255 @@
+"""Embedding-based relevance scorer for Headroom SDK.
+
+This module provides semantic relevance scoring using sentence embeddings.
+Requires the optional `sentence-transformers` dependency.
+
+Key features:
+- Semantic understanding ("errors" matches "failed", "issues")
+- Handles paraphrases and synonyms
+- Uses lightweight all-MiniLM-L6-v2 model by default (22M params)
+- Batch encoding for efficiency
+
+Install with: pip install headroom[relevance]
+
+Limitations:
+- Requires ~500MB for model download on first use
+- ~5-10ms per batch (slower than BM25)
+- May miss exact ID matches that BM25 catches
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from .base import RelevanceScore, RelevanceScorer
+
+# numpy is an optional dependency - import lazily
+_numpy = None
+
+
+def _get_numpy():
+    """Lazily import numpy."""
+    global _numpy
+    if _numpy is None:
+        try:
+            import numpy as np
+
+            _numpy = np
+        except ImportError as e:
+            raise ImportError(
+                "numpy is required for EmbeddingScorer. "
+                "Install with: pip install headroom[relevance]"
+            ) from e
+    return _numpy
+
+
+if TYPE_CHECKING:
+    from sentence_transformers import SentenceTransformer
+
+logger = logging.getLogger(__name__)
+
+
+def _cosine_similarity(a, b) -> float:
+    """Compute cosine similarity between two vectors.
+
+    Args:
+        a: First vector (numpy array).
+        b: Second vector (numpy array).
+
+    Returns:
+        Cosine similarity in range [-1, 1], clamped to [0, 1].
+    """
+    np = _get_numpy()
+    norm_a = np.linalg.norm(a)
+    norm_b = np.linalg.norm(b)
+
+    if norm_a == 0 or norm_b == 0:
+        return 0.0
+
+    similarity = float(np.dot(a, b) / (norm_a * norm_b))
+    # Clamp to [0, 1] since we only care about positive similarity
+    return max(0.0, min(1.0, similarity))
+
+
+class EmbeddingScorer(RelevanceScorer):
+    """Semantic relevance scorer using sentence embeddings.
+
+    Uses sentence-transformers to compute dense embeddings and cosine similarity.
+    The default model (all-MiniLM-L6-v2) offers a good balance of speed and quality.
+
+    Example:
+        scorer = EmbeddingScorer()
+        score = scorer.score(
+            '{"status": "failed", "error": "connection refused"}',
+            "show me the errors"
+        )
+        # score.score > 0.5 (semantic match between "failed"/"error" and "errors")
+
+    Note:
+        Requires sentence-transformers: pip install headroom[relevance]
+    """
+
+    _model_cache: dict[str, SentenceTransformer] = {}
+
+    def __init__(
+        self,
+        model_name: str = "all-MiniLM-L6-v2",
+        device: str | None = None,
+        cache_model: bool = True,
+    ):
+        """Initialize embedding scorer.
+
+        Args:
+            model_name: Sentence transformer model name.
+                Recommended models:
+                - "all-MiniLM-L6-v2": Fast, good quality (default)
+                - "all-mpnet-base-v2": Best quality, slower
+                - "paraphrase-MiniLM-L6-v2": Good for paraphrase detection
+            device: Device to use ('cpu', 'cuda', 'mps', or None for auto).
+            cache_model: If True, cache loaded models across instances.
+        """
+        self.model_name = model_name
+        self.device = device
+        self.cache_model = cache_model
+        self._model: SentenceTransformer | None = None
+        self._available: bool | None = None
+
+    @classmethod
+    def is_available(cls) -> bool:
+        """Check if sentence-transformers is installed.
+
+        Returns:
+            True if the package is available.
+        """
+        try:
+            import sentence_transformers  # noqa: F401
+
+            return True
+        except ImportError:
+            return False
+
+    def _get_model(self) -> SentenceTransformer:
+        """Get or load the sentence transformer model.
+
+        Returns:
+            Loaded SentenceTransformer model.
+
+        Raises:
+            RuntimeError: If sentence-transformers is not installed.
+        """
+        if self._model is not None:
+            return self._model
+
+        if not self.is_available():
+            raise RuntimeError(
+                "EmbeddingScorer requires sentence-transformers. "
+                "Install with: pip install headroom[relevance]"
+            )
+
+        # Check cache
+        if self.cache_model and self.model_name in self._model_cache:
+            self._model = self._model_cache[self.model_name]
+            return self._model
+
+        # Load model
+        from sentence_transformers import SentenceTransformer
+
+        logger.info(f"Loading sentence transformer model: {self.model_name}")
+        self._model = SentenceTransformer(self.model_name, device=self.device)
+
+        if self.cache_model:
+            self._model_cache[self.model_name] = self._model
+
+        return self._model
+
+    def _encode(self, texts: list[str]):
+        """Encode texts to embeddings.
+
+        Args:
+            texts: List of texts to encode.
+
+        Returns:
+            Array of embeddings, shape (len(texts), embedding_dim).
+        """
+        model = self._get_model()
+        # normalize_embeddings=True ensures unit vectors for fast cosine via dot product
+        embeddings = model.encode(
+            texts,
+            convert_to_numpy=True,
+            normalize_embeddings=True,
+            show_progress_bar=False,
+        )
+        return embeddings
+
+    def score(self, item: str, context: str) -> RelevanceScore:
+        """Score item relevance to context using embeddings.
+
+        Args:
+            item: Item text.
+            context: Query context.
+
+        Returns:
+            RelevanceScore with embedding-based similarity.
+        """
+        if not item or not context:
+            return RelevanceScore(score=0.0, reason="Embedding: empty input")
+
+        embeddings = self._encode([item, context])
+        similarity = _cosine_similarity(embeddings[0], embeddings[1])
+
+        return RelevanceScore(
+            score=similarity,
+            reason=f"Embedding: semantic similarity {similarity:.2f}",
+        )
+
+    def score_batch(self, items: list[str], context: str) -> list[RelevanceScore]:
+        """Score multiple items efficiently using batch encoding.
+
+        This is much faster than scoring items individually since:
+        1. Context is encoded only once
+        2. Items are encoded in a single batch
+
+        Args:
+            items: List of items to score.
+            context: Query context.
+
+        Returns:
+            List of RelevanceScore objects.
+        """
+        if not items:
+            return []
+
+        if not context:
+            return [RelevanceScore(score=0.0, reason="Embedding: empty context") for _ in items]
+
+        # Encode all texts in one batch
+        all_texts = items + [context]
+        embeddings = self._encode(all_texts)
+
+        # Last embedding is the context
+        context_emb = embeddings[-1]
+        item_embs = embeddings[:-1]
+
+        # Compute similarities
+        results = []
+        for emb in item_embs:
+            similarity = _cosine_similarity(emb, context_emb)
+            results.append(
+                RelevanceScore(
+                    score=similarity,
+                    reason=f"Embedding: {similarity:.2f}",
+                )
+            )
+
+        return results
+
+
+# Convenience function for checking availability without instantiation
+def embedding_available() -> bool:
+    """Check if embedding scorer is available.
+
+    Returns:
+        True if sentence-transformers is installed.
+    """
+    return EmbeddingScorer.is_available()
--- /dev/null
+++ b/headroom/relevance/hybrid.py
@@ -0,0 +1,259 @@
+"""Hybrid relevance scorer combining BM25 and embeddings.
+
+This module provides a hybrid scorer that combines BM25 (keyword matching)
+with embedding-based semantic similarity. Uses adaptive alpha tuning to
+automatically adjust the balance based on query characteristics.
+
+Key features:
+- Best of both worlds: exact ID matching + semantic understanding
+- Adaptive alpha: increases BM25 weight for UUID/ID-heavy queries
+- Graceful degradation: falls back to BM25 if embeddings unavailable
+- Research-backed: Dynamic Alpha Tuning gives +2-7.5% gains (Hsu et al., 2025)
+
+Recommended for production use where accuracy matters.
+"""
+
+from __future__ import annotations
+
+import re
+
+from .base import RelevanceScore, RelevanceScorer
+from .bm25 import BM25Scorer
+from .embedding import EmbeddingScorer
+
+
+class HybridScorer(RelevanceScorer):
+    """Hybrid BM25 + Embedding scorer with adaptive fusion.
+
+    Combines keyword matching (BM25) with semantic similarity (embeddings)
+    using score fusion. The fusion weight (alpha) can be:
+
+    1. Fixed: Use a constant alpha for all queries
+    2. Adaptive: Automatically adjust alpha based on query characteristics
+
+    Adaptive alpha increases BM25 weight when the query contains:
+    - UUIDs (exact match critical)
+    - Numeric IDs (4+ digits)
+    - Specific hostnames or email addresses
+
+    Example:
+        # Create hybrid scorer with adaptive alpha
+        scorer = HybridScorer(adaptive=True)
+
+        # UUID query: alpha ~0.8 (favor BM25)
+        score1 = scorer.score(item, "find 550e8400-e29b-41d4-a716-446655440000")
+
+        # Semantic query: alpha ~0.5 (balanced)
+        score2 = scorer.score(item, "show me the failed requests")
+
+    If sentence-transformers is not installed, falls back to pure BM25.
+    """
+
+    # Patterns that indicate exact match is important
+    _UUID_PATTERN = re.compile(
+        r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
+    )
+    _NUMERIC_ID_PATTERN = re.compile(r"\b\d{4,}\b")
+    _HOSTNAME_PATTERN = re.compile(
+        r"\b[a-zA-Z0-9][-a-zA-Z0-9]*\.[a-zA-Z0-9][-a-zA-Z0-9]*(?:\.[a-zA-Z]{2,})?\b"
+    )
+    _EMAIL_PATTERN = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b")
+
+    def __init__(
+        self,
+        alpha: float = 0.5,
+        adaptive: bool = True,
+        bm25_scorer: BM25Scorer | None = None,
+        embedding_scorer: EmbeddingScorer | None = None,
+    ):
+        """Initialize hybrid scorer.
+
+        Args:
+            alpha: Base fusion weight for BM25 (default 0.5).
+                Combined score = alpha * BM25 + (1 - alpha) * Embedding.
+            adaptive: If True, adjust alpha per query based on patterns.
+            bm25_scorer: Custom BM25 scorer instance (uses default if None).
+            embedding_scorer: Custom embedding scorer (uses default if None).
+        """
+        self.base_alpha = alpha
+        self.adaptive = adaptive
+
+        # Initialize scorers
+        self.bm25 = bm25_scorer or BM25Scorer()
+
+        # Embedding scorer with graceful fallback
+        self.embedding: EmbeddingScorer | None = None
+        if embedding_scorer is not None:
+            self.embedding = embedding_scorer
+            self._embedding_available = True
+        elif EmbeddingScorer.is_available():
+            self.embedding = EmbeddingScorer()
+            self._embedding_available = True
+        else:
+            self._embedding_available = False
+
+    @classmethod
+    def is_available(cls) -> bool:
+        """Check if hybrid scoring is available.
+
+        Note: HybridScorer is always available (falls back to BM25).
+        Use has_embedding_support() to check if embeddings are available.
+
+        Returns:
+            Always True.
+        """
+        return True
+
+    def has_embedding_support(self) -> bool:
+        """Check if embedding scoring is available.
+
+        Returns:
+            True if sentence-transformers is installed.
+        """
+        return self._embedding_available
+
+    def _compute_alpha(self, context: str) -> float:
+        """Compute adaptive alpha based on query characteristics.
+
+        Higher alpha = more BM25 weight (exact matching).
+        Lower alpha = more embedding weight (semantic matching).
+
+        Args:
+            context: Query context.
+
+        Returns:
+            Alpha value in [0.3, 0.9].
+        """
+        if not self.adaptive:
+            return self.base_alpha
+
+        context_lower = context.lower()
+
+        # Count patterns that need exact matching
+        uuid_count = len(self._UUID_PATTERN.findall(context))
+        id_count = len(self._NUMERIC_ID_PATTERN.findall(context))
+        hostname_count = len(self._HOSTNAME_PATTERN.findall(context_lower))
+        email_count = len(self._EMAIL_PATTERN.findall(context_lower))
+
+        # Adjust alpha based on pattern counts
+        alpha = self.base_alpha
+
+        if uuid_count > 0:
+            alpha = max(alpha, 0.85)  # UUIDs need exact match
+        elif id_count >= 2:
+            alpha = max(alpha, 0.75)  # Multiple IDs suggest lookup
+        elif id_count == 1:
+            alpha = max(alpha, 0.65)
+        elif hostname_count > 0 or email_count > 0:
+            alpha = max(alpha, 0.6)
+
+        # Clamp to valid range
+        return max(0.3, min(0.9, alpha))
+
+    def score(self, item: str, context: str) -> RelevanceScore:
+        """Score item using hybrid BM25 + embedding fusion.
+
+        Args:
+            item: Item text.
+            context: Query context.
+
+        Returns:
+            RelevanceScore with combined score.
+        """
+        # Get BM25 score
+        bm25_result = self.bm25.score(item, context)
+
+        # If embeddings unavailable, boost BM25 scores since they're inherently lower
+        # This ensures reasonable matching even without semantic understanding
+        if not self._embedding_available or self.embedding is None:
+            # Boost BM25 score: if there's ANY match, ensure it's above typical threshold
+            # This compensates for BM25's low scores on single-term matches
+            boosted_score = bm25_result.score
+            if bm25_result.matched_terms:
+                # Ensure matched items get at least 0.3 score
+                boosted_score = max(boosted_score, 0.3)
+                # Additional boost for multiple matches
+                if len(bm25_result.matched_terms) >= 2:
+                    boosted_score = min(1.0, boosted_score + 0.2)
+            return RelevanceScore(
+                score=boosted_score,
+                reason=f"Hybrid (BM25 only, boosted): {bm25_result.reason}",
+                matched_terms=bm25_result.matched_terms,
+            )
+
+        # Get embedding score
+        emb_result = self.embedding.score(item, context)
+
+        # Compute adaptive alpha
+        alpha = self._compute_alpha(context)
+
+        # Combine scores
+        combined_score = alpha * bm25_result.score + (1 - alpha) * emb_result.score
+
+        return RelevanceScore(
+            score=combined_score,
+            reason=(
+                f"Hybrid (α={alpha:.2f}): "
+                f"BM25={bm25_result.score:.2f}, "
+                f"Semantic={emb_result.score:.2f}"
+            ),
+            matched_terms=bm25_result.matched_terms,
+        )
+
+    def score_batch(self, items: list[str], context: str) -> list[RelevanceScore]:
+        """Score multiple items using hybrid fusion.
+
+        Efficiently batches BM25 and embedding scoring.
+
+        Args:
+            items: List of items to score.
+            context: Query context.
+
+        Returns:
+            List of RelevanceScore objects.
+        """
+        if not items:
+            return []
+
+        # Get BM25 scores
+        bm25_results = self.bm25.score_batch(items, context)
+
+        # If embeddings unavailable, boost BM25 scores and return
+        if not self._embedding_available or self.embedding is None:
+            boosted_results = []
+            for r in bm25_results:
+                boosted_score = r.score
+                if r.matched_terms:
+                    # Ensure matched items get at least 0.3 score
+                    boosted_score = max(boosted_score, 0.3)
+                    # Additional boost for multiple matches
+                    if len(r.matched_terms) >= 2:
+                        boosted_score = min(1.0, boosted_score + 0.2)
+                boosted_results.append(
+                    RelevanceScore(
+                        score=boosted_score,
+                        reason=f"Hybrid (BM25 only, boosted): {r.reason}",
+                        matched_terms=r.matched_terms,
+                    )
+                )
+            return boosted_results
+
+        # Get embedding scores
+        emb_results = self.embedding.score_batch(items, context)
+
+        # Compute adaptive alpha (same for all items in batch)
+        alpha = self._compute_alpha(context)
+
+        # Combine scores
+        results = []
+        for bm25_r, emb_r in zip(bm25_results, emb_results):
+            combined = alpha * bm25_r.score + (1 - alpha) * emb_r.score
+            results.append(
+                RelevanceScore(
+                    score=combined,
+                    reason=f"Hybrid (α={alpha:.2f}): BM25={bm25_r.score:.2f}, Emb={emb_r.score:.2f}",
+                    matched_terms=bm25_r.matched_terms,
+                )
+            )
+
+        return results