headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/py.typed
ADDED
File without changes
headroom/relevance/__init__.py
ADDED
@@ -0,0 +1,124 @@
+"""Relevance scoring module for Headroom SDK.
+
+This module provides a unified interface for computing item relevance against
+query contexts. All scorers implement the RelevanceScorer protocol:
+
+    relevance(item, context) -> RelevanceScore
+
+Available scorers:
+
+1. HybridScorer (DEFAULT - recommended)
+   - Combines BM25 + embeddings for best accuracy
+   - Adaptive alpha: more BM25 for UUIDs, more semantic for natural language
+   - Falls back gracefully to BM25 if sentence-transformers not installed
+   - Install for full support: pip install headroom[relevance]
+
+2. BM25Scorer (zero dependencies)
+   - Fast keyword matching
+   - Good for exact UUIDs, IDs, specific terms
+   - May miss semantic matches ("errors" won't match "failed")
+
+3. EmbeddingScorer (requires sentence-transformers)
+   - Pure semantic similarity
+   - Best for natural language queries
+   - Install: pip install headroom[relevance]
+
+WHY HYBRID IS DEFAULT:
+- Missing important items during compression is catastrophic
+- BM25 alone gives low scores for single-term matches (e.g., "Alice" = 0.07)
+- Semantic matching catches "errors" -> "failed", "issues", etc.
+- 5-10ms latency is acceptable vs. losing critical data
+
+Example usage:
+    from headroom.relevance import HybridScorer, create_scorer
+
+    # Default: Hybrid (recommended)
+    scorer = create_scorer()  # or HybridScorer()
+
+    # Zero-dependency fallback
+    scorer = create_scorer("bm25")
+
+    # Score items
+    items = [
+        '{"id": "123", "name": "Alice"}',
+        '{"id": "456", "name": "Bob"}',
+    ]
+    scores = scorer.score_batch(items, "find user 123")
+    # scores[0].score > scores[1].score
+"""
+
+from typing import Any
+
+from .base import RelevanceScore, RelevanceScorer
+from .bm25 import BM25Scorer
+from .embedding import EmbeddingScorer, embedding_available
+from .hybrid import HybridScorer
+
+__all__ = [
+    # Base types
+    "RelevanceScore",
+    "RelevanceScorer",
+    # Scorers
+    "BM25Scorer",
+    "EmbeddingScorer",
+    "HybridScorer",
+    # Utilities
+    "embedding_available",
+    # Factory function
+    "create_scorer",
+]
+
+
+def create_scorer(
+    tier: str = "hybrid",
+    **kwargs: Any,
+) -> RelevanceScorer:
+    """Factory function to create a relevance scorer.
+
+    Args:
+        tier: Scorer tier to create:
+            - "hybrid": Hybrid BM25 + embedding (DEFAULT, recommended)
+            - "bm25": BM25 keyword scorer (zero deps, fast)
+            - "embedding": Embedding scorer (requires sentence-transformers)
+        **kwargs: Additional arguments passed to scorer constructor.
+
+    Returns:
+        RelevanceScorer instance.
+
+    Raises:
+        ValueError: If tier is unknown.
+        RuntimeError: If tier requires unavailable dependencies.
+
+    Note:
+        HybridScorer gracefully falls back to BM25 if sentence-transformers
+        is not installed, so it's safe to use as the default.
+
+    Example:
+        # Create default hybrid scorer (recommended)
+        scorer = create_scorer()
+
+        # Create BM25 scorer for zero-dependency environments
+        scorer = create_scorer("bm25")
+
+        # Create hybrid scorer with custom alpha
+        scorer = create_scorer("hybrid", alpha=0.6, adaptive=True)
+    """
+    tier = tier.lower()
+
+    if tier == "bm25":
+        return BM25Scorer(**kwargs)
+
+    elif tier == "embedding":
+        if not EmbeddingScorer.is_available():
+            raise RuntimeError(
+                "EmbeddingScorer requires sentence-transformers. "
+                "Install with: pip install headroom[relevance]"
+            )
+        return EmbeddingScorer(**kwargs)
+
+    elif tier == "hybrid":
+        return HybridScorer(**kwargs)
+
+    else:
+        valid_tiers = ["bm25", "embedding", "hybrid"]
+        raise ValueError(f"Unknown scorer tier: {tier}. Valid tiers: {valid_tiers}")
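Note that only the "hybrid" tier degrades gracefully; requesting "embedding" without sentence-transformers raises RuntimeError. A minimal sketch (not part of the wheel) of selecting a tier defensively, using only the create_scorer and score_batch APIs shown in this diff:

    from headroom.relevance import create_scorer

    # Prefer pure semantic scoring when available; otherwise drop to the
    # zero-dependency BM25 tier. create_scorer("hybrid") makes a similar
    # trade-off internally, per the Note in the docstring above.
    try:
        scorer = create_scorer("embedding")
    except RuntimeError:
        scorer = create_scorer("bm25")

    scores = scorer.score_batch(['{"id": "123"}', '{"id": "456"}'], "find user 123")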
headroom/relevance/base.py
ADDED
@@ -0,0 +1,106 @@
+"""Base protocol for relevance scoring in Headroom SDK.
+
+This module defines the RelevanceScorer protocol - a unified interface for
+computing item relevance against a query context. All transforms that make
+keep/drop decisions can use this abstraction.
+
+The pattern: relevance(item, context) -> float [0.0, 1.0]
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+
+
+@dataclass
+class RelevanceScore:
+    """Relevance score with explainability.
+
+    Attributes:
+        score: Relevance score from 0.0 (irrelevant) to 1.0 (highly relevant).
+        reason: Human-readable explanation of the score.
+        matched_terms: List of terms that contributed to the match (for debugging).
+    """
+
+    score: float
+    reason: str = ""
+    matched_terms: list[str] = field(default_factory=list)
+
+    def __post_init__(self) -> None:
+        """Clamp score to valid range."""
+        self.score = max(0.0, min(1.0, self.score))
+
+
+class RelevanceScorer(ABC):
+    """Abstract base class for relevance scoring.
+
+    All relevance scorers must implement:
+    - score(): Score a single item against context
+    - score_batch(): Score multiple items efficiently
+
+    Example usage:
+        scorer = BM25Scorer()
+        items = ['{"id": "123", "name": "Alice"}', '{"id": "456", "name": "Bob"}']
+        context = "find user 123"
+        scores = scorer.score_batch(items, context)
+        # scores[0].score > scores[1].score (item 0 matches "123")
+    """
+
+    @abstractmethod
+    def score(self, item: str, context: str) -> RelevanceScore:
+        """Score a single item's relevance to the context.
+
+        Args:
+            item: String representation of the item (typically JSON).
+            context: Query context (user message, tool call args, etc.).
+
+        Returns:
+            RelevanceScore with score [0.0, 1.0] and explanation.
+        """
+        pass
+
+    @abstractmethod
+    def score_batch(self, items: list[str], context: str) -> list[RelevanceScore]:
+        """Score multiple items efficiently.
+
+        Default implementation calls score() for each item.
+        Subclasses should override for batch-optimized implementations.
+
+        Args:
+            items: List of string representations of items.
+            context: Query context to score against.
+
+        Returns:
+            List of RelevanceScore objects, one per item.
+        """
+        pass
+
+    @classmethod
+    def is_available(cls) -> bool:
+        """Check if this scorer is available (dependencies installed).
+
+        Override in subclasses that have optional dependencies.
+
+        Returns:
+            True if the scorer can be instantiated.
+        """
+        return True
+
+
+def default_batch_score(
+    scorer: RelevanceScorer, items: list[str], context: str
+) -> list[RelevanceScore]:
+    """Default batch implementation that calls score() per item.
+
+    Use this as a fallback for scorers that don't have optimized batching.
+
+    Args:
+        scorer: The scorer instance.
+        items: List of items to score.
+        context: Query context.
+
+    Returns:
+        List of scores.
+    """
+    return [scorer.score(item, context) for item in items]
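A minimal sketch (hypothetical, not shipped in the wheel) of implementing the protocol above: a scorer that treats verbatim substring containment as fully relevant, delegating batching to the default_batch_score fallback defined in this file. SubstringScorer and its logic are illustrative assumptions; only the base-class API comes from the diff:

    from headroom.relevance.base import (
        RelevanceScore,
        RelevanceScorer,
        default_batch_score,
    )

    class SubstringScorer(RelevanceScorer):
        """Hypothetical scorer: 1.0 if the context appears verbatim in the item."""

        def score(self, item: str, context: str) -> RelevanceScore:
            hit = context.lower() in item.lower()
            return RelevanceScore(
                score=1.0 if hit else 0.0,
                reason="substring match" if hit else "no substring match",
                matched_terms=[context] if hit else [],
            )

        def score_batch(self, items: list[str], context: str) -> list[RelevanceScore]:
            # No batch optimization needed; reuse the per-item fallback.
            return default_batch_score(self, items, context)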
headroom/relevance/bm25.py
ADDED
@@ -0,0 +1,255 @@
+"""BM25 relevance scorer for Headroom SDK.
+
+This module provides a BM25-based relevance scorer with ZERO external dependencies.
+BM25 (Best Match 25) is a bag-of-words retrieval function that ranks documents
+based on query term frequency.
+
+Key features:
+- Zero dependencies (pure Python)
+- Fast execution (~0ms per item)
+- Excellent for exact matches (UUIDs, IDs, specific terms)
+- Returns matched terms for explainability
+
+Limitations:
+- No semantic understanding ("errors" won't match "failed")
+- Sensitive to tokenization
+"""
+
+from __future__ import annotations
+
+import math
+import re
+from collections import Counter
+
+from .base import RelevanceScore, RelevanceScorer
+
+
+class BM25Scorer(RelevanceScorer):
+    """BM25 keyword relevance scorer.
+
+    Zero dependencies, instant execution. Excellent for exact ID/UUID matching.
+
+    BM25 formula:
+        score(D, Q) = sum over q in Q of:
+            IDF(q) * (f(q,D) * (k1 + 1)) / (f(q,D) + k1 * (1 - b + b * |D|/avgdl))
+
+    Where:
+    - f(q,D) = frequency of term q in document D
+    - |D| = length of document D
+    - avgdl = average document length
+    - k1, b = tuning parameters
+
+    Example:
+        scorer = BM25Scorer()
+        score = scorer.score(
+            '{"id": "550e8400-e29b-41d4-a716-446655440000", "name": "Alice"}',
+            "find record 550e8400-e29b-41d4-a716-446655440000"
+        )
+        # score.score > 0.5 (UUID matches exactly)
+    """
+
+    # Tokenization pattern: alphanumeric sequences, UUIDs, numeric IDs
+    _TOKEN_PATTERN = re.compile(
+        r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"  # UUIDs
+        r"|\b\d{4,}\b"  # Numeric IDs (4+ digits)
+        r"|[a-zA-Z0-9_]+"  # Alphanumeric tokens
+    )
+
+    def __init__(
+        self,
+        k1: float = 1.5,
+        b: float = 0.75,
+        normalize_score: bool = True,
+        max_score: float = 10.0,
+    ):
+        """Initialize BM25 scorer.
+
+        Args:
+            k1: Term frequency saturation parameter (default 1.5).
+                Higher values increase term frequency impact.
+            b: Length normalization parameter (default 0.75).
+                0 = no length normalization, 1 = full normalization.
+            normalize_score: If True, normalize score to [0, 1].
+            max_score: Maximum raw score for normalization.
+        """
+        self.k1 = k1
+        self.b = b
+        self.normalize_score = normalize_score
+        self.max_score = max_score
+
+    def _tokenize(self, text: str) -> list[str]:
+        """Tokenize text into terms.
+
+        Preserves:
+        - UUIDs as single tokens
+        - Numeric IDs
+        - Alphanumeric words
+
+        Args:
+            text: Text to tokenize.
+
+        Returns:
+            List of lowercase tokens.
+        """
+        if not text:
+            return []
+
+        tokens = self._TOKEN_PATTERN.findall(text.lower())
+        return tokens
+
+    def _compute_idf(self, term: str, doc_count: int, doc_freq: int) -> float:
+        """Compute inverse document frequency.
+
+        Uses the standard BM25 IDF formula:
+            IDF = log((N - n + 0.5) / (n + 0.5) + 1)
+
+        Where N = total docs, n = docs containing term.
+
+        For single-document scoring, we use a simplified version.
+        """
+        if doc_freq == 0:
+            return 0.0
+
+        # Simplified IDF for single-document case
+        # Term present = higher IDF, term absent = 0
+        return math.log(2.0)  # Constant since we have single document
+
+    def _bm25_score(
+        self,
+        doc_tokens: list[str],
+        query_tokens: list[str],
+        avg_doc_len: float | None = None,
+    ) -> tuple[float, list[str]]:
+        """Compute BM25 score between document and query.
+
+        Args:
+            doc_tokens: Tokenized document.
+            query_tokens: Tokenized query.
+            avg_doc_len: Average document length (optional).
+
+        Returns:
+            Tuple of (score, matched_terms).
+        """
+        if not doc_tokens or not query_tokens:
+            return 0.0, []
+
+        doc_len = len(doc_tokens)
+        avgdl = avg_doc_len or doc_len
+
+        doc_freq = Counter(doc_tokens)
+        query_freq = Counter(query_tokens)
+
+        score = 0.0
+        matched_terms: list[str] = []
+
+        for term, qf in query_freq.items():
+            if term not in doc_freq:
+                continue
+
+            f = doc_freq[term]
+            matched_terms.append(term)
+
+            # BM25 term score
+            idf = math.log(2.0)  # Simplified for single doc
+            numerator = f * (self.k1 + 1)
+            denominator = f + self.k1 * (1 - self.b + self.b * doc_len / avgdl)
+
+            term_score = idf * numerator / denominator
+            score += term_score * qf  # Weight by query frequency
+
+        return score, matched_terms
+
+    def score(self, item: str, context: str) -> RelevanceScore:
+        """Score item relevance to context using BM25.
+
+        Args:
+            item: Item text (typically JSON string).
+            context: Query context.
+
+        Returns:
+            RelevanceScore with BM25-based score.
+        """
+        item_tokens = self._tokenize(item)
+        context_tokens = self._tokenize(context)
+
+        raw_score, matched = self._bm25_score(item_tokens, context_tokens)
+
+        # Normalize to [0, 1]
+        if self.normalize_score:
+            normalized = min(1.0, raw_score / self.max_score)
+        else:
+            normalized = raw_score
+
+        # Bonus for exact long-token matches (UUIDs, long IDs)
+        # These are high-value matches that should be preserved
+        long_matches = [t for t in matched if len(t) >= 8]
+        if long_matches:
+            normalized = min(1.0, normalized + 0.3)
+
+        match_count = len(matched)
+        if match_count == 0:
+            reason = "BM25: no term matches"
+        elif match_count == 1:
+            reason = f"BM25: matched '{matched[0]}'"
+        else:
+            reason = f"BM25: matched {match_count} terms ({', '.join(matched[:3])}{'...' if match_count > 3 else ''})"
+
+        return RelevanceScore(
+            score=normalized,
+            reason=reason,
+            matched_terms=matched[:10],  # Limit for readability
+        )
+
+    def score_batch(self, items: list[str], context: str) -> list[RelevanceScore]:
+        """Score multiple items.
+
+        BM25 is fast enough that sequential scoring is efficient.
+        Could be optimized with vectorization if needed.
+
+        Args:
+            items: List of items to score.
+            context: Query context.
+
+        Returns:
+            List of RelevanceScore objects.
+        """
+        # Pre-tokenize context once
+        context_tokens = self._tokenize(context)
+
+        if not context_tokens:
+            return [RelevanceScore(score=0.0, reason="BM25: empty context") for _ in items]
+
+        # Compute average document length for normalization
+        all_tokens = [self._tokenize(item) for item in items]
+        avg_len = sum(len(t) for t in all_tokens) / max(len(items), 1)
+
+        results = []
+        for item_tokens in all_tokens:
+            raw_score, matched = self._bm25_score(item_tokens, context_tokens, avg_doc_len=avg_len)
+
+            # Normalize
+            if self.normalize_score:
+                normalized = min(1.0, raw_score / self.max_score)
+            else:
+                normalized = raw_score
+
+            # Bonus for long matches
+            long_matches = [t for t in matched if len(t) >= 8]
+            if long_matches:
+                normalized = min(1.0, normalized + 0.3)
+
+            match_count = len(matched)
+            if match_count == 0:
+                reason = "BM25: no matches"
+            else:
+                reason = f"BM25: {match_count} terms"
+
+            results.append(
+                RelevanceScore(
+                    score=normalized,
+                    reason=reason,
+                    matched_terms=matched[:5],
+                )
+            )
+
+        return results
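The +0.3 long-match bonus exists because the simplified single-document IDF makes one-term matches score very low. A worked check (a sketch, not shipped code) of the "Alice" = 0.07 figure quoted in the package docstring, plugging the defaults k1=1.5, b=0.75, max_score=10.0 into the formula implemented above, for a document whose length equals avgdl:

    import math

    k1, b, max_score = 1.5, 0.75, 10.0
    f, qf = 1, 1          # the single term "alice" appears once in doc and query
    doc_len = avgdl = 8   # length normalization cancels when doc_len == avgdl

    idf = math.log(2.0)   # simplified single-document IDF used above
    term = idf * (f * (k1 + 1)) / (f + k1 * (1 - b + b * doc_len / avgdl))
    print(round(term * qf / max_score, 2))  # 0.07

Since "alice" is shorter than 8 characters, the long-match bonus does not apply here, which is exactly the kind of single-term natural-language match the HybridScorer's semantic component is meant to rescue.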