halflife-rag 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. halflife_rag-0.1.0/PKG-INFO +15 -0
  2. halflife_rag-0.1.0/README.md +140 -0
  3. halflife_rag-0.1.0/api/main.py +142 -0
  4. halflife_rag-0.1.0/engine/classifier/doc_type.py +54 -0
  5. halflife_rag-0.1.0/engine/classifier/query_intent.py +91 -0
  6. halflife_rag-0.1.0/engine/decay/base.py +14 -0
  7. halflife_rag-0.1.0/engine/decay/exponential.py +18 -0
  8. halflife_rag-0.1.0/engine/decay/learned.py +46 -0
  9. halflife_rag-0.1.0/engine/decay/learned_model.py +249 -0
  10. halflife_rag-0.1.0/engine/decay/piecewise.py +21 -0
  11. halflife_rag-0.1.0/engine/decay/registry.py +16 -0
  12. halflife_rag-0.1.0/engine/events/bus.py +33 -0
  13. halflife_rag-0.1.0/engine/feedback/updater.py +43 -0
  14. halflife_rag-0.1.0/engine/fusion/consistency.py +46 -0
  15. halflife_rag-0.1.0/engine/fusion/reranker.py +199 -0
  16. halflife_rag-0.1.0/engine/ingestion/pipeline.py +204 -0
  17. halflife_rag-0.1.0/engine/store/redis_store.py +149 -0
  18. halflife_rag-0.1.0/halflife_rag.egg-info/PKG-INFO +15 -0
  19. halflife_rag-0.1.0/halflife_rag.egg-info/SOURCES.txt +33 -0
  20. halflife_rag-0.1.0/halflife_rag.egg-info/dependency_links.txt +1 -0
  21. halflife_rag-0.1.0/halflife_rag.egg-info/entry_points.txt +2 -0
  22. halflife_rag-0.1.0/halflife_rag.egg-info/requires.txt +9 -0
  23. halflife_rag-0.1.0/halflife_rag.egg-info/top_level.txt +3 -0
  24. halflife_rag-0.1.0/pyproject.toml +30 -0
  25. halflife_rag-0.1.0/scripts/benchmark.py +461 -0
  26. halflife_rag-0.1.0/scripts/cli.py +63 -0
  27. halflife_rag-0.1.0/scripts/corpus.py +445 -0
  28. halflife_rag-0.1.0/scripts/generate_benchmark_data.py +103 -0
  29. halflife_rag-0.1.0/scripts/quickstart.py +98 -0
  30. halflife_rag-0.1.0/scripts/run_benchmark.py +24 -0
  31. halflife_rag-0.1.0/scripts/train_mlp.py +335 -0
  32. halflife_rag-0.1.0/scripts/visualize_decay.py +51 -0
  33. halflife_rag-0.1.0/setup.cfg +4 -0
  34. halflife_rag-0.1.0/tests/test_benchmark.py +393 -0
  35. halflife_rag-0.1.0/tests/test_decay.py +46 -0
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.4
2
+ Name: halflife-rag
3
+ Version: 0.1.0
4
+ Summary: Temporal-aware re-ranking engine for RAG
5
+ Author-email: Your Name <you@example.com>
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: fastapi
8
+ Requires-Dist: uvicorn
9
+ Requires-Dist: redis
10
+ Requires-Dist: qdrant-client
11
+ Requires-Dist: numpy
12
+ Requires-Dist: scipy
13
+ Requires-Dist: sentence-transformers
14
+ Requires-Dist: pydantic
15
+ Requires-Dist: python-dotenv
@@ -0,0 +1,140 @@
1
+ # HalfLife
2
+
3
+ **Temporal-Aware Chunk Re-Ranking Engine for Retrieval-Augmented Generation (RAG)**
4
+
5
+ HalfLife is a plug-and-play middleware that enhances any RAG pipeline by re-ranking retrieved chunks using **temporal signals**, **decay functions**, and **multi-factor scoring**.
6
+
7
+ Instead of relying solely on semantic similarity, HalfLife introduces a **time-aware ranking layer** that improves freshness, relevance, and contextual correctness in generated responses.
8
+
9
+ ---
10
+
11
+ ## ✨ Why HalfLife?
12
+
13
+ Traditional RAG systems rank documents using:
14
+
15
+ ```
16
+ relevance ≈ semantic similarity(query, document)
17
+ ```
18
+
19
+ HalfLife extends this to:
20
+
21
+ ```
22
+ relevance = f(semantic_similarity, temporal_decay, trust, priors)
23
+ ```
24
+
25
+ This enables:
26
+
27
+ * Better handling of **time-sensitive queries**
28
+ * Reduced reliance on **outdated information**
29
+ * Improved **context diversity across time**
30
+ * More **robust and explainable retrieval pipelines**
31
+
32
+ ---
33
+
34
+ ## 🧠 Core Idea
35
+
36
+ HalfLife sits between your retriever (e.g., Qdrant) and your LLM:
37
+
38
+ ```
39
+ Retriever → HalfLife → LLM
40
+ ```
41
+
42
+ It **re-scores and reorders chunks** before they are passed into the model.
43
+
44
+ ---
45
+
46
+ ## ⚙️ Core Features
47
+
48
+ ### 🔍 1. Plug-and-Play Reranking
49
+ HalfLife sits between your retriever (e.g., Qdrant) and your LLM. It **re-scores and reorders chunks** before they reach the model.
50
+
51
+ ### ⏳ 2. Multi-Strategy Decay
52
+ Supports modular decay functions via a central registry:
53
+ * **Exponential**: Standard time-based decay.
54
+ * **Piecewise**: Different decay rates for recent vs. historical windows.
55
+ * **Learned (NEW)**: Features a pure-NumPy MLP (`DecayMLP`) that predicts the optimal $\lambda$ at ingestion time based on document type, source, and feedback.
56
+
57
+ ### 🧠 3. Intent-Aware Fusion
58
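+ HalfLife automatically classifies user queries into **Fresh**, **Historical**, or **Static** intents and adapts its scoring weights accordingly. **Static** (time-agnostic) queries let vector similarity dominate with only a minimal temporal signal; the other two intents behave as follows: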
59
+ * **Fresh Query**: Penalizes older results to surface recent breakthroughs.
60
+ * **Historical Query**: Inverts the decay signal to pull older source documents to the top.
61
+
62
+ ---
63
+
64
+ ## 🏗️ Architecture
65
+
66
+ ```
67
+ User Query
68
+
69
+ Intent Classifier (Fresh / Historical / Static)
70
+
71
+ Vector Retrieval (Qdrant)
72
+
73
+ HalfLife Engine
74
+ ├── Score Fetch (Redis-backed)
75
+ ├── Learned λ Prediction (MLP)
76
+ └── Intent-Aware Fusion
77
+
78
+ Re-ranked Chunks
79
+ ```
80
+
81
+ ---
82
+
83
+ ## 🛠️ Getting Started (Developer Experience)
84
+
85
+ ### 1. Install via Pip (Package Mode)
86
+ HalfLife is now a standard Python package. You can install it and use the `halflife` CLI:
87
+
88
+ ```bash
89
+ git clone https://github.com/yourusername/halflife.git && cd halflife
90
+ pip install -e .
91
+ ```
92
+
93
+ ### 2. Launch Services
94
+ Infrastructure is managed via Docker:
95
+ ```bash
96
+ docker-compose up -d
97
+ ```
98
+
99
+ ### 3. Unified CLI
100
+ Use the `halflife` command for all common tasks:
101
+ ```bash
102
+ # Run the end-to-end quickstart
103
+ halflife quickstart
104
+
105
+ # Start the API server
106
+ halflife serve --port 8000
107
+
108
+ # Run evaluation benchmarks
109
+ halflife benchmark --output results.json
110
+ ```
111
+
112
+ ---
113
+
114
+ ## 🧪 Evaluation & Rigour: The Decoy Mechanism
115
+
116
+ To ensure HalfLife's effectiveness, we built a **108-chunk synthetic corpus** containing "Decoys". For every relevant chunk, there is a decoy with **identical text but a different timestamp**.
117
+
118
+ Because their embeddings are identical, standard cosine similarity cannot separate them. Only HalfLife's temporal engine can correctly surface the right chunk, providing a rigorous test for your RAG pipeline's time-awareness.
119
+
120
+ ---
121
+
122
+ ## 🧬 Learned Decay Workflow
123
+
124
+ 1. **Collect Baseline**: Run `halflife benchmark --output run_001.json`.
125
+ 2. **Train the MLP**: Run `halflife train --results run_001.json`.
126
+ 3. **Deploy**: The engine automatically loads `decay_mlp.npz` and starts predicting $\lambda$ for all new ingested chunks.
127
+
128
+ ---
129
+
130
+ ## 🧩 Status & Roadmap
131
+ * [x] **Phase 1**: Core Decay Engine & Redis Metadata Store.
132
+ * [x] **Phase 2**: Intent-Aware Fusion & Historical Inversion.
133
+ * [x] **Phase 3**: Learned Decay MLP & Benchmark Harness.
134
+ * [ ] **Phase 4**: Event-Driven Fact Supersession (In Progress).
135
+ * [ ] **Phase 5**: Multi-Vector Store SDKs (Pinecone, Weaviate).
136
+
137
+ ---
138
+
139
+ ## 📄 License & Contributing
140
+ MIT License. Contributions are welcome for new decay functions and integration plugins!
@@ -0,0 +1,142 @@
1
+ import os
2
+ import logging
3
+ from typing import List, Dict, Optional
4
+
5
+ from fastapi import FastAPI, Body, HTTPException
6
+ from pydantic import BaseModel, Field
7
+
8
+ from engine.store.redis_store import RedisStore
9
+ from engine.fusion.reranker import Reranker
10
+ from engine.classifier.query_intent import QueryIntentClassifier
11
+
12
# Process-wide logging configuration plus this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# NOTE(review): the API advertises version 0.2.0 while the package metadata
# says 0.1.0 — confirm whether the two are meant to be versioned separately.
app = FastAPI(title="HalfLife Re-ranking API", version="0.2.0")

# Redis location comes from the environment, defaulting to a local instance.
REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379")

# Module-level singletons shared by every endpoint below.
store = RedisStore(url=REDIS_URL)
reranker = Reranker(store)
classifier = QueryIntentClassifier()
21
+
22
+
23
+ # ------------------------------------------------------------------ #
24
+ # Request / response models #
25
+ # ------------------------------------------------------------------ #
26
+
27
class ChunkInput(BaseModel):
    # One retrieved chunk as handed over by the vector store.
    # (Comments are used instead of a docstring so the generated schema
    # description stays exactly as it was.)
    id: str
    # Retrieval score; validation rejects anything outside [0.0, 1.0].
    score: float = Field(..., ge=0.0, le=1.0)
    # Free-form payload; defaults to a fresh empty dict per instance.
    payload: Dict = Field(
        default_factory=dict,
        description="Qdrant payload — must include 'timestamp'",
    )
32
+
33
class RerankRequest(BaseModel):
    # Body of POST /rerank.
    query: str
    chunks: List[ChunkInput]
    # Number of chunks to return; validated to lie in 1..100, default 10.
    top_k: int = Field(default=10, ge=1, le=100)
    # Optional caller-supplied weights; when absent (or empty) the endpoint
    # uses the weights produced by the query-intent classifier.
    weights: Optional[Dict] = None
38
+
39
class MetadataIngestRequest(BaseModel):
    # Body of POST /ingest/metadata.
    chunk_id: str
    # Name of the decay strategy to record for the chunk.
    decay_type: str = "exponential"
    # Strategy parameters; a new default dict is built for every request.
    decay_params: Dict = Field(default_factory=lambda: {"lambda": 1e-6})
    # Trust score; validated to lie in [0.0, 1.0], default 0.5.
    trust_score: float = Field(default=0.5, ge=0.0, le=1.0)
44
+
45
class FeedbackRequest(BaseModel):
    # Body of POST /feedback: which chunk was shown and whether it helped.
    chunk_id: str
    was_useful: bool
48
+
49
+
50
+ # ------------------------------------------------------------------ #
51
+ # Endpoints #
52
+ # ------------------------------------------------------------------ #
53
+
54
@app.get("/health")
def health_check():
    # Liveness probe. The API itself always reports "ok"; Redis reachability
    # is reported separately so a cache outage does not fail the probe.
    redis_ok = False
    try:
        if store.client:
            redis_ok = bool(store.client.ping())
    except Exception:
        # Best-effort probe: keep answering, but leave a trace in the logs
        # instead of silently discarding the failure.
        logger.warning("Redis health probe failed", exc_info=True)
    return {"status": "ok", "redis": redis_ok}
62
+
63
+
64
@app.post("/rerank")
def rerank_endpoint(req: RerankRequest):
    """
    Main middleware endpoint.
    """
    # Flow: classify the query, pick weights, re-rank, and echo the detected
    # intent next to the reranker's own result fields.
    try:
        # Classify query intent → weights + intent label
        classification = classifier.classify(req.query)
        # Caller-supplied weights win; a missing or empty override falls back
        # to the classifier's weights. The intent label always comes from the
        # classifier, even when weights are overridden.
        weights = req.weights or classification["weights"]
        intent = classification["intent"]

        # The reranker works on plain dicts, not pydantic models.
        chunks_as_dicts = [c.model_dump() for c in req.chunks]

        result = reranker.rerank(
            query=req.query,
            chunks=chunks_as_dicts,
            top_k=req.top_k,
            weights=weights,
            intent=intent,
        )

        return {
            **result,
            "query_intent": intent,
        }

    except Exception as e:
        logger.exception("Rerank failed")
        # NOTE(review): `detail` exposes the raw exception text to clients;
        # left unchanged because callers may rely on it.
        # Chain the cause so the original traceback stays attached.
        raise HTTPException(status_code=500, detail=str(e)) from e
93
+
94
+
95
@app.post("/ingest/metadata")
def ingest_metadata(req: MetadataIngestRequest):
    """
    Directly write Redis metadata for a chunk.
    """
    target_id = req.chunk_id

    # Assemble the metadata record from the request fields.
    record = RedisStore.build_metadata(
        chunk_id=target_id,
        decay_type=req.decay_type,
        decay_params=req.decay_params,
        trust_score=req.trust_score,
    )

    # Persist the record, then flag the chunk as dirty.
    store.set_chunk(target_id, record)
    store.mark_dirty(target_id)

    return {"status": "ingested", "chunk_id": target_id}
109
+
110
+
111
@app.post("/feedback")
def feedback_endpoint(req: FeedbackRequest):
    """
    Log chunk utility signal. Marks cache dirty.
    """
    # Imported on first use rather than at module import time.
    from engine.feedback.updater import FeedbackUpdater

    target_id, useful = req.chunk_id, req.was_useful

    FeedbackUpdater(store).log_feedback(target_id, useful)
    store.mark_dirty(target_id)
    # NOTE(review): if FeedbackUpdater.log_feedback already bumps the same
    # counters, this call counts the signal twice — verify against updater.py.
    store.increment_feedback(target_id, useful)

    return {"status": "recorded", "chunk_id": target_id}
122
+
123
+
124
@app.get("/chunks/{chunk_id}/debug")
def debug_chunk(chunk_id: str):
    """
    Inspect the full Redis state for a chunk.
    """
    record = store.get_chunk(chunk_id)
    if not record:
        # Nothing stored (or an empty record) for this id.
        raise HTTPException(status_code=404, detail=f"No metadata for chunk {chunk_id}")

    score = store.get_cached_score(chunk_id)
    counts = store.get_feedback_counts(chunk_id)

    # "dirty" is reported as true exactly when no cached score is available.
    return {
        "chunk_id": chunk_id,
        "metadata": record,
        "cached_score": score,
        "feedback_counts": counts,
        "dirty": score is None,
    }
@@ -0,0 +1,54 @@
1
+ from typing import Dict
2
+
3
class DocTypeClassifier:
    """
    Classifies a text chunk into a document category and picks its initial
    decay settings.

    Categories are checked in this order; the first match wins:
    - news:          fast exponential decay (lambda=1e-5, half-life ~19 hours)
    - documentation: piecewise decay using the decay function's own defaults
    - research:      slow exponential decay (lambda=1e-7, half-life ~80 days)
    - generic:       fallback, exponential decay (lambda=1e-6, half-life ~8 days)

    No separate "reference" category is produced; text without any keyword
    is reported as "generic".

    A keyword counts only when it starts a word, so inflections such as
    "updated" or "releases" still match, while accidental hits inside
    unrelated words ("api" in "capital", "paper" in "newspaper") do not.
    """
    NEWS_KEYWORDS = {"breaking", "today", "flash", "update", "newsworthy"}
    DOCS_KEYWORDS = {"version", "release", "api", "usage", "compatibility"}
    RESEARCH_KEYWORDS = {"abstract", "paper", "methodology", "citation", "experiment"}

    @staticmethod
    def _mentions(text_lower: str, keywords) -> bool:
        """Return True if any keyword begins a word in the lower-cased text."""
        import re  # local import: the module's import block has no `re`

        if not keywords:
            return False
        # Sorted only to make the pattern deterministic (the `re` module
        # caches compiled patterns by their source string).
        alternatives = "|".join(re.escape(kw) for kw in sorted(keywords))
        return re.search(r"\b(?:" + alternatives + ")", text_lower) is not None

    def classify(self, text: str) -> Dict:
        """
        Classifies the document type and returns initial decay settings.

        Returns a new dict on every call with the keys "doc_type",
        "decay_type", "decay_params" and "trust_score".
        """
        text_lower = text.lower()

        # Check for News
        if self._mentions(text_lower, self.NEWS_KEYWORDS):
            return {
                "doc_type": "news",
                "decay_type": "exponential",
                "decay_params": {"lambda": 1e-5},  # ln(2)/1e-5 s ≈ 19 hours half-life
                "trust_score": 0.6,
            }

        # Check for Documentation
        if self._mentions(text_lower, self.DOCS_KEYWORDS):
            return {
                "doc_type": "documentation",
                "decay_type": "piecewise",
                "decay_params": {},  # Using defaults in piecewise
                "trust_score": 0.8,
            }

        # Check for Research
        if self._mentions(text_lower, self.RESEARCH_KEYWORDS):
            return {
                "doc_type": "research",
                "decay_type": "exponential",
                "decay_params": {"lambda": 1e-7},  # Landmark papers (~80 days half-life)
                "trust_score": 0.9,
            }

        # Default fallback
        return {
            "doc_type": "generic",
            "decay_type": "exponential",
            "decay_params": {"lambda": 1e-6},  # ~8 days half-life
            "trust_score": 0.5,
        }
@@ -0,0 +1,91 @@
1
+ import logging
2
+ from typing import Dict
3
+
4
+ logger = logging.getLogger(__name__)
5
+
6
+
7
class QueryIntentClassifier:
    """
    Classifies the temporal intent of a query and returns fusion weights.

    Intent categories:
    fresh — user wants current information ("latest", "recent")
        β (temporal) is high, α (vector) is lower
    historical — user wants evolution or past state ("history of", "how did X evolve")
        β is kept moderate but the reranker INVERTS temporal_score
        so that older chunks rank higher (see reranker.py)
    static — time-agnostic ("what is", "define", "explain")
        α (vector) dominates, temporal signal is minimal

    The reranker consumes both 'weights' and 'intent' from this output.
    Weights alone are not enough for historical queries — the inversion
    flag is what actually surfaces old content.

    Keywords are matched as whole words / whole phrases, case-insensitively.
    Plain substring matching would label "what do we know about X" as fresh
    ("now" inside "know") and "how to adjust X" as fresh ("just" inside
    "adjust"). Fresh keywords are checked before historical ones, so a query
    containing both kinds is classified as fresh.
    """

    # NOTE(review): the year literals are hard-coded and will go stale.
    FRESH_KEYWORDS = {
        "latest", "recent", "newest", "current", "now",
        "today", "this week", "this month", "breaking", "just",
        "updated", "new", "2024", "2025",
    }

    HISTORICAL_KEYWORDS = {
        "history", "historical", "evolution", "evolved", "origins",
        "background", "originally", "used to", "how did", "first version",
        "early", "founded", "invented", "introduced", "over the years",
        "timeline", "progression",
    }

    @staticmethod
    def _mentions(query_lower: str, keywords) -> bool:
        """Return True if any keyword occurs as a whole word or phrase."""
        import re  # local import: the module's import block has no `re`

        if not keywords:
            return False
        # Sorted only to make the pattern deterministic (the `re` module
        # caches compiled patterns by their source string).
        alternatives = "|".join(re.escape(kw) for kw in sorted(keywords))
        return re.search(r"\b(?:" + alternatives + r")\b", query_lower) is not None

    def classify(self, query: str) -> Dict:
        """
        Returns:
            {
                "intent": "fresh" | "historical" | "static",
                "weights": {"vector": float, "temporal": float, "trust": float},
            }

        A new dict is built on every call, so callers may mutate the result.
        """
        q = query.lower()

        if self._mentions(q, self.FRESH_KEYWORDS):
            return {
                "intent": "fresh",
                "weights": {
                    "vector": 0.3,
                    "temporal": 0.6,
                    "trust": 0.1,
                },
            }

        if self._mentions(q, self.HISTORICAL_KEYWORDS):
            # NOTE: weights here look similar to static, but the reranker
            # receives intent="historical" and inverts temporal_score.
            # The result: temporal weight still matters, but it now rewards
            # old chunks instead of fresh ones.
            return {
                "intent": "historical",
                "weights": {
                    "vector": 0.5,
                    "temporal": 0.3,
                    "trust": 0.2,
                },
            }

        # Default: static / time-agnostic
        return {
            "intent": "static",
            "weights": {
                "vector": 0.7,
                "temporal": 0.1,
                "trust": 0.2,
            },
        }
81
+
82
+
83
if __name__ == "__main__":
    # Smoke demo: one sample query per intent, printed with its weights.
    sample_queries = (
        "latest BERT papers",
        "history of transformer architectures",
        "what is attention mechanism",
    )
    clf = QueryIntentClassifier()
    for q in sample_queries:
        result = clf.classify(q)
        print(f"{q!r:45s} → intent={result['intent']}, weights={result['weights']}")
@@ -0,0 +1,14 @@
1
+ from abc import ABC, abstractmethod
2
+ from datetime import datetime
3
+
4
+ class DecayFunction(ABC):
5
+ def __init__(self, params: dict):
6
+ self.params = params
7
+
8
+ @abstractmethod
9
+ def compute(self, timestamp: datetime, now: datetime) -> float:
10
+ """
11
+ Compute the decay score for a given timestamp and the current time.
12
+ Returns a value between 0.0 and 1.0.
13
+ """
14
+ pass
@@ -0,0 +1,18 @@
1
+ import math
2
+ from datetime import datetime
3
+ from .base import DecayFunction
4
+
5
class ExponentialDecay(DecayFunction):
    """
    Exponential decay: score = e^(-lambda * delta_time)
    Good for news and fast-moving trends.

    `params["lambda"]` is the decay constant per second. It defaults to 1e-6
    (half-life of roughly 8 days) when missing or None. Negative values are
    treated as 0, so the score always stays within [0.0, 1.0].
    """

    def compute(self, timestamp: datetime, now: datetime) -> float:
        """
        Return the decay score for a chunk created at `timestamp`, seen at `now`.

        Both datetimes must be either naive or timezone-aware; subtracting a
        mixed pair raises TypeError.
        """
        # Ensure the age is not negative (e.g., if there's a minor clock drift)
        delta_seconds = max(0.0, (now - timestamp).total_seconds())

        # lambda_ is the decay constant. Default: 1e-6 (roughly half-life of 8 days)
        lambda_ = self.params.get("lambda")
        if lambda_ is None:
            lambda_ = 1e-6

        # A negative constant would turn decay into growth: scores above 1.0
        # and, for old chunks, an OverflowError from math.exp.
        lambda_ = max(0.0, lambda_)

        return math.exp(-lambda_ * delta_seconds)
@@ -0,0 +1,46 @@
1
+ """
2
+ learned.py — LearnedDecay function (Option A: chunk-level λ predictor).
3
+
4
+ This class is used by the DecayRegistry like any other decay function.
5
+ The key difference from ExponentialDecay: λ was predicted by the MLP
6
+ at ingestion time (not hand-tuned), stored in Redis, and loaded here.
7
+
8
+ At query time this is pure exponential decay — no ML inference.
9
+ The MLP runs only at ingestion time (or when feedback updates trigger
10
+ a λ re-prediction via FeedbackUpdater).
11
+
12
+ The decay_params dict in Redis must contain:
13
+ {"lambda": float} — predicted by LearnedDecayEngine.predict_lambda()
14
+
15
+ If lambda is missing, falls back to the MLP's cold-start default for
16
+ generic doc type — equivalent to ExponentialDecay with λ=1e-6.
17
+ """
18
+
19
+ import math
20
+ from datetime import datetime
21
+ from .base import DecayFunction
22
+
23
+
24
class LearnedDecay(DecayFunction):
    """
    Exponential decay driven by an MLP-predicted λ read from params.

    At runtime this behaves like plain exponential decay; "learned" refers
    to where λ came from, not to how it is applied.

        decay(Δt) = e^(-λ · Δt)

    λ is expected in [1e-8, 1e-4] and is clamped to that range before use;
    a missing "lambda" entry defaults to 1e-6.
    """

    def compute(self, timestamp: datetime, now: datetime) -> float:
        """Return e^(-λ · age), with age in seconds and never negative."""
        age = (now - timestamp).total_seconds()
        if age < 0.0:
            # Timestamps in the future count as brand new.
            age = 0.0

        rate = self.params.get("lambda", 1e-6)

        # Safety clamp — guards against stale Redis values or hand-edited
        # metadata that fall outside the predictor's output range.
        capped = min(rate, 1e-4)
        rate = max(1e-8, capped)

        return math.exp(-rate * age)