alpha-engine-lib 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. alpha_engine_lib/__init__.py +3 -0
  2. alpha_engine_lib/agent_schemas.py +663 -0
  3. alpha_engine_lib/alerts.py +576 -0
  4. alpha_engine_lib/arcticdb.py +340 -0
  5. alpha_engine_lib/collector_results.py +69 -0
  6. alpha_engine_lib/cost.py +665 -0
  7. alpha_engine_lib/dates.py +273 -0
  8. alpha_engine_lib/decision_capture.py +462 -0
  9. alpha_engine_lib/ec2_spot.py +363 -0
  10. alpha_engine_lib/email_sender.py +206 -0
  11. alpha_engine_lib/eval_artifacts.py +361 -0
  12. alpha_engine_lib/logging.py +303 -0
  13. alpha_engine_lib/model_pricing.yaml +73 -0
  14. alpha_engine_lib/pillars.py +756 -0
  15. alpha_engine_lib/pipeline_status/__init__.py +70 -0
  16. alpha_engine_lib/pipeline_status/read.py +541 -0
  17. alpha_engine_lib/pipeline_status/registry.py +368 -0
  18. alpha_engine_lib/pipeline_status/templates.py +120 -0
  19. alpha_engine_lib/preflight.py +444 -0
  20. alpha_engine_lib/rag/__init__.py +39 -0
  21. alpha_engine_lib/rag/db.py +96 -0
  22. alpha_engine_lib/rag/embeddings.py +63 -0
  23. alpha_engine_lib/rag/migrations/0001_content_tsv.sql +39 -0
  24. alpha_engine_lib/rag/rerank.py +377 -0
  25. alpha_engine_lib/rag/retrieval.py +465 -0
  26. alpha_engine_lib/rag/schema.sql +65 -0
  27. alpha_engine_lib/reconcile.py +203 -0
  28. alpha_engine_lib/secrets.py +186 -0
  29. alpha_engine_lib/sources/__init__.py +35 -0
  30. alpha_engine_lib/sources/protocols.py +227 -0
  31. alpha_engine_lib/ssm_log_capture.py +274 -0
  32. alpha_engine_lib/telegram.py +165 -0
  33. alpha_engine_lib/trading_calendar.py +236 -0
  34. alpha_engine_lib/transparency.py +746 -0
  35. alpha_engine_lib/transparency_inventory.yaml +260 -0
  36. alpha_engine_lib/universe.py +83 -0
  37. alpha_engine_lib-0.32.0.dist-info/METADATA +217 -0
  38. alpha_engine_lib-0.32.0.dist-info/RECORD +40 -0
  39. alpha_engine_lib-0.32.0.dist-info/WHEEL +5 -0
  40. alpha_engine_lib-0.32.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,377 @@
1
+ """RAG reranking — reorder retrieval candidates under a joint query+doc model.
2
+
3
+ Reranking sits between candidate generation (`retrieve(method="hybrid", ...)`)
4
+ and LLM consumption. Hybrid retrieval over a wide candidate pool (e.g. top-30)
5
+ gives high recall; rerank then provides precision by scoring each
6
+ ``(query, document)`` pair jointly under a model that's purpose-built for
7
+ relevance ranking. This decouples the two trade-offs that bi-encoders /
8
+ keyword retrieval can't resolve simultaneously.
9
+
10
+ Two implementations are shipped:
11
+
12
+ - :class:`CrossEncoderReranker` — local BAAI ``bge-reranker-v2-m3`` (or any
13
+ cross-encoder loadable via ``sentence-transformers``). Zero external API
14
+ surface, deterministic, ~100-300ms latency on CPU at top-50. Default for
15
+ Alpha Engine consumers per the no-new-vendor posture.
16
+ - :class:`LLMJudgeReranker` — Anthropic Haiku with a 1-5 relevance rubric.
17
+ Higher latency + cost than cross-encoder; configurable opt-in for
18
+ scenarios that need rerank criteria beyond pure semantic similarity
19
+ ("rerank by recency-weighted relevance", "rerank by financial
20
+ materiality").
21
+
22
+ Both implementations share the :class:`Reranker` protocol and the in-process
23
+ :class:`RerankCache` (LRU, keyed by ``sha256(query) + chunk_id``). Cache
24
+ lifetime is the process / Lambda container — no cross-run persistence,
25
+ because query embeddings drift with corpus updates and rerank scores are
26
+ cheap-to-recompute relative to the LLM call they enable.
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import hashlib
32
+ import logging
33
+ import os
34
+ from collections import OrderedDict
35
+ from dataclasses import dataclass, field
36
+ from typing import Callable, Protocol, runtime_checkable
37
+
38
+ from .retrieval import RetrievalResult
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+
43
# Cap how many ``(query, doc)`` pairs the in-process cache retains so a
# long-running Lambda container doesn't grow unbounded. 1024 entries is
# ~8 queries × top-50 reranks × 2x slack — plenty of headroom for the
# 6-sector × ~25-ticker research run's qual-tool burst.
_DEFAULT_CACHE_MAXSIZE = 1024


# ── Cache ───────────────────────────────────────────────────────────────────


class RerankCache:
    """Process-local LRU cache for rerank scores keyed by ``(query, doc)``.

    Keeps a tight cap on memory: at most ``maxsize`` entries are retained
    and the least-recently-used entry is evicted first (both :meth:`get`
    hits and :meth:`put` refresh an entry's recency). A hot Lambda
    container that processes many distinct queries therefore doesn't
    accumulate unbounded state. Lifetime is the container — no
    cross-invocation persistence (the ``RAG_RERANK_CACHE_TTL`` knob is
    intentionally absent because Lambda /tmp + the implied IO cost would
    exceed the cost of the rerank itself for typical query volumes).
    """

    def __init__(self, maxsize: int = _DEFAULT_CACHE_MAXSIZE) -> None:
        """Create an empty cache holding at most ``maxsize`` scores."""
        # OrderedDict order doubles as the recency order: oldest first.
        self._store: OrderedDict[str, float] = OrderedDict()
        self._maxsize = maxsize

    @staticmethod
    def make_key(
        query: str,
        chunk_id: str | None,
        content: str | None = None,
    ) -> str:
        """Build the cache key for one ``(query, document)`` pair.

        Args:
            query: The query text; only a 16-hex-char SHA-256 prefix of
                it ends up in the key.
            chunk_id: Primary key of the retrieved chunk, or ``None``
                when the retriever did not carry one back.
            content: Optional document text. Used only when ``chunk_id``
                is ``None`` so that id-less documents still get distinct
                keys instead of all colliding on one shared entry.

        Returns:
            ``"<query-digest>:<chunk_id>"`` when a chunk id is available,
            ``"<query-digest>:content:<content-digest>"`` when only the
            content is, and the legacy ``"<query-digest>:no_chunk_id"``
            when neither is supplied.
        """
        query_digest = hashlib.sha256(query.encode("utf-8")).hexdigest()[:16]
        if chunk_id is not None:
            suffix = chunk_id
        elif content is not None:
            content_digest = hashlib.sha256(
                content.encode("utf-8")
            ).hexdigest()[:16]
            suffix = f"content:{content_digest}"
        else:
            # Legacy fallback, kept so two-argument callers see the same
            # key as before. Every id-less document of a query shares it.
            suffix = "no_chunk_id"
        return f"{query_digest}:{suffix}"

    def get(self, key: str) -> float | None:
        """Return the cached score for ``key`` (or ``None``), marking it recent."""
        try:
            score = self._store[key]
        except KeyError:
            return None
        self._store.move_to_end(key)
        return score

    def put(self, key: str, score: float) -> None:
        """Store ``score`` under ``key`` and evict the LRU entry if over the cap."""
        if key in self._store:
            self._store.move_to_end(key)
        self._store[key] = score
        if len(self._store) > self._maxsize:
            # last=False pops from the front, i.e. the least recently used.
            self._store.popitem(last=False)

    def __len__(self) -> int:
        """Return the number of cached scores."""
        return len(self._store)
94
+
95
+
96
+ # ── Reranker protocol ───────────────────────────────────────────────────────
97
+
98
+
99
@runtime_checkable
class Reranker(Protocol):
    """Score-and-reorder a candidate list under a joint query+doc model.

    Implementations may consult a cache, but the protocol surface is
    pure: take a query + candidate list, return the same candidates
    reordered (and optionally truncated to ``top_k``) with
    per-result ``rerank_score`` populated.

    The implementations shipped in this module set ``rerank_score`` and
    ``rerank_method`` directly on the candidate objects they return, so
    the caller's ``RetrievalResult`` instances are modified in place.
    """

    # Short identifier for the implementation. The rerankers in this
    # module use "cross_encoder" and "llm_judge" and stamp this value
    # onto each returned result as ``rerank_method``.
    name: str

    def rerank(
        self,
        query: str,
        candidates: list[RetrievalResult],
        top_k: int,
    ) -> list[RetrievalResult]:
        """Return up to ``top_k`` of ``candidates``, most relevant first.

        Args:
            query: The query text each candidate is scored against.
            candidates: Retrieval results to score and reorder.
            top_k: Maximum number of results to return.
        """
        ...
118
+
119
+
120
+ # ── Cross-encoder (local model) ─────────────────────────────────────────────
121
+
122
+
123
@dataclass
class CrossEncoderReranker:
    """Local cross-encoder reranker.

    Default model is BAAI ``bge-reranker-v2-m3``: a multilingual
    cross-encoder published 2024 at ~600MB on disk, ~100-300ms latency
    per query at top-50 on CPU. Any sentence-transformers
    :class:`CrossEncoder`-compatible model can be substituted via
    ``model_name``.

    The underlying ``sentence-transformers`` install is gated behind
    the ``[rerank]`` extra so callers that only use vector/hybrid
    retrieval don't pay the ~2GB torch + transformers + model-download
    install cost. Importing this module does NOT load the model;
    initialization happens lazily on the first :meth:`rerank` call so
    a non-rerank import path stays cheap.
    """

    model_name: str = "BAAI/bge-reranker-v2-m3"
    cache: RerankCache = field(default_factory=RerankCache)
    name: str = "cross_encoder"
    # When unset, defer model load until first rerank() call. Tests
    # patch this directly with a callable returning predict-able scores
    # to exercise the score-aware reorder path without paying the
    # ~600MB model download.
    _model: object | None = None

    def _ensure_model(self) -> object:
        """Return the loaded cross-encoder, loading it on first use.

        Raises:
            ImportError: If ``sentence-transformers`` is not installed.
        """
        if self._model is not None:
            return self._model
        try:
            # Imported lazily so a bare ``from alpha_engine_lib.rag import
            # retrieve`` stays cheap on consumers that don't rerank.
            from sentence_transformers import CrossEncoder
        except ImportError as exc:
            raise ImportError(
                "CrossEncoderReranker requires sentence-transformers. "
                "Install with: pip install 'alpha-engine-lib[rerank]'"
            ) from exc
        logger.info("Loading cross-encoder model: %s", self.model_name)
        self._model = CrossEncoder(self.model_name)
        return self._model

    def _cache_key(self, query: str, cand: RetrievalResult) -> str:
        """Return a cache key that is unique per ``(query, document)`` pair.

        Results without a ``chunk_id`` are keyed by a digest of their
        content; otherwise every id-less candidate of a query would share
        one cache slot and be handed another document's score.
        """
        if cand.chunk_id is not None:
            return self.cache.make_key(query, cand.chunk_id)
        content_digest = hashlib.sha256(
            cand.content.encode("utf-8")
        ).hexdigest()[:16]
        return self.cache.make_key(query, f"content:{content_digest}")

    def rerank(
        self,
        query: str,
        candidates: list[RetrievalResult],
        top_k: int,
    ) -> list[RetrievalResult]:
        """Score ``candidates`` against ``query`` and return the best ``top_k``.

        Cached scores are reused; all remaining pairs are scored in one
        batched ``predict`` call and written back to the cache. The
        returned candidates are the input objects with ``rerank_score``
        and ``rerank_method`` set on them.

        Args:
            query: The query text.
            candidates: Retrieval results to rerank; may be empty.
            top_k: Maximum number of results to return.

        Returns:
            Up to ``top_k`` candidates ordered by descending score.

        Raises:
            ImportError: If a model load is needed and
                ``sentence-transformers`` is not installed.
        """
        if not candidates:
            return []

        # Compute each key once; it is needed for both lookup and write-back.
        keys = [self._cache_key(query, cand) for cand in candidates]
        scores: list[float | None] = [self.cache.get(key) for key in keys]
        pending = [idx for idx, score in enumerate(scores) if score is None]

        if pending:
            model = self._ensure_model()
            pair_inputs = [[query, candidates[idx].content] for idx in pending]
            # ``predict`` returns one logit per pair; higher = more relevant.
            # Type cast through ``list(map(float, ...))`` keeps tests
            # happy when a numpy array is returned by the real model and
            # when a plain list is returned by the test fake.
            raw = model.predict(pair_inputs)  # type: ignore[attr-defined]
            fresh_scores = list(map(float, raw))
            if len(fresh_scores) != len(pending):
                # zip() below stops at the shorter side; surface the
                # mismatch instead of leaving unscored candidates silently.
                logger.warning(
                    "CrossEncoderReranker got %d scores for %d pairs; "
                    "unscored candidates sort last",
                    len(fresh_scores), len(pending),
                )
            for idx, score in zip(pending, fresh_scores):
                scores[idx] = score
                self.cache.put(keys[idx], score)

        return _attach_and_sort(candidates, scores, self.name, top_k)
202
+
203
+
204
+ # ── LLM-as-judge ────────────────────────────────────────────────────────────
205
+
206
+
207
# Default rubric — kept terse to fit a Haiku context window comfortably
# at top-50 candidates and to leave room for the candidate text itself.
# Scores follow a 1-5 integer Likert that the model returns as a single
# bare integer for deterministic parsing.
_DEFAULT_LLM_RUBRIC = (
    "Rate the relevance of the following document to the query on a "
    "1-5 scale where 1=irrelevant, 3=tangentially related, 5=directly "
    "answers the query. Respond with ONLY a single integer between 1 "
    "and 5."
)


@dataclass
class LLMJudgeReranker:
    """LLM-as-judge reranker — one Haiku call per (query, doc) pair.

    More expensive + slower than the cross-encoder (one LLM round-trip
    per candidate vs. one batched local-model inference for the whole
    set) but more flexible: the rubric can encode criteria beyond
    semantic similarity ("rerank by recency-weighted financial
    materiality"). Configure via :attr:`rubric` at construction.

    Default ``rubric`` is a strict 1-5 Likert; the score is read from
    the first character of the stripped response, which tolerates the
    occasional Haiku leading whitespace or trailing punctuation.
    Responses that cannot be parsed, or whose digit falls outside 1-5,
    produce a neutral score of 3 + a warning log; the caller's batch
    still completes.

    The Anthropic client is injected so consumers can plug in a
    pre-configured ``ChatAnthropic`` (langchain) or
    ``anthropic.Anthropic`` instance. The protocol surface is just
    ``client.messages.create(...)`` for the raw SDK shape.
    """

    client: object
    model: str = "claude-haiku-4-5-20251001"
    rubric: str = _DEFAULT_LLM_RUBRIC
    cache: RerankCache = field(default_factory=RerankCache)
    name: str = "llm_judge"

    def _cache_key(self, query: str, cand: RetrievalResult) -> str:
        """Return a cache key that is unique per ``(query, document)`` pair.

        Results without a ``chunk_id`` are keyed by a digest of their
        content; otherwise every id-less candidate of a query would share
        one cache slot and the second one would reuse the first's score.
        """
        if cand.chunk_id is not None:
            return self.cache.make_key(query, cand.chunk_id)
        content_digest = hashlib.sha256(
            cand.content.encode("utf-8")
        ).hexdigest()[:16]
        return self.cache.make_key(query, f"content:{content_digest}")

    def rerank(
        self,
        query: str,
        candidates: list[RetrievalResult],
        top_k: int,
    ) -> list[RetrievalResult]:
        """Score ``candidates`` against ``query`` and return the best ``top_k``.

        Each uncached candidate costs one LLM call via :meth:`_score_one`;
        the result is cached per ``(query, document)`` pair. The returned
        candidates are the input objects with ``rerank_score`` and
        ``rerank_method`` set on them.

        Args:
            query: The query text.
            candidates: Retrieval results to rerank; may be empty.
            top_k: Maximum number of results to return.

        Returns:
            Up to ``top_k`` candidates ordered by descending score.
        """
        if not candidates:
            return []

        scores: list[float | None] = [None] * len(candidates)
        for idx, cand in enumerate(candidates):
            key = self._cache_key(query, cand)
            cached = self.cache.get(key)
            if cached is not None:
                scores[idx] = cached
                continue
            score = self._score_one(query, cand.content)
            scores[idx] = score
            # NOTE(review): the neutral fallback from a parse failure is
            # cached too, so a bad response sticks for the cache lifetime.
            self.cache.put(key, score)

        return _attach_and_sort(candidates, scores, self.name, top_k)

    def _score_one(self, query: str, content: str) -> float:
        """Ask the LLM for a 1-5 relevance score for one document.

        Args:
            query: The query text.
            content: The document text; only the first 4000 characters
                are sent.

        Returns:
            The parsed score as a float in ``[1.0, 5.0]``, or the neutral
            ``3.0`` when the response is empty, malformed, or out of range.
            Exceptions other than ``ValueError`` / ``IndexError`` /
            ``AttributeError`` (for example client transport errors)
            propagate to the caller.
        """
        # Truncate the candidate text so a top-50 sweep at ~3K tokens per
        # candidate doesn't push the prompt past Haiku's window.
        snippet = content[:4000]
        prompt = (
            f"{self.rubric}\n\n"
            f"Query: {query}\n\n"
            f"Document:\n{snippet}\n\n"
            f"Score (1-5):"
        )
        # Placeholder for the warning below when no text was ever extracted.
        raw = "<no response>"
        try:
            response = self.client.messages.create(  # type: ignore[attr-defined]
                model=self.model,
                max_tokens=8,
                messages=[{"role": "user", "content": prompt}],
            )
            # Anthropic SDK response shape: response.content is a list of
            # content blocks; the first text block holds the integer.
            text_block = response.content[0]
            raw = getattr(text_block, "text", str(text_block)).strip()
            score = int(raw[0])
            if not 1 <= score <= 5:
                # A digit outside the rubric (e.g. "0" or "9") is as
                # untrustworthy as unparseable text; treat it the same way.
                raise ValueError(f"score {score} outside the 1-5 range")
            return float(score)
        except (ValueError, IndexError, AttributeError) as exc:
            logger.warning(
                "LLMJudgeReranker parse-fail (returning neutral 3): %s — raw=%r",
                exc, raw,
            )
            return 3.0
296
+
297
+
298
+ # ── Helpers ─────────────────────────────────────────────────────────────────
299
+
300
+
301
+ def _attach_and_sort(
302
+ candidates: list[RetrievalResult],
303
+ scores: list[float | None],
304
+ method_name: str,
305
+ top_k: int,
306
+ ) -> list[RetrievalResult]:
307
+ """Stamp ``rerank_score`` + ``rerank_method`` on each result and sort.
308
+
309
+ ``RetrievalResult`` is a dataclass — set the fields directly. If the
310
+ score list contains ``None`` for any candidate (shouldn't happen
311
+ under correct caller flow, but defensive), those candidates sort to
312
+ the tail so we don't drop them silently.
313
+ """
314
+ paired = list(zip(candidates, scores))
315
+ paired.sort(key=lambda x: (x[1] is None, -(x[1] or 0.0)))
316
+ out: list[RetrievalResult] = []
317
+ for cand, score in paired[:top_k]:
318
+ cand.rerank_score = score # type: ignore[attr-defined]
319
+ cand.rerank_method = method_name # type: ignore[attr-defined]
320
+ out.append(cand)
321
+ return out
322
+
323
+
324
+ # ── Factory for the retrieve() integration ──────────────────────────────────
325
+
326
+
327
# Module-level registry of reranker instances, keyed by the name passed
# to :func:`get_reranker`. Lazily populated the first time
# :func:`get_reranker` resolves a given name, then memoized so
# subsequent retrieve(rerank="cross_encoder", ...) calls share the same
# cache + model handle within the Lambda container.
_RERANKER_REGISTRY: dict[str, Reranker] = {}
332
+
333
+
334
# Factory hook consulted by :func:`get_reranker` when asked for
# ``"llm_judge"``. It lives at module scope so tests can swap it out
# without importing the anthropic SDK. The default builds an Anthropic
# client from the environment, matching the pattern used elsewhere in
# alpha-engine-research.
def _default_llm_judge_factory() -> Reranker:
    """Build an :class:`LLMJudgeReranker` backed by a fresh Anthropic client.

    Raises:
        ImportError: If the ``anthropic`` SDK cannot be imported.
        RuntimeError: If ``ANTHROPIC_API_KEY`` is unset or empty.
    """
    try:
        from anthropic import Anthropic  # type: ignore[import-not-found]
    except ImportError as err:
        raise ImportError(
            "LLMJudgeReranker requires the anthropic SDK. "
            "Install via: pip install anthropic"
        ) from err

    key = os.environ.get("ANTHROPIC_API_KEY")
    if key:
        return LLMJudgeReranker(client=Anthropic(api_key=key))
    raise RuntimeError(
        "LLMJudgeReranker needs ANTHROPIC_API_KEY in the environment."
    )


_LLM_JUDGE_FACTORY: Callable[[], Reranker] = _default_llm_judge_factory
356
+
357
+
358
def get_reranker(name: str) -> Reranker:
    """Look up a reranker by name, building and memoizing it on first use.

    Recognized names are ``"cross_encoder"`` (default — local BAAI) and
    ``"llm_judge"`` (Anthropic Haiku via the ``anthropic`` SDK). Tests
    install fakes by writing straight into :data:`_RERANKER_REGISTRY`
    ahead of the ``retrieve(rerank=...)`` call.

    Raises:
        ValueError: If ``name`` is not registered and is not one of the
            supported names.
    """
    try:
        return _RERANKER_REGISTRY[name]
    except KeyError:
        pass  # Not built yet; construct it below.

    built: Reranker
    if name == "cross_encoder":
        built = CrossEncoderReranker()
    elif name == "llm_judge":
        built = _LLM_JUDGE_FACTORY()
    else:
        raise ValueError(
            f"Unknown reranker {name!r}; supported: 'cross_encoder', 'llm_judge'"
        )

    _RERANKER_REGISTRY[name] = built
    return built