hindsight-api 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- hindsight_api/__init__.py +10 -9
- hindsight_api/alembic/env.py +5 -8
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
- hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
- hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
- hindsight_api/api/__init__.py +10 -10
- hindsight_api/api/http.py +575 -593
- hindsight_api/api/mcp.py +30 -28
- hindsight_api/banner.py +13 -6
- hindsight_api/config.py +9 -13
- hindsight_api/engine/__init__.py +9 -9
- hindsight_api/engine/cross_encoder.py +22 -21
- hindsight_api/engine/db_utils.py +5 -4
- hindsight_api/engine/embeddings.py +22 -21
- hindsight_api/engine/entity_resolver.py +81 -75
- hindsight_api/engine/llm_wrapper.py +61 -79
- hindsight_api/engine/memory_engine.py +603 -625
- hindsight_api/engine/query_analyzer.py +100 -97
- hindsight_api/engine/response_models.py +105 -106
- hindsight_api/engine/retain/__init__.py +9 -16
- hindsight_api/engine/retain/bank_utils.py +34 -58
- hindsight_api/engine/retain/chunk_storage.py +4 -12
- hindsight_api/engine/retain/deduplication.py +9 -28
- hindsight_api/engine/retain/embedding_processing.py +4 -11
- hindsight_api/engine/retain/embedding_utils.py +3 -4
- hindsight_api/engine/retain/entity_processing.py +7 -17
- hindsight_api/engine/retain/fact_extraction.py +155 -165
- hindsight_api/engine/retain/fact_storage.py +11 -23
- hindsight_api/engine/retain/link_creation.py +11 -39
- hindsight_api/engine/retain/link_utils.py +166 -95
- hindsight_api/engine/retain/observation_regeneration.py +39 -52
- hindsight_api/engine/retain/orchestrator.py +72 -62
- hindsight_api/engine/retain/types.py +49 -43
- hindsight_api/engine/search/__init__.py +5 -5
- hindsight_api/engine/search/fusion.py +6 -15
- hindsight_api/engine/search/graph_retrieval.py +22 -23
- hindsight_api/engine/search/mpfp_retrieval.py +76 -92
- hindsight_api/engine/search/observation_utils.py +9 -16
- hindsight_api/engine/search/reranking.py +4 -7
- hindsight_api/engine/search/retrieval.py +87 -66
- hindsight_api/engine/search/scoring.py +5 -7
- hindsight_api/engine/search/temporal_extraction.py +8 -11
- hindsight_api/engine/search/think_utils.py +115 -39
- hindsight_api/engine/search/trace.py +68 -39
- hindsight_api/engine/search/tracer.py +44 -35
- hindsight_api/engine/search/types.py +20 -17
- hindsight_api/engine/task_backend.py +21 -26
- hindsight_api/engine/utils.py +25 -10
- hindsight_api/main.py +21 -40
- hindsight_api/mcp_local.py +190 -0
- hindsight_api/metrics.py +44 -30
- hindsight_api/migrations.py +10 -8
- hindsight_api/models.py +60 -72
- hindsight_api/pg0.py +22 -23
- hindsight_api/server.py +3 -6
- hindsight_api-0.1.7.dist-info/METADATA +178 -0
- hindsight_api-0.1.7.dist-info/RECORD +64 -0
- {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.7.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.1.5.dist-info/METADATA +0 -42
- hindsight_api-0.1.5.dist-info/RECORD +0 -63
- {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.7.dist-info}/WHEEL +0 -0
hindsight_api/engine/search/retrieval.py

```diff
@@ -8,16 +8,17 @@ Implements:
     4. Temporal retrieval (time-aware search with spreading)
 """

-from typing import List, Dict, Optional
-from dataclasses import dataclass, field
-from datetime import datetime
 import asyncio
 import logging
+from dataclasses import dataclass, field
+from datetime import UTC, datetime
+from typing import Optional
+
+from ...config import get_config
 from ..db_utils import acquire_with_retry
-from .
-from .graph_retrieval import GraphRetriever, BFSGraphRetriever
+from .graph_retrieval import BFSGraphRetriever, GraphRetriever
 from .mpfp_retrieval import MPFPGraphRetriever
-from 
+from .types import RetrievalResult

 logger = logging.getLogger(__name__)

```
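The new `from datetime import UTC` import uses the alias added in Python 3.11; it is the same object as `timezone.utc`, which later hunks replace. A quick sketch of the equivalence:

```python
from datetime import UTC, datetime, timezone

# UTC (added in Python 3.11) is the same object as timezone.utc
assert UTC is timezone.utc

# Both attach the UTC zone to a naive datetime without shifting it
naive = datetime(2024, 5, 1, 12, 0)
aware = naive.replace(tzinfo=UTC)
print(aware.isoformat())  # 2024-05-01T12:00:00+00:00
```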
```diff
@@ -25,16 +26,17 @@ logger = logging.getLogger(__name__)
 @dataclass
 class ParallelRetrievalResult:
     """Result from parallel retrieval across all methods."""
-    semantic: List[RetrievalResult]
-    bm25: List[RetrievalResult]
-    graph: List[RetrievalResult]
-    temporal: Optional[List[RetrievalResult]]
-    timings: Dict[str, float] = field(default_factory=dict)
-    temporal_constraint: Optional[tuple] = None  # (start_date, end_date)
+
+    semantic: list[RetrievalResult]
+    bm25: list[RetrievalResult]
+    graph: list[RetrievalResult]
+    temporal: list[RetrievalResult] | None
+    timings: dict[str, float] = field(default_factory=dict)
+    temporal_constraint: tuple | None = None  # (start_date, end_date)


 # Default graph retriever instance (can be overridden)
-_default_graph_retriever: Optional[GraphRetriever] = None
+_default_graph_retriever: GraphRetriever | None = None


 def get_default_graph_retriever() -> GraphRetriever:
```
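Most of this diff is mechanical typing modernization: PEP 585 builtin generics (`list`, `dict`) replace `typing.List`/`typing.Dict`, and PEP 604 unions (`X | None`) replace `Optional[X]`; the former needs Python 3.9+, the latter 3.10+. A minimal illustration:

```python
from dataclasses import dataclass, field


@dataclass
class Example:
    # PEP 585: builtin generics replace typing.List / typing.Dict (3.9+)
    names: list[str]
    timings: dict[str, float] = field(default_factory=dict)
    # PEP 604: "X | None" replaces typing.Optional[X] (3.10+)
    deadline: str | None = None
```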
```diff
@@ -62,12 +64,8 @@ def set_default_graph_retriever(retriever: GraphRetriever) -> None:


 async def retrieve_semantic(
-    conn,
-    query_emb_str: str,
-    bank_id: str,
-    fact_type: str,
-    limit: int
-) -> List[RetrievalResult]:
+    conn, query_emb_str: str, bank_id: str, fact_type: str, limit: int
+) -> list[RetrievalResult]:
     """
     Semantic retrieval via vector similarity.

@@ -93,18 +91,15 @@ async def retrieve_semantic(
         ORDER BY embedding <=> $1::vector
         LIMIT $4
         """,
-        query_emb_str, bank_id, fact_type, limit
+        query_emb_str,
+        bank_id,
+        fact_type,
+        limit,
     )
     return [RetrievalResult.from_db_row(dict(r)) for r in results]


-async def retrieve_bm25(
-    conn,
-    query_text: str,
-    bank_id: str,
-    fact_type: str,
-    limit: int
-) -> List[RetrievalResult]:
+async def retrieve_bm25(conn, query_text: str, bank_id: str, fact_type: str, limit: int) -> list[RetrievalResult]:
     """
     BM25 keyword retrieval via full-text search.

```
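For context, `embedding <=> $1::vector` is pgvector's cosine-distance operator, so ordering by it ascending returns the nearest neighbors first. A minimal sketch of this retrieval pattern with asyncpg (the `facts` table and column names are illustrative, not the package's actual schema):

```python
import asyncpg


async def nearest_facts(conn: asyncpg.Connection, query_embedding: list[float], limit: int = 10):
    # pgvector accepts a vector literal like '[0.1,0.2,...]'
    emb_str = "[" + ",".join(str(x) for x in query_embedding) + "]"
    return await conn.fetch(
        """
        SELECT id, text, 1 - (embedding <=> $1::vector) AS similarity
        FROM facts
        ORDER BY embedding <=> $1::vector
        LIMIT $2
        """,
        emb_str,
        limit,
    )
```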
```diff
@@ -122,7 +117,7 @@ async def retrieve_bm25(

     # Sanitize query text: remove special characters that have meaning in tsquery
     # Keep only alphanumeric characters and spaces
-    sanitized_text = re.sub(r'[^\w\s]', ' ', query_text.lower())
+    sanitized_text = re.sub(r"[^\w\s]", " ", query_text.lower())

     # Split and filter empty strings
     tokens = [token for token in sanitized_text.split() if token]
```
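Stripping non-word characters matters because `&`, `|`, `!`, `(`, and `:` are operators in Postgres tsquery syntax. A sketch of the sanitize-then-join pattern (joining tokens with `&` is an assumption about how the query string is combined downstream):

```python
import re


def build_tsquery(query_text: str) -> str:
    # Replace tsquery metacharacters with spaces, keep word characters only
    sanitized = re.sub(r"[^\w\s]", " ", query_text.lower())
    tokens = [token for token in sanitized.split() if token]
    # AND the tokens together; to_tsquery('a & b') requires all terms to match
    return " & ".join(tokens)


print(build_tsquery("What's new in v0.1.7?"))  # what & s & new & in & v0 & 1 & 7
```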
```diff
@@ -146,7 +141,10 @@ async def retrieve_bm25(
         ORDER BY bm25_score DESC
         LIMIT $4
         """,
-        query_tsquery, bank_id, fact_type, limit
+        query_tsquery,
+        bank_id,
+        fact_type,
+        limit,
     )
     return [RetrievalResult.from_db_row(dict(r)) for r in results]

@@ -159,8 +157,8 @@ async def retrieve_temporal(
     start_date: datetime,
     end_date: datetime,
     budget: int,
-    semantic_threshold: float = 0.1
-) -> List[RetrievalResult]:
+    semantic_threshold: float = 0.1,
+) -> list[RetrievalResult]:
     """
     Temporal retrieval with spreading activation.

@@ -182,13 +180,12 @@ async def retrieve_temporal(
     Returns:
         List of RetrievalResult objects with temporal scores
     """
-    from datetime import timezone

     # Ensure start_date and end_date are timezone-aware (UTC) to match database datetimes
     if start_date.tzinfo is None:
-        start_date = start_date.replace(tzinfo=timezone.utc)
+        start_date = start_date.replace(tzinfo=UTC)
     if end_date.tzinfo is None:
-        end_date = end_date.replace(tzinfo=timezone.utc)
+        end_date = end_date.replace(tzinfo=UTC)

     entry_points = await conn.fetch(
         """
```
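Naive datetimes cannot be compared with the timezone-aware values asyncpg returns for timestamptz columns, hence the normalization above. A small sketch of the guard:

```python
from datetime import UTC, datetime


def ensure_utc(dt: datetime) -> datetime:
    # Attach UTC to naive datetimes; leave aware ones untouched
    return dt.replace(tzinfo=UTC) if dt.tzinfo is None else dt


print(ensure_utc(datetime(2024, 5, 1)))              # 2024-05-01 00:00:00+00:00
print(ensure_utc(datetime(2024, 5, 1, tzinfo=UTC)))  # unchanged
```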
```diff
@@ -215,7 +212,12 @@ async def retrieve_temporal(
         ORDER BY COALESCE(occurred_start, mentioned_at, occurred_end) DESC, (embedding <=> $1::vector) ASC
         LIMIT 10
         """,
-        query_emb_str, bank_id, fact_type, start_date, end_date, semantic_threshold
+        query_emb_str,
+        bank_id,
+        fact_type,
+        start_date,
+        end_date,
+        semantic_threshold,
     )

     if not entry_points:
@@ -258,7 +260,9 @@ async def retrieve_temporal(
         results.append(ep_result)

     # Spread through temporal links
-    queue = [(RetrievalResult.from_db_row(dict(ep)), ep["similarity"], 1.0) for ep in entry_points]  # (unit, semantic_sim, temporal_score)
+    queue = [
+        (RetrievalResult.from_db_row(dict(ep)), ep["similarity"], 1.0) for ep in entry_points
+    ]  # (unit, semantic_sim, temporal_score)
     budget_remaining = budget - len(entry_points)

     while queue and budget_remaining > 0:
```
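The shape of the spreading-activation loop: seed a queue with temporally matching entry points, then walk linked facts while a node budget lasts. A simplified sketch under assumed node attributes (`id`, `similarity`), with the link-fetching step abstracted into a callback:

```python
from collections import deque


def spread(entry_points, budget, get_neighbors):
    # Each queue item: (node, semantic_similarity, temporal_score)
    queue = deque((ep, ep.similarity, 1.0) for ep in entry_points)
    visited = {ep.id for ep in entry_points}
    results = list(entry_points)
    budget -= len(results)

    while queue and budget > 0:
        node, sem, temp = queue.popleft()
        for neighbor, weight in get_neighbors(node):
            if neighbor.id in visited or budget <= 0:
                continue
            visited.add(neighbor.id)
            results.append(neighbor)
            budget -= 1
            # Decay the temporal score along the link so distant hops count less
            queue.append((neighbor, sem, temp * weight))
    return results
```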
```diff
@@ -283,7 +287,10 @@ async def retrieve_temporal(
         ORDER BY ml.weight DESC
         LIMIT 10
         """,
-        query_emb_str, current.id, fact_type, semantic_threshold
+        query_emb_str,
+        current.id,
+        fact_type,
+        semantic_threshold,
     )

     for n in neighbors:
```
```diff
@@ -307,7 +314,9 @@ async def retrieve_temporal(

         if neighbor_best_date:
             days_from_mid = abs((neighbor_best_date - mid_date).total_seconds() / 86400)
-            neighbor_temporal_proximity = 1.0 - min(days_from_mid / (total_days / 2), 1.0) if total_days > 0 else 1.0
+            neighbor_temporal_proximity = (
+                1.0 - min(days_from_mid / (total_days / 2), 1.0) if total_days > 0 else 1.0
+            )
         else:
             neighbor_temporal_proximity = 0.3  # Lower score if no temporal data

```
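The proximity score falls off linearly from the midpoint of the queried range: a fact at the midpoint scores 1.0, a fact at either edge scores 0.0, and anything outside clamps to 0.0. A worked example:

```python
# Queried range: 30 days wide, so the midpoint sits 15 days
# from each edge (total_days / 2 = 15).
total_days = 30

for days_from_mid in (0, 7.5, 15, 40):
    proximity = 1.0 - min(days_from_mid / (total_days / 2), 1.0) if total_days > 0 else 1.0
    print(days_from_mid, proximity)
# 0    -> 1.0  (at the midpoint)
# 7.5  -> 0.5  (halfway to the edge)
# 15   -> 0.0  (at the edge)
# 40   -> 0.0  (outside the range, clamped)
```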
```diff
@@ -349,9 +358,9 @@ async def retrieve_parallel(
     bank_id: str,
     fact_type: str,
     thinking_budget: int,
-    question_date: Optional[datetime] = None,
+    question_date: datetime | None = None,
     query_analyzer: Optional["QueryAnalyzer"] = None,
-    graph_retriever: Optional[GraphRetriever] = None,
+    graph_retriever: GraphRetriever | None = None,
 ) -> ParallelRetrievalResult:
     """
     Run 3-way or 4-way parallel retrieval (adds temporal if detected).
```
```diff
@@ -372,29 +381,26 @@ async def retrieve_parallel(
     """
     from .temporal_extraction import extract_temporal_constraint

-    temporal_constraint = extract_temporal_constraint(
-        query_text, reference_date=question_date, analyzer=query_analyzer
-    )
+    temporal_constraint = extract_temporal_constraint(query_text, reference_date=question_date, analyzer=query_analyzer)

     retriever = graph_retriever or get_default_graph_retriever()

     if retriever.name == "mpfp":
         return await _retrieve_parallel_mpfp(
-            pool, query_text, query_embedding_str, bank_id, fact_type,
-            thinking_budget, temporal_constraint, retriever
+            pool, query_text, query_embedding_str, bank_id, fact_type, thinking_budget, temporal_constraint, retriever
         )
     else:
         return await _retrieve_parallel_bfs(
-            pool, query_text, query_embedding_str, bank_id, fact_type,
-            thinking_budget, temporal_constraint, retriever
+            pool, query_text, query_embedding_str, bank_id, fact_type, thinking_budget, temporal_constraint, retriever
         )


 @dataclass
 class _SemanticGraphResult:
     """Internal result from semantic→graph chain."""
-    semantic: List[RetrievalResult]
-    graph: List[RetrievalResult]
+
+    semantic: list[RetrievalResult]
+    graph: list[RetrievalResult]
     semantic_time: float
     graph_time: float

```
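The 3-way/4-way fan-out is a standard asyncio pattern: launch each retrieval method as its own task and gather the results with per-task timing. A self-contained sketch of the shape, simplified from the real signatures (the `fake_retrieval` stubs stand in for the semantic, BM25, graph, and temporal methods):

```python
import asyncio
import time


async def timed(coro):
    # Wrap a retrieval coroutine and report (result, elapsed_seconds)
    start = time.time()
    result = await coro
    return result, time.time() - start


async def retrieve_all(semantic, bm25, graph, temporal=None):
    # Fan out the 3 core methods, plus temporal when a date range was detected
    tasks = [timed(semantic), timed(bm25), timed(graph)]
    if temporal is not None:
        tasks.append(timed(temporal))
    return await asyncio.gather(*tasks)


async def fake_retrieval(name, delay):
    await asyncio.sleep(delay)
    return name


async def main():
    out = await retrieve_all(
        fake_retrieval("semantic", 0.05),
        fake_retrieval("bm25", 0.02),
        fake_retrieval("graph", 0.08),
    )
    print(out)  # three (result, elapsed) pairs, run concurrently


asyncio.run(main())
```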
```diff
@@ -402,7 +408,8 @@ class _SemanticGraphResult:
 @dataclass
 class _TimedResult:
     """Internal result with timing."""
-    results: List[RetrievalResult]
+
+    results: list[RetrievalResult]
     time: float


```
```diff
@@ -413,7 +420,7 @@ async def _retrieve_parallel_mpfp(
     bank_id: str,
     fact_type: str,
     thinking_budget: int,
-    temporal_constraint: Optional[tuple],
+    temporal_constraint: tuple | None,
     retriever: GraphRetriever,
 ) -> ParallelRetrievalResult:
     """
@@ -430,9 +437,7 @@ async def _retrieve_parallel_mpfp(
         """Chain: semantic retrieval → graph retrieval (using semantic as seeds)."""
         start = time.time()
         async with acquire_with_retry(pool) as conn:
-            semantic = await retrieve_semantic(
-                conn, query_embedding_str, bank_id, fact_type, limit=thinking_budget
-            )
+            semantic = await retrieve_semantic(conn, query_embedding_str, bank_id, fact_type, limit=thinking_budget)
         semantic_time = time.time() - start

         # Get temporal seeds if needed (quick query, part of this chain)
@@ -441,8 +446,7 @@ async def _retrieve_parallel_mpfp(
             tc_start, tc_end = temporal_constraint
             async with acquire_with_retry(pool) as conn:
                 temporal_seeds = await _get_temporal_entry_points(
-                    conn, query_embedding_str, bank_id, fact_type,
-                    tc_start, tc_end, limit=20
+                    conn, query_embedding_str, bank_id, fact_type, tc_start, tc_end, limit=20
                 )

         # Run graph with seeds
@@ -473,8 +477,14 @@ async def _retrieve_parallel_mpfp(
         start = time.time()
         async with acquire_with_retry(pool) as conn:
             results = await retrieve_temporal(
-                conn,
-                query_embedding_str, bank_id, fact_type, tc_start, tc_end, budget=thinking_budget, semantic_threshold=0.1
+                conn,
+                query_embedding_str,
+                bank_id,
+                fact_type,
+                tc_start,
+                tc_end,
+                budget=thinking_budget,
+                semantic_threshold=0.1,
             )
         return _TimedResult(results, time.time() - start)

```
```diff
@@ -527,14 +537,13 @@ async def _get_temporal_entry_points(
     end_date: datetime,
     limit: int = 20,
     semantic_threshold: float = 0.1,
-) -> List[RetrievalResult]:
+) -> list[RetrievalResult]:
     """Get temporal entry points (facts in date range with semantic relevance)."""
-    from datetime import timezone

     if start_date.tzinfo is None:
-        start_date = start_date.replace(tzinfo=timezone.utc)
+        start_date = start_date.replace(tzinfo=UTC)
     if end_date.tzinfo is None:
-        end_date = end_date.replace(tzinfo=timezone.utc)
+        end_date = end_date.replace(tzinfo=UTC)

     rows = await conn.fetch(
         """
@@ -557,7 +566,13 @@ async def _get_temporal_entry_points(
             (embedding <=> $1::vector) ASC
         LIMIT $7
         """,
-        query_embedding_str, bank_id, fact_type, start_date, end_date, semantic_threshold, limit
+        query_embedding_str,
+        bank_id,
+        fact_type,
+        start_date,
+        end_date,
+        semantic_threshold,
+        limit,
     )

     results = []
@@ -597,7 +612,7 @@ async def _retrieve_parallel_bfs(
     bank_id: str,
     fact_type: str,
     thinking_budget: int,
-    temporal_constraint: Optional[tuple],
+    temporal_constraint: tuple | None,
     retriever: GraphRetriever,
 ) -> ParallelRetrievalResult:
     """BFS retrieval: all methods run in parallel (original behavior)."""
@@ -631,8 +646,14 @@ async def _retrieve_parallel_bfs(
         start = time.time()
         async with acquire_with_retry(pool) as conn:
             results = await retrieve_temporal(
-                conn,
-                query_embedding_str, bank_id, fact_type, tc_start, tc_end, budget=thinking_budget, semantic_threshold=0.1
+                conn,
+                query_embedding_str,
+                bank_id,
+                fact_type,
+                tc_start,
+                tc_end,
+                budget=thinking_budget,
+                semantic_threshold=0.1,
             )
         return _TimedResult(results, time.time() - start)
```
hindsight_api/engine/search/scoring.py

```diff
@@ -4,11 +4,11 @@ Scoring functions for memory search and retrieval.
 Includes recency weighting, frequency weighting, temporal proximity,
 and similarity calculations used in memory activation and ranking.
 """
+
 from datetime import datetime
-from typing import List


-def cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
+def cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
     """
     Calculate cosine similarity between two vectors.

```
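Cosine similarity is the dot product of the two vectors divided by the product of their magnitudes; the modernized signature only changes the annotations. A dependency-free sketch:

```python
import math


def cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
    # dot(v1, v2) / (|v1| * |v2|); 1.0 = same direction, 0.0 = orthogonal
    dot = sum(a * b for a, b in zip(vec1, vec2))
    norm1 = math.sqrt(sum(a * a for a in vec1))
    norm2 = math.sqrt(sum(b * b for b in vec2))
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return dot / (norm1 * norm2)


print(cosine_similarity([1.0, 0.0], [1.0, 0.0]))  # 1.0
print(cosine_similarity([1.0, 0.0], [0.0, 1.0]))  # 0.0
```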
```diff
@@ -58,6 +58,7 @@ def calculate_recency_weight(days_since: float, half_life_days: float = 365.0) -> float:
         Weight between 0 and 1
     """
     import math
+
     # Logarithmic decay: 1 / (1 + log(1 + days_since/half_life))
     # This decays much slower than exponential, giving better long-term differentiation
     normalized_age = days_since / half_life_days
```
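Plugging numbers into the logarithmic decay from the comment shows how slowly it falls off compared to exponential decay (values assume natural log):

```python
import math


def recency_weight(days_since: float, half_life_days: float = 365.0) -> float:
    # 1 / (1 + ln(1 + days_since / half_life_days))
    return 1.0 / (1.0 + math.log(1.0 + days_since / half_life_days))


for days in (0, 365, 730, 3650):
    print(days, round(recency_weight(days), 3))
# 0    -> 1.0
# 365  -> 0.591  (one half-life)
# 730  -> 0.476
# 3650 -> 0.294  (ten years out still retains ~30%)
```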
```diff
@@ -79,6 +80,7 @@ def calculate_frequency_weight(access_count: int, max_boost: float = 2.0) -> float:
         Weight between 1.0 and max_boost
     """
     import math
+
     if access_count <= 0:
         return 1.0

```
```diff
@@ -116,11 +118,7 @@ def calculate_temporal_anchor(occurred_start: datetime, occurred_end: datetime)
     return midpoint


-def calculate_temporal_proximity(
-    anchor_a: datetime,
-    anchor_b: datetime,
-    half_life_days: float = 30.0
-) -> float:
+def calculate_temporal_proximity(anchor_a: datetime, anchor_b: datetime, half_life_days: float = 30.0) -> float:
     """
     Calculate temporal proximity between two temporal anchors.

```
hindsight_api/engine/search/temporal_extraction.py

```diff
@@ -4,16 +4,16 @@ Temporal extraction for time-aware search queries.
 Handles natural language temporal expressions using transformer-based query analysis.
 """

-from typing import Optional, Tuple
-from datetime import datetime
 import logging
-from 
+from datetime import datetime
+
+from hindsight_api.engine.query_analyzer import DateparserQueryAnalyzer, QueryAnalyzer

 logger = logging.getLogger(__name__)

 # Global default analyzer instance
 # Can be overridden by passing a custom analyzer to extract_temporal_constraint
-_default_analyzer: Optional[QueryAnalyzer] = None
+_default_analyzer: QueryAnalyzer | None = None


 def get_default_analyzer() -> QueryAnalyzer:
```
```diff
@@ -33,9 +33,9 @@ def get_default_analyzer() -> QueryAnalyzer:

 def extract_temporal_constraint(
     query: str,
-    reference_date: Optional[datetime] = None,
-    analyzer: Optional[QueryAnalyzer] = None,
-) -> Optional[Tuple[datetime, datetime]]:
+    reference_date: datetime | None = None,
+    analyzer: QueryAnalyzer | None = None,
+) -> tuple[datetime, datetime] | None:
     """
     Extract temporal constraint from query.

@@ -55,10 +55,7 @@ def extract_temporal_constraint(
     analysis = analyzer.analyze(query, reference_date)

     if analysis.temporal_constraint:
-        result = (
-            analysis.temporal_constraint.start_date,
-            analysis.temporal_constraint.end_date
-        )
+        result = (analysis.temporal_constraint.start_date, analysis.temporal_constraint.end_date)
         return result

     return None
```
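Taken together, the function resolves phrases like "last week" against a reference date and hands back a (start, end) window, or None when the query carries no temporal constraint. A hypothetical call (the exact window depends on DateparserQueryAnalyzer's behavior):

```python
from datetime import UTC, datetime

# Hypothetical usage; the shape mirrors the signature above.
constraint = extract_temporal_constraint(
    "what did I work on last week?",
    reference_date=datetime(2024, 5, 15, tzinfo=UTC),
)
if constraint is not None:
    start, end = constraint
    print(start, "->", end)  # e.g. 2024-05-06 -> 2024-05-12, analyzer-dependent
else:
    print("no temporal constraint detected")
```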