py-context-graph 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- decision_graph/__init__.py +8 -0
- decision_graph/backends/__init__.py +0 -0
- decision_graph/backends/firestore/__init__.py +41 -0
- decision_graph/backends/firestore/stores.py +254 -0
- decision_graph/backends/memory/__init__.py +30 -0
- decision_graph/backends/memory/stores.py +323 -0
- decision_graph/clustering_service.py +301 -0
- decision_graph/context_graph/__init__.py +0 -0
- decision_graph/context_graph/planner.py +102 -0
- decision_graph/context_graph/post_processing.py +247 -0
- decision_graph/context_graph/registry.py +35 -0
- decision_graph/context_graph/service.py +360 -0
- decision_graph/context_graph/templates.py +138 -0
- decision_graph/context_retrieval.py +298 -0
- decision_graph/core/__init__.py +0 -0
- decision_graph/core/config.py +44 -0
- decision_graph/core/decision_trace_profiles.py +76 -0
- decision_graph/core/domain.py +307 -0
- decision_graph/core/interfaces.py +160 -0
- decision_graph/core/matching.py +383 -0
- decision_graph/core/registry.py +35 -0
- decision_graph/decision_enrichment.py +22 -0
- decision_graph/decision_trace_pipeline.py +293 -0
- decision_graph/enrichment_service.py +209 -0
- decision_graph/extraction_service.py +50 -0
- decision_graph/graph.py +51 -0
- decision_graph/ingestion.py +171 -0
- decision_graph/llm/__init__.py +3 -0
- decision_graph/llm/litellm_adapter.py +63 -0
- decision_graph/markdown_chunker.py +50 -0
- decision_graph/prompt_loader.py +19 -0
- decision_graph/prompts/decision_enrichment.txt +35 -0
- decision_graph/prompts/decision_trace.txt +177 -0
- decision_graph/py.typed +0 -0
- decision_graph/retrieval.py +274 -0
- decision_graph/services.py +362 -0
- decision_graph/visualization.py +133 -0
- py_context_graph-0.1.0.dist-info/METADATA +271 -0
- py_context_graph-0.1.0.dist-info/RECORD +41 -0
- py_context_graph-0.1.0.dist-info/WHEEL +4 -0
- py_context_graph-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Decision graph / cross-conversation decision intelligence flow."""
|
|
2
|
+
|
|
3
|
+
from decision_graph.core.config import LLMConfig
|
|
4
|
+
from decision_graph.core.interfaces import LLMAdapter
|
|
5
|
+
from decision_graph.graph import DecisionGraph
|
|
6
|
+
from decision_graph.llm import LiteLLMAdapter
|
|
7
|
+
|
|
8
|
+
__all__ = ["DecisionGraph", "LLMAdapter", "LLMConfig", "LiteLLMAdapter"]
|
|
File without changes
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Simple Firestore backend for decision_graph.
|
|
2
|
+
|
|
3
|
+
Usage::
|
|
4
|
+
|
|
5
|
+
from google.cloud import firestore
|
|
6
|
+
from decision_graph.backends.firestore import FirestoreBackend
|
|
7
|
+
|
|
8
|
+
client = firestore.Client()
|
|
9
|
+
backend = FirestoreBackend(client=client, collection_prefix="myapp_")
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from decision_graph.backends.firestore.stores import (
|
|
13
|
+
FirestoreClusterStore,
|
|
14
|
+
FirestoreEnrichmentStore,
|
|
15
|
+
FirestoreLinkStore,
|
|
16
|
+
FirestoreProjectionStore,
|
|
17
|
+
)
|
|
18
|
+
from decision_graph.core.registry import StorageBackend
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class FirestoreBackend(StorageBackend):
    """Standalone Firestore backend — requires only a ``google.cloud.firestore.Client``.

    Each accessor returns a store bound to a prefixed collection name, so
    several applications (or test suites) can share one Firestore project
    by choosing distinct ``collection_prefix`` values.
    """

    def __init__(self, *, client, collection_prefix: str = ""):
        # client: a google.cloud.firestore.Client (duck-typed; not imported here
        # so the package has no hard dependency on google-cloud-firestore).
        self._client = client
        self._prefix = collection_prefix

    def _col(self, name: str) -> str:
        # Build the fully-prefixed collection name for `name`.
        return f"{self._prefix}{name}"

    def enrichment_store(self):
        # New store instance per call; stores are stateless wrappers over the client.
        return FirestoreEnrichmentStore(self._client, self._col("decision_enrichments"))

    def projection_store(self):
        return FirestoreProjectionStore(self._client, self._col("decision_projections"))

    def cluster_store(self):
        return FirestoreClusterStore(self._client, self._col("decision_clusters"))

    def link_store(self):
        return FirestoreLinkStore(self._client, self._col("decision_links"))
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
"""Simple Firestore implementations of the decision graph store protocols.
|
|
2
|
+
|
|
3
|
+
No BaseDAO, no Singleton, no encryption. Just a Firestore client and collection names.
|
|
4
|
+
Users provide their own ``google.cloud.firestore.Client`` instance.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
9
|
+
|
|
10
|
+
from decision_graph.core.interfaces import (
|
|
11
|
+
ClusterStore,
|
|
12
|
+
EnrichmentStore,
|
|
13
|
+
LinkStore,
|
|
14
|
+
ProjectionStore,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class FirestoreEnrichmentStore(EnrichmentStore):
    """Firestore-backed enrichment store; one document per decision id."""

    def __init__(self, client, collection_name: str):
        self._client = client
        self._collection_name = collection_name

    def _col(self):
        # Fresh CollectionReference each call; cheap, stateless handle.
        return self._client.collection(self._collection_name)

    def find_by_id(self, decision_id: str) -> Optional[dict]:
        """Return the enrichment dict for ``decision_id``, or None if absent."""
        snap = self._col().document(decision_id).get()
        return snap.to_dict() if snap.exists else None

    def find_by_ids(self, ids: List[str]) -> Dict[str, dict]:
        """Batch-fetch enrichments; missing ids are simply omitted from the result."""
        if not ids:
            return {}
        refs = [self._col().document(did) for did in ids]
        result = {}
        # get_all fetches all refs in one round trip (order not guaranteed).
        for snap in self._client.get_all(refs):
            if snap.exists:
                result[snap.id] = snap.to_dict()
        return result

    async def find_by_ids_async(self, ids: List[str]) -> Dict[str, dict]:
        # Async signature for protocol parity; the underlying client call is
        # synchronous and will block the event loop for large batches.
        return self.find_by_ids(ids)

    def save(self, decision_id: str, data: dict) -> None:
        """Full overwrite of the document for ``decision_id``."""
        self._col().document(decision_id).set(data)

    def upsert(self, decision_id: str, data: dict) -> None:
        """Merge ``data`` into the existing document (creates it if absent)."""
        self._col().document(decision_id).set(data, merge=True)

    def query(
        self,
        filters: List[Tuple[str, str, Any]],
        order_by: Optional[List[Tuple[str, str]]] = None,
        limit: int = 200,
    ) -> List[dict]:
        """Run a filtered query, working around Firestore's ``in`` value limit.

        ``filters`` is a list of ``(field, op, value)`` triples. ``in`` filters
        are split into chunks of 30 values (Firestore caps ``in`` operands) and
        issued as separate queries whose results are concatenated.

        NOTE(review): with more than one ``in`` filter the chunked queries are
        effectively OR-ed (a union, possibly with duplicate rows), whereas a
        single Firestore query would AND them — confirm callers only pass one
        ``in`` filter at a time.
        """
        in_filters = [(f, v) for f, op, v in filters if op == "in"]
        regular_filters = [(f, op, v) for f, op, v in filters if op != "in"]

        if in_filters:
            all_rows: List[dict] = []
            for field, values in in_filters:
                # Chunk to stay within Firestore's per-query `in` operand limit.
                for i in range(0, len(values), 30):
                    chunk = values[i : i + 30]
                    q = self._col()
                    for rf, rop, rv in regular_filters:
                        q = q.where(rf, rop, rv)
                    q = q.where(field, "in", chunk)
                    if order_by:
                        for ob_field, ob_dir in order_by:
                            q = q.order_by(ob_field, direction=ob_dir)
                    # Limit applies per chunk; the final slice re-applies it globally,
                    # though cross-chunk ordering is not re-merged.
                    q = q.limit(limit)
                    all_rows.extend(doc.to_dict() for doc in q.stream())
            return all_rows[:limit]

        # No `in` filters: a single straightforward query.
        q = self._col()
        for f, op, v in filters:
            q = q.where(f, op, v)
        if order_by:
            for ob_field, ob_dir in order_by:
                q = q.order_by(ob_field, direction=ob_dir)
        q = q.limit(limit)
        return [doc.to_dict() for doc in q.stream()]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class FirestoreProjectionStore(ProjectionStore):
    """Firestore-backed projection store; one document per projection id (``pid``)."""

    def __init__(self, client, collection_name: str):
        self._client = client
        self._collection_name = collection_name

    def _col(self):
        # Fresh CollectionReference each call; cheap, stateless handle.
        return self._client.collection(self._collection_name)

    def find_by_id(self, pid: str) -> Optional[dict]:
        """Return the projection document for ``pid``, or None if absent."""
        snap = self._col().document(pid).get()
        return snap.to_dict() if snap.exists else None

    def find_by_ids(self, ids: List[str]) -> Dict[str, dict]:
        """Batch-fetch projections; missing ids are omitted from the result."""
        if not ids:
            return {}
        refs = [self._col().document(pid) for pid in ids]
        result = {}
        for snap in self._client.get_all(refs):
            if snap.exists:
                result[snap.id] = snap.to_dict()
        return result

    def find_by_conv_ids(self, cids: List[str], proj_type: str) -> List[dict]:
        """Valid projections of ``proj_type`` whose ``cid`` is in ``cids``.

        NOTE(review): streams every valid document of this proj_type and
        filters ``cid`` client-side — avoids the `in` operand limit but is
        O(collection); fine for small collections, revisit at scale.
        """
        cid_set = set(cids)
        q = self._col().where("proj_type", "==", proj_type).where("valid", "==", True)
        return [doc.to_dict() for doc in q.stream() if doc.to_dict().get("cid") in cid_set]

    async def find_by_filters(
        self,
        *,
        gids: List[str],
        proj_type: str,
        last_n_days: Optional[int] = None,
        limit: Optional[int] = None,
        before_ts: Optional[float] = None,
    ) -> List[dict]:
        """Valid projections for ``gids``/``proj_type``, newest first.

        Time filters compare against ``updated_at`` falling back to
        ``created_at`` (0 when both missing). Async in signature only — the
        Firestore calls below are synchronous and block the event loop.
        Like find_by_conv_ids, the ``gid`` filter is applied client-side.
        """
        q = self._col().where("proj_type", "==", proj_type).where("valid", "==", True)
        rows = []
        gid_set = set(gids)
        for doc in q.stream():
            data = doc.to_dict()
            if data.get("gid") not in gid_set:
                continue
            rows.append(data)

        if last_n_days is not None:
            cutoff = time.time() - (last_n_days * 86400)
            rows = [r for r in rows if (r.get("updated_at") or r.get("created_at") or 0) >= cutoff]
        if before_ts is not None:
            rows = [r for r in rows if (r.get("updated_at") or r.get("created_at") or 0) < before_ts]
        # Most recently touched first.
        rows.sort(key=lambda r: r.get("updated_at") or r.get("created_at") or 0, reverse=True)
        if limit:
            rows = rows[:limit]
        return rows

    def query(
        self,
        filters: List[Tuple[str, str, Any]],
        order_by: Optional[List[Tuple[str, str]]] = None,
        limit: int = 200,
    ) -> List[dict]:
        """Generic pass-through query of ``(field, op, value)`` filters."""
        q = self._col()
        for f, op, v in filters:
            q = q.where(f, op, v)
        if order_by:
            for ob_field, ob_dir in order_by:
                q = q.order_by(ob_field, direction=ob_dir)
        q = q.limit(limit)
        return [doc.to_dict() for doc in q.stream()]

    def invalidate(self, pid: str) -> None:
        # Soft delete: flips the `valid` flag; readers filter on valid == True.
        self._col().document(pid).update({"valid": False})

    def save(self, *, pid: str, gid: str, cid: str, proj_type: str, projection: dict, msg_ts: int) -> bool:
        """Create the projection document; returns False if ``pid`` already exists.

        NOTE(review): the exists-check and the write are not in a transaction,
        so concurrent savers can race — confirm single-writer assumption.
        """
        doc_ref = self._col().document(pid)
        snap = doc_ref.get()
        if snap.exists:
            return False
        doc_ref.set(
            {
                "pid": pid,
                "gid": gid,
                "cid": cid,
                "proj_type": proj_type,
                "projection": projection,
                "created_at": msg_ts,
                "updated_at": msg_ts,
                "valid": True,
            }
        )
        return True

    def update(self, *, pid: str, projection: dict, update_type: str, msg_ts: int) -> dict:
        """Replace the projection payload, then return the re-read document.

        ``update_type`` is accepted for interface compatibility but unused here.
        Returns {} if the document vanished between write and read.
        """
        doc_ref = self._col().document(pid)
        doc_ref.update({"projection": projection, "updated_at": msg_ts})
        snap = doc_ref.get()
        return snap.to_dict() if snap.exists else {}
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class FirestoreClusterStore(ClusterStore):
    """Firestore-backed cluster store; documents are keyed by ``cluster_id``."""

    def __init__(self, client, collection_name: str):
        self._client = client
        self._collection_name = collection_name

    def _col(self):
        return self._client.collection(self._collection_name)

    def create(self, data: dict) -> str:
        """Write ``data`` under its own ``cluster_id`` and return that id."""
        cid = data.get("cluster_id", "")
        self._col().document(cid).set(data)
        return cid

    def update(self, cluster_id: str, updates: dict) -> None:
        """Apply a partial update to an existing cluster document."""
        self._col().document(cluster_id).update(updates)

    def find_by_id(self, cluster_id: str) -> Optional[dict]:
        """Fetch one cluster, or None when the document does not exist."""
        snapshot = self._col().document(cluster_id).get()
        if not snapshot.exists:
            return None
        return snapshot.to_dict()

    def find_by_ids(self, cluster_ids: List[str]) -> List[dict]:
        """Batch-fetch clusters; ids with no document are silently dropped."""
        if not cluster_ids:
            return []
        refs = [self._col().document(cid) for cid in cluster_ids]
        return [snap.to_dict() for snap in self._client.get_all(refs) if snap.exists]
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
class FirestoreLinkStore(LinkStore):
    """Firestore-backed decision→cluster link store, keyed by decision id."""

    def __init__(self, client, collection_name: str):
        self._client = client
        self._collection_name = collection_name

    def _col(self):
        return self._client.collection(self._collection_name)

    def save_batch(self, links: List[dict]) -> int:
        """Write every link (document id = its ``decision_id``); return the count."""
        if not links:
            return 0
        for link in links:
            did = link.get("decision_id", "")
            self._col().document(did).set(link)
        return len(links)

    def find_by_decision_id(self, decision_id: str) -> Optional[dict]:
        """Fetch the link for one decision, or None when absent."""
        snapshot = self._col().document(decision_id).get()
        if not snapshot.exists:
            return None
        return snapshot.to_dict()

    def find_by_cluster_id(self, cluster_id: str) -> List[dict]:
        """All links pointing at ``cluster_id``."""
        query = self._col().where("cluster_id", "==", cluster_id)
        return [snapshot.to_dict() for snapshot in query.stream()]

    def find_by_decision_ids(self, decision_ids: List[str]) -> Dict[str, dict]:
        """Batch-fetch links by decision id; missing ids are omitted."""
        if not decision_ids:
            return {}
        refs = [self._col().document(did) for did in decision_ids]
        found: Dict[str, dict] = {}
        for snapshot in self._client.get_all(refs):
            if snapshot.exists:
                found[snapshot.id] = snapshot.to_dict()
        return found

    def find_cluster_ids_by_gids(self, gids: List[str]) -> List[str]:
        """Distinct cluster ids among links whose ``gid`` is in ``gids``.

        NOTE(review): scans the whole collection client-side — fine for small
        link sets, revisit at scale.
        """
        if not gids:
            return []
        wanted = set(gids)
        distinct: set = set()
        for snapshot in self._col().stream():
            row = snapshot.to_dict()
            if row.get("gid") in wanted and row.get("cluster_id"):
                distinct.add(row["cluster_id"])
        return list(distinct)

    def find_cluster_id_for_decision(self, decision_id: str) -> Optional[str]:
        """Convenience: the cluster id a decision links to, or None."""
        link = self.find_by_decision_id(decision_id)
        return link.get("cluster_id") if link else None
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from decision_graph.backends.memory.stores import (
|
|
2
|
+
InMemoryClusterStore,
|
|
3
|
+
InMemoryEnrichmentStore,
|
|
4
|
+
InMemoryLinkStore,
|
|
5
|
+
InMemoryProjectionStore,
|
|
6
|
+
InMemoryVectorIndex,
|
|
7
|
+
)
|
|
8
|
+
from decision_graph.core.registry import StorageBackend
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class InMemoryBackend(StorageBackend):
    """Fully in-memory backend. Useful for testing and as a reference implementation."""

    def __init__(self):
        # One long-lived store of each kind, created eagerly and shared
        # for the lifetime of the backend.
        self._stores = {
            "enrichment": InMemoryEnrichmentStore(),
            "projection": InMemoryProjectionStore(),
            "cluster": InMemoryClusterStore(),
            "link": InMemoryLinkStore(),
        }

    def enrichment_store(self):
        return self._stores["enrichment"]

    def projection_store(self):
        return self._stores["projection"]

    def cluster_store(self):
        return self._stores["cluster"]

    def link_store(self):
        return self._stores["link"]
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
"""In-memory implementations of the decision graph store protocols."""
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
import re
|
|
5
|
+
import time
|
|
6
|
+
from collections import Counter
|
|
7
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
8
|
+
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from decision_graph.core.interfaces import (
|
|
12
|
+
ClusterStore,
|
|
13
|
+
EnrichmentStore,
|
|
14
|
+
LinkStore,
|
|
15
|
+
ProjectionStore,
|
|
16
|
+
)
|
|
17
|
+
from decision_graph.ingestion import breakdown_hydrated_clusters
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _get_nested(data: dict, field_path: str) -> Tuple[bool, Any]:
|
|
21
|
+
cur = data
|
|
22
|
+
for part in field_path.split("."):
|
|
23
|
+
if not isinstance(cur, dict) or part not in cur:
|
|
24
|
+
return False, None
|
|
25
|
+
cur = cur[part]
|
|
26
|
+
return True, cur
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _matches_filter(row: dict, field: str, op: str, value: Any) -> bool:
    """Evaluate one Firestore-style ``(field, op, value)`` filter against ``row``.

    A missing field never matches. Raises ``ValueError`` for an operator
    outside the supported set.
    """
    exists, actual = _get_nested(row, field)
    if not exists:
        return False
    # Dispatch table keeps each operator's semantics in one place.
    checks = {
        "==": lambda: actual == value,
        "!=": lambda: actual != value,
        # A None value list is treated as empty, matching nothing.
        "in": lambda: actual in (value or []),
        "array_contains": lambda: isinstance(actual, list) and value in actual,
        ">=": lambda: actual >= value,
        ">": lambda: actual > value,
        "<=": lambda: actual <= value,
        "<": lambda: actual < value,
    }
    try:
        check = checks[op]
    except KeyError:
        raise ValueError(f"Unsupported filter op: {op}") from None
    return check()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _apply_query(
    data: Dict[str, dict],
    filters: List[Tuple[str, str, Any]],
    order_by: Optional[List[Tuple[str, str]]] = None,
    limit: int = 200,
) -> List[dict]:
    """Filter, sort, and truncate ``data``'s rows like a Firestore query.

    ``order_by`` is a list of ``(field, direction)`` pairs where direction
    "DESCENDING" (any case) sorts high-to-low; missing values sort last.
    """
    rows = [
        row
        for row in data.values()
        if all(_matches_filter(row, f, op, v) for f, op, v in filters)
    ]
    if order_by:
        # Sort by the last key first; Python's stable sort then makes the
        # first order_by entry the primary ordering.
        for field, direction in reversed(order_by):
            rows.sort(
                key=lambda r, _f=field: (r.get(_f) is None, r.get(_f)),
                reverse=direction.upper() == "DESCENDING",
            )
    return rows[:limit]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class InMemoryEnrichmentStore(EnrichmentStore):
    """Dict-backed enrichment store keyed by decision id."""

    def __init__(self):
        self._data: Dict[str, dict] = {}

    def find_by_id(self, decision_id: str) -> Optional[dict]:
        """Return the stored enrichment, or None if unknown."""
        return self._data.get(decision_id)

    def find_by_ids(self, ids: List[str]) -> Dict[str, dict]:
        """Lookup many ids at once; unknown ids are simply omitted."""
        found: Dict[str, dict] = {}
        for did in ids:
            if did in self._data:
                found[did] = self._data[did]
        return found

    async def find_by_ids_async(self, ids: List[str]) -> Dict[str, dict]:
        # Synchronous lookup behind an async signature, for protocol parity.
        return self.find_by_ids(ids)

    def save(self, decision_id: str, data: dict) -> None:
        """Store a shallow copy so later caller mutations don't leak in."""
        self._data[decision_id] = dict(data)

    def upsert(self, decision_id: str, data: dict) -> None:
        """Merge ``data`` into the existing record, creating it if needed."""
        self._data.setdefault(decision_id, {}).update(data)

    def query(
        self,
        filters: List[Tuple[str, str, Any]],
        order_by: Optional[List[Tuple[str, str]]] = None,
        limit: int = 200,
    ) -> List[dict]:
        """Delegate to the shared in-memory query engine."""
        return _apply_query(self._data, filters, order_by, limit)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class InMemoryProjectionStore(ProjectionStore):
    """Dict-backed projection store keyed by projection id (``pid``)."""

    def __init__(self):
        self._data: Dict[str, dict] = {}

    def find_by_ids(self, ids: List[str]) -> Dict[str, dict]:
        """Lookup many pids at once; unknown pids are omitted."""
        return {pid: self._data[pid] for pid in ids if pid in self._data}

    def find_by_conv_ids(self, cids: List[str], proj_type: str) -> List[dict]:
        """Valid projections of ``proj_type`` whose ``cid`` is in ``cids``.

        Documents with no ``valid`` key count as valid.
        """
        cid_set = set(cids)
        return [
            doc
            for doc in self._data.values()
            if doc.get("cid") in cid_set and doc.get("proj_type") == proj_type and doc.get("valid", True)
        ]

    async def find_by_filters(
        self,
        *,
        gids: List[str],
        proj_type: str,
        last_n_days: Optional[int] = None,
        limit: Optional[int] = None,
        before_ts: Optional[float] = None,
    ) -> List[dict]:
        """Valid projections for ``gids``/``proj_type``, newest first.

        Time filters use ``updated_at``, falling back to ``created_at``
        (0 when both are missing). Async in signature only — the body is
        synchronous, mirroring the Firestore implementation.
        """
        gid_set = set(gids)
        rows = [
            doc
            for doc in self._data.values()
            if doc.get("gid") in gid_set and doc.get("proj_type") == proj_type and doc.get("valid", True)
        ]
        if last_n_days is not None:
            cutoff = time.time() - (last_n_days * 86400)
            rows = [r for r in rows if (r.get("updated_at") or r.get("created_at") or 0) >= cutoff]
        if before_ts is not None:
            rows = [r for r in rows if (r.get("updated_at") or r.get("created_at") or 0) < before_ts]
        # Most recently touched first.
        rows.sort(key=lambda r: r.get("updated_at") or r.get("created_at") or 0, reverse=True)
        if limit:
            rows = rows[:limit]
        return rows

    def find_by_id(self, pid: str) -> Optional[dict]:
        """Return the projection document, or None if unknown."""
        return self._data.get(pid)

    def query(
        self,
        filters: List[Tuple[str, str, Any]],
        order_by: Optional[List[Tuple[str, str]]] = None,
        limit: int = 200,
    ) -> List[dict]:
        """Delegate to the shared in-memory query engine."""
        return _apply_query(self._data, filters, order_by, limit)

    def invalidate(self, pid: str) -> None:
        # Soft delete: readers filter on valid == True. Unknown pids are a no-op.
        if pid in self._data:
            self._data[pid]["valid"] = False

    def save(self, *, pid: str, gid: str, cid: str, proj_type: str, projection: dict, msg_ts: int) -> bool:
        """Create the projection if new; return True iff it was created.

        Mirrors the Firestore store: an existing pid is left untouched and
        the call returns False.
        """
        is_new = pid not in self._data
        if is_new:
            self._data[pid] = {
                "pid": pid,
                "gid": gid,
                "cid": cid,
                "proj_type": proj_type,
                "projection": projection,
                "created_at": msg_ts,
                "updated_at": msg_ts,
                "valid": True,
            }
        return is_new

    def update(self, *, pid: str, projection: dict, update_type: str, msg_ts: int) -> dict:
        """Replace the projection payload; return the updated document.

        ``update_type`` is accepted for interface compatibility but unused.
        Returns {} when ``pid`` is unknown.
        """
        if pid in self._data:
            self._data[pid]["projection"] = projection
            self._data[pid]["updated_at"] = msg_ts
        return self._data.get(pid, {})
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class InMemoryClusterStore(ClusterStore):
    """Dict-backed cluster store keyed by ``cluster_id``."""

    def __init__(self):
        self._data: Dict[str, dict] = {}

    def create(self, data: dict) -> str:
        """Store a shallow copy of ``data`` under its ``cluster_id``; return that id."""
        cid = data.get("cluster_id", "")
        self._data[cid] = dict(data)
        return cid

    def update(self, cluster_id: str, updates: dict) -> None:
        """Merge ``updates`` into an existing cluster; unknown ids are a no-op."""
        if cluster_id in self._data:
            self._data[cluster_id].update(updates)

    def find_by_id(self, cluster_id: str) -> Optional[dict]:
        """Return the cluster, or None if unknown."""
        return self._data.get(cluster_id)

    def find_by_ids(self, cluster_ids: List[str]) -> List[dict]:
        """Lookup many ids, preserving order; unknown ids are skipped."""
        found = []
        for cid in cluster_ids:
            if cid in self._data:
                found.append(self._data[cid])
        return found
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class InMemoryLinkStore(LinkStore):
    """Dict-backed decision→cluster link store keyed by decision id."""

    def __init__(self):
        self._data: Dict[str, dict] = {}

    def save_batch(self, links: List[dict]) -> int:
        """Store a shallow copy of each link under its ``decision_id``; return the count."""
        for link in links:
            self._data[link.get("decision_id", "")] = dict(link)
        return len(links)

    def find_by_decision_id(self, decision_id: str) -> Optional[dict]:
        """Return the link for one decision, or None."""
        return self._data.get(decision_id)

    def find_by_cluster_id(self, cluster_id: str) -> List[dict]:
        """All links pointing at ``cluster_id``."""
        return [link for link in self._data.values() if link.get("cluster_id") == cluster_id]

    def find_by_decision_ids(self, decision_ids: List[str]) -> Dict[str, dict]:
        """Lookup many decision ids; unknown ids are omitted."""
        found: Dict[str, dict] = {}
        for did in decision_ids:
            if did in self._data:
                found[did] = self._data[did]
        return found

    def find_cluster_ids_by_gids(self, gids: List[str]) -> List[str]:
        """Distinct cluster ids among links whose ``gid`` is in ``gids``."""
        wanted = set(gids)
        distinct = {
            link["cluster_id"]
            for link in self._data.values()
            if link.get("gid") in wanted and link.get("cluster_id")
        }
        return list(distinct)

    def find_cluster_id_for_decision(self, decision_id: str) -> Optional[str]:
        """Convenience: the cluster id a decision links to, or None."""
        link = self._data.get(decision_id)
        return link.get("cluster_id") if link else None
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
# Common English function words dropped during tokenization. A frozenset
# gives O(1) membership tests in the tokenizer's per-word filter.
_STOP_WORDS = frozenset(
    "a an the is are was were be been being have has had do does did will would "
    "shall should may might can could i you he she it we they me him her us them "
    "my your his its our their this that these those in on at to for with by from "
    "of and or not no but if so as up out about into over after".split()
)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _tokenize(text: str) -> List[str]:
    """Lowercased alphanumeric tokens, excluding stop words and single characters."""
    words = re.findall(r"[a-z0-9]+", text.lower())
    return [word for word in words if len(word) > 1 and word not in _STOP_WORDS]
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _tfidf_vector(tokens: List[str], idf: Dict[str, float]) -> Dict[str, float]:
|
|
244
|
+
tf = Counter(tokens)
|
|
245
|
+
total = len(tokens) or 1
|
|
246
|
+
return {t: (c / total) * idf.get(t, 1.0) for t, c in tf.items()}
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _cosine(a: Dict[str, float], b: Dict[str, float]) -> float:
|
|
250
|
+
keys = set(a) & set(b)
|
|
251
|
+
if not keys:
|
|
252
|
+
return 0.0
|
|
253
|
+
dot = sum(a[k] * b[k] for k in keys)
|
|
254
|
+
mag_a = math.sqrt(sum(v * v for v in a.values()))
|
|
255
|
+
mag_b = math.sqrt(sum(v * v for v in b.values()))
|
|
256
|
+
if mag_a == 0 or mag_b == 0:
|
|
257
|
+
return 0.0
|
|
258
|
+
return dot / (mag_a * mag_b)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
class InMemoryVectorIndex:
    """TF-IDF cosine similarity vector index. No external dependencies."""

    def __init__(self):
        # Each doc: {"pid", "tokens", "gid", "cid"}.
        self._docs: List[Dict[str, Any]] = []
        # Smoothed inverse document frequency per term, rebuilt on every add.
        self._idf: Dict[str, float] = {}

    def add(self, *, pid: str, text: str, gid: str, cid: str):
        """Tokenize and index ``text`` under ``pid``.

        Rebuilds the full IDF table on each call — O(total tokens), fine for
        small in-memory corpora.
        """
        tokens = _tokenize(text)
        self._docs.append({"pid": pid, "tokens": tokens, "gid": gid, "cid": cid})
        self._rebuild_idf()

    def _rebuild_idf(self):
        # Smoothed IDF: log((N + 1) / (df + 1)) + 1, always positive.
        n = len(self._docs)
        df: Dict[str, int] = {}
        for doc in self._docs:
            for t in set(doc["tokens"]):
                df[t] = df.get(t, 0) + 1
        self._idf = {t: math.log((n + 1) / (c + 1)) + 1 for t, c in df.items()}

    def get_top_n_matches(self, *, query: str, query_filter: Optional[dict], top_n: int) -> pd.DataFrame:
        """Score every (optionally gid-filtered) doc against ``query``.

        Returns a DataFrame with columns pid/score/gid/cid, best match first;
        empty DataFrame when the index is empty.

        ``query_filter`` supports one shape: {"gid": {"$in": [...]}}.
        NOTE(review): an *empty* "$in" list yields an empty (falsy) set and so
        disables filtering entirely rather than matching nothing — confirm
        that is intended.
        """
        if not self._docs:
            return pd.DataFrame()

        query_tokens = _tokenize(query)
        query_vec = _tfidf_vector(query_tokens, self._idf)

        allowed_gids = None
        if query_filter and "$in" in (query_filter.get("gid") or {}):
            allowed_gids = set(query_filter["gid"]["$in"])

        results = []
        for doc in self._docs:
            if allowed_gids and doc["gid"] not in allowed_gids:
                continue
            doc_vec = _tfidf_vector(doc["tokens"], self._idf)
            score = _cosine(query_vec, doc_vec)
            results.append({"pid": doc["pid"], "score": score, "gid": doc["gid"], "cid": doc["cid"]})

        results.sort(key=lambda x: x["score"], reverse=True)
        return pd.DataFrame(results[:top_n]) if results else pd.DataFrame()
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
class InMemoryGraphStore:
    """In-memory graph store that materializes hydrated clusters into graph arrays."""

    def __init__(self):
        self._hydrated_clusters: List[Dict[str, Any]] = []
        self._graph_arrays: Dict[str, list] = {}

    def ingest(self, hydrated_clusters: List[Dict[str, Any]]) -> None:
        """Append the clusters and fold their broken-down arrays into ours."""
        self._hydrated_clusters += hydrated_clusters
        broken_down = breakdown_hydrated_clusters(hydrated_clusters)
        for key, items in broken_down.items():
            bucket = self._graph_arrays.setdefault(key, [])
            bucket.extend(items)

    @property
    def hydrated_clusters(self) -> List[Dict[str, Any]]:
        """Every cluster ingested so far, in ingestion order."""
        return self._hydrated_clusters

    @property
    def graph_arrays(self) -> Dict[str, list]:
        """Accumulated graph arrays, keyed by array name."""
        return self._graph_arrays