minder-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minder/__init__.py +12 -0
- minder/api/routers/prompts.py +177 -0
- minder/application/__init__.py +1 -0
- minder/application/admin/__init__.py +11 -0
- minder/application/admin/dto.py +453 -0
- minder/application/admin/jobs.py +327 -0
- minder/application/admin/use_cases.py +1895 -0
- minder/auth/__init__.py +12 -0
- minder/auth/context.py +26 -0
- minder/auth/middleware.py +70 -0
- minder/auth/principal.py +59 -0
- minder/auth/rate_limiter.py +89 -0
- minder/auth/rbac.py +60 -0
- minder/auth/service.py +541 -0
- minder/bootstrap/__init__.py +9 -0
- minder/bootstrap/providers.py +109 -0
- minder/bootstrap/transport.py +807 -0
- minder/cache/__init__.py +10 -0
- minder/cache/providers.py +140 -0
- minder/chunking/__init__.py +4 -0
- minder/chunking/code_splitter.py +184 -0
- minder/chunking/splitter.py +136 -0
- minder/cli.py +1542 -0
- minder/config.py +179 -0
- minder/continuity.py +363 -0
- minder/dev.py +160 -0
- minder/embedding/__init__.py +9 -0
- minder/embedding/base.py +7 -0
- minder/embedding/local.py +65 -0
- minder/embedding/openai.py +7 -0
- minder/graph/__init__.py +11 -0
- minder/graph/edges.py +13 -0
- minder/graph/executor.py +127 -0
- minder/graph/graph.py +263 -0
- minder/graph/nodes/__init__.py +27 -0
- minder/graph/nodes/evaluator.py +21 -0
- minder/graph/nodes/guard.py +64 -0
- minder/graph/nodes/llm.py +59 -0
- minder/graph/nodes/planning.py +30 -0
- minder/graph/nodes/reasoning.py +87 -0
- minder/graph/nodes/reranker.py +141 -0
- minder/graph/nodes/retriever.py +86 -0
- minder/graph/nodes/verification.py +230 -0
- minder/graph/nodes/workflow_planner.py +250 -0
- minder/graph/runtime.py +15 -0
- minder/graph/state.py +26 -0
- minder/llm/__init__.py +5 -0
- minder/llm/base.py +14 -0
- minder/llm/local.py +381 -0
- minder/llm/openai.py +89 -0
- minder/models/__init__.py +109 -0
- minder/models/base.py +10 -0
- minder/models/client.py +137 -0
- minder/models/document.py +34 -0
- minder/models/error.py +32 -0
- minder/models/graph.py +114 -0
- minder/models/history.py +32 -0
- minder/models/job.py +62 -0
- minder/models/prompt.py +41 -0
- minder/models/repository.py +62 -0
- minder/models/rule.py +68 -0
- minder/models/session.py +51 -0
- minder/models/skill.py +52 -0
- minder/models/user.py +41 -0
- minder/models/workflow.py +35 -0
- minder/observability/__init__.py +57 -0
- minder/observability/audit.py +243 -0
- minder/observability/logging.py +253 -0
- minder/observability/metrics.py +448 -0
- minder/observability/tracing.py +215 -0
- minder/presentation/__init__.py +1 -0
- minder/presentation/http/__init__.py +1 -0
- minder/presentation/http/admin/__init__.py +3 -0
- minder/presentation/http/admin/api.py +1309 -0
- minder/presentation/http/admin/context.py +94 -0
- minder/presentation/http/admin/dashboard.py +111 -0
- minder/presentation/http/admin/jobs.py +208 -0
- minder/presentation/http/admin/memories.py +185 -0
- minder/presentation/http/admin/prompts.py +219 -0
- minder/presentation/http/admin/routes.py +127 -0
- minder/presentation/http/admin/runtime.py +650 -0
- minder/presentation/http/admin/search.py +368 -0
- minder/presentation/http/admin/skills.py +230 -0
- minder/prompts/__init__.py +646 -0
- minder/prompts/formatter.py +142 -0
- minder/resources/__init__.py +318 -0
- minder/retrieval/__init__.py +5 -0
- minder/retrieval/hybrid.py +178 -0
- minder/retrieval/mmr.py +116 -0
- minder/retrieval/multi_hop.py +115 -0
- minder/runtime.py +15 -0
- minder/server.py +145 -0
- minder/store/__init__.py +64 -0
- minder/store/document.py +115 -0
- minder/store/error.py +82 -0
- minder/store/feedback.py +114 -0
- minder/store/graph.py +588 -0
- minder/store/history.py +57 -0
- minder/store/interfaces.py +512 -0
- minder/store/milvus/__init__.py +11 -0
- minder/store/milvus/client.py +26 -0
- minder/store/milvus/collections.py +15 -0
- minder/store/milvus/vector_store.py +232 -0
- minder/store/mongodb/__init__.py +11 -0
- minder/store/mongodb/client.py +49 -0
- minder/store/mongodb/indexes.py +90 -0
- minder/store/mongodb/operational_store.py +993 -0
- minder/store/relational.py +1087 -0
- minder/store/repo_state.py +58 -0
- minder/store/rule.py +93 -0
- minder/store/vector.py +79 -0
- minder/tools/__init__.py +47 -0
- minder/tools/auth.py +94 -0
- minder/tools/graph.py +839 -0
- minder/tools/ingest.py +353 -0
- minder/tools/memory.py +381 -0
- minder/tools/query.py +307 -0
- minder/tools/registry.py +269 -0
- minder/tools/repo_scanner.py +1266 -0
- minder/tools/search.py +15 -0
- minder/tools/session.py +316 -0
- minder/tools/skills.py +899 -0
- minder/tools/workflow.py +215 -0
- minder/transport/__init__.py +4 -0
- minder/transport/base.py +286 -0
- minder/transport/sse.py +252 -0
- minder/transport/stdio.py +29 -0
- minder_cli-0.2.0.dist-info/METADATA +318 -0
- minder_cli-0.2.0.dist-info/RECORD +132 -0
- minder_cli-0.2.0.dist-info/WHEEL +4 -0
- minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
- minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
minder/tools/memory.py
ADDED
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
import uuid
|
|
5
|
+
from typing import Any, TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from minder.continuity import compatibility_score_for_memory
|
|
8
|
+
from minder.config import MinderConfig
|
|
9
|
+
from minder.embedding.local import LocalEmbeddingProvider
|
|
10
|
+
from minder.observability.metrics import record_continuity_recall
|
|
11
|
+
from minder.store.interfaces import IOperationalStore
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from minder.continuity import ContinuitySynthesizer
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MemoryTools:
|
|
18
|
+
def __init__(self, store: IOperationalStore, config: MinderConfig) -> None:
    """Bind the operational store and configuration, and build the embedder.

    The embedder is created with at most 16 dimensions, whatever the
    configured value is. The continuity synthesizer starts out unset and is
    created on demand by ``_get_synthesizer``.
    """
    self._store = store
    self._config = config

    embedding_settings = config.embedding
    capped_dimensions = min(embedding_settings.dimensions, 16)
    self._embedder = LocalEmbeddingProvider(
        embedding_settings.model_path,
        dimensions=capped_dimensions,
        runtime="auto",
    )

    self._synthesizer: ContinuitySynthesizer | None = None
|
|
27
|
+
|
|
28
|
+
def _get_synthesizer(self) -> "ContinuitySynthesizer":
|
|
29
|
+
if self._synthesizer is None:
|
|
30
|
+
from minder.continuity import ContinuitySynthesizer
|
|
31
|
+
|
|
32
|
+
self._synthesizer = ContinuitySynthesizer(self._config)
|
|
33
|
+
return self._synthesizer
|
|
34
|
+
|
|
35
|
+
async def minder_memory_store(
|
|
36
|
+
self,
|
|
37
|
+
*,
|
|
38
|
+
title: str,
|
|
39
|
+
content: str,
|
|
40
|
+
tags: list[str],
|
|
41
|
+
language: str,
|
|
42
|
+
) -> dict[str, Any]:
|
|
43
|
+
skill = await self._store.create_skill(
|
|
44
|
+
id=uuid.uuid4(),
|
|
45
|
+
title=title,
|
|
46
|
+
content=content,
|
|
47
|
+
language=language,
|
|
48
|
+
tags=tags,
|
|
49
|
+
embedding=self._embedder.embed(f"{title}\n{content}"),
|
|
50
|
+
usage_count=0,
|
|
51
|
+
quality_score=0.0,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
# Record persistent audit event
|
|
55
|
+
try:
|
|
56
|
+
await self._store.create_audit_log(
|
|
57
|
+
actor_type="system",
|
|
58
|
+
actor_id="minder",
|
|
59
|
+
event_type="skill.created",
|
|
60
|
+
resource_type="skill",
|
|
61
|
+
resource_id=str(skill.id),
|
|
62
|
+
outcome="success",
|
|
63
|
+
audit_metadata={"title": title},
|
|
64
|
+
)
|
|
65
|
+
except Exception:
|
|
66
|
+
pass
|
|
67
|
+
|
|
68
|
+
return {"id": str(skill.id), "title": skill.title, "tags": list(skill.tags)}
|
|
69
|
+
|
|
70
|
+
async def minder_memory_recall(
|
|
71
|
+
self,
|
|
72
|
+
query: str,
|
|
73
|
+
*,
|
|
74
|
+
limit: int = 5,
|
|
75
|
+
current_step: str | None = None,
|
|
76
|
+
artifact_type: str | None = None,
|
|
77
|
+
) -> list[dict[str, Any]]:
|
|
78
|
+
query_embedding = self._embedder.embed(query)
|
|
79
|
+
skills = await self._store.list_skills()
|
|
80
|
+
ranked: list[dict[str, Any]] = []
|
|
81
|
+
for skill in skills:
|
|
82
|
+
embedding = skill.embedding if isinstance(skill.embedding, list) else None
|
|
83
|
+
if not embedding:
|
|
84
|
+
continue
|
|
85
|
+
semantic_score = self._cosine_similarity(query_embedding, embedding)
|
|
86
|
+
compatibility_score, compatibility_reasons = compatibility_score_for_memory(
|
|
87
|
+
tags=list(skill.tags) if isinstance(skill.tags, list) else [],
|
|
88
|
+
title=str(skill.title),
|
|
89
|
+
content=str(skill.content),
|
|
90
|
+
current_step=current_step,
|
|
91
|
+
artifact_type=artifact_type,
|
|
92
|
+
)
|
|
93
|
+
score = min((semantic_score * 0.8) + (compatibility_score * 0.2), 1.0)
|
|
94
|
+
ranked.append(
|
|
95
|
+
{
|
|
96
|
+
"id": str(skill.id),
|
|
97
|
+
"title": skill.title,
|
|
98
|
+
"content": skill.content,
|
|
99
|
+
"tags": list(skill.tags) if isinstance(skill.tags, list) else [],
|
|
100
|
+
"semantic_score": round(semantic_score, 4),
|
|
101
|
+
"step_compatibility": round(compatibility_score, 4),
|
|
102
|
+
"continuity_reasons": compatibility_reasons,
|
|
103
|
+
"score": round(score, 4),
|
|
104
|
+
}
|
|
105
|
+
)
|
|
106
|
+
ranked.sort(key=lambda item: float(item["score"]), reverse=True)
|
|
107
|
+
limited = ranked[:limit]
|
|
108
|
+
synthesis, synthesis_meta = self._get_synthesizer().synthesize_memory_hits(
|
|
109
|
+
query=query,
|
|
110
|
+
hits=limited,
|
|
111
|
+
current_step=current_step,
|
|
112
|
+
artifact_type=artifact_type,
|
|
113
|
+
)
|
|
114
|
+
for item in limited:
|
|
115
|
+
item["recall_summary"] = synthesis["summary"]
|
|
116
|
+
item["hit_summary"] = synthesis["hit_summaries"].get(str(item["id"]), "")
|
|
117
|
+
item["synthesis"] = synthesis_meta
|
|
118
|
+
record_continuity_recall(
|
|
119
|
+
provider=str(synthesis_meta.get("provider", "unknown")),
|
|
120
|
+
step_compatibility=float(item["step_compatibility"]),
|
|
121
|
+
)
|
|
122
|
+
return limited
|
|
123
|
+
|
|
124
|
+
async def minder_memory_list(self) -> list[dict[str, Any]]:
|
|
125
|
+
skills = await self._store.list_skills()
|
|
126
|
+
return [
|
|
127
|
+
{
|
|
128
|
+
"id": str(skill.id),
|
|
129
|
+
"title": skill.title,
|
|
130
|
+
"language": skill.language,
|
|
131
|
+
"tags": list(skill.tags) if isinstance(skill.tags, list) else [],
|
|
132
|
+
}
|
|
133
|
+
for skill in skills
|
|
134
|
+
]
|
|
135
|
+
|
|
136
|
+
async def minder_memory_delete(self, skill_id: str) -> dict[str, bool]:
|
|
137
|
+
await self._store.delete_skill(uuid.UUID(skill_id))
|
|
138
|
+
|
|
139
|
+
# Record persistent audit event
|
|
140
|
+
try:
|
|
141
|
+
await self._store.create_audit_log(
|
|
142
|
+
actor_type="system",
|
|
143
|
+
actor_id="minder",
|
|
144
|
+
event_type="skill.deleted",
|
|
145
|
+
resource_type="skill",
|
|
146
|
+
resource_id=skill_id,
|
|
147
|
+
outcome="success",
|
|
148
|
+
)
|
|
149
|
+
except Exception:
|
|
150
|
+
pass
|
|
151
|
+
|
|
152
|
+
return {"deleted": True}
|
|
153
|
+
|
|
154
|
+
async def minder_memory_compact(
|
|
155
|
+
self,
|
|
156
|
+
*,
|
|
157
|
+
memory_ids: list[str],
|
|
158
|
+
similarity_threshold: float = 0.92,
|
|
159
|
+
dry_run: bool = True,
|
|
160
|
+
) -> dict[str, Any]:
|
|
161
|
+
normalized_ids = self._normalize_memory_ids(memory_ids)
|
|
162
|
+
if len(normalized_ids) < 2:
|
|
163
|
+
raise ValueError("At least two memory_ids are required for compaction")
|
|
164
|
+
|
|
165
|
+
records = []
|
|
166
|
+
for memory_id in normalized_ids:
|
|
167
|
+
skill = await self._store.get_skill_by_id(uuid.UUID(memory_id))
|
|
168
|
+
if skill is None:
|
|
169
|
+
raise ValueError(f"Memory not found: {memory_id}")
|
|
170
|
+
embedding = self._embedder.embed(
|
|
171
|
+
self._compaction_text(
|
|
172
|
+
title=str(skill.title),
|
|
173
|
+
content=str(skill.content),
|
|
174
|
+
)
|
|
175
|
+
)
|
|
176
|
+
records.append(
|
|
177
|
+
{
|
|
178
|
+
"id": str(skill.id),
|
|
179
|
+
"title": str(skill.title),
|
|
180
|
+
"content": str(skill.content),
|
|
181
|
+
"language": str(getattr(skill, "language", "") or "markdown"),
|
|
182
|
+
"tags": list(getattr(skill, "tags", []) or []),
|
|
183
|
+
"embedding": embedding,
|
|
184
|
+
"usage_count": int(getattr(skill, "usage_count", 0) or 0),
|
|
185
|
+
"quality_score": float(getattr(skill, "quality_score", 0.0) or 0.0),
|
|
186
|
+
"created_at": getattr(skill, "created_at", None),
|
|
187
|
+
"updated_at": getattr(skill, "updated_at", None),
|
|
188
|
+
}
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
groups = self._duplicate_groups(records, similarity_threshold)
|
|
192
|
+
plans = [
|
|
193
|
+
self._build_compaction_plan(group) for group in groups if len(group) > 1
|
|
194
|
+
]
|
|
195
|
+
result: dict[str, Any] = {
|
|
196
|
+
"dry_run": dry_run,
|
|
197
|
+
"candidate_count": len(records),
|
|
198
|
+
"duplicate_group_count": len(plans),
|
|
199
|
+
"plans": plans,
|
|
200
|
+
}
|
|
201
|
+
if dry_run or not plans:
|
|
202
|
+
result["compacted_count"] = 0
|
|
203
|
+
result["deleted_count"] = 0
|
|
204
|
+
return result
|
|
205
|
+
|
|
206
|
+
compacted: list[dict[str, Any]] = []
|
|
207
|
+
deleted_count = 0
|
|
208
|
+
for plan in plans:
|
|
209
|
+
primary_id = str(plan["primary_id"])
|
|
210
|
+
primary = next(item for item in plan["members"] if item["id"] == primary_id)
|
|
211
|
+
merged_tags = sorted(
|
|
212
|
+
{
|
|
213
|
+
str(tag).strip().lower()
|
|
214
|
+
for member in plan["members"]
|
|
215
|
+
for tag in list(member.get("tags", []) or [])
|
|
216
|
+
if str(tag).strip()
|
|
217
|
+
}
|
|
218
|
+
)
|
|
219
|
+
merged_content = max(
|
|
220
|
+
[str(member.get("content", "") or "") for member in plan["members"]],
|
|
221
|
+
key=len,
|
|
222
|
+
)
|
|
223
|
+
merged_quality = max(
|
|
224
|
+
float(member.get("quality_score", 0.0) or 0.0)
|
|
225
|
+
for member in plan["members"]
|
|
226
|
+
)
|
|
227
|
+
merged_usage = sum(
|
|
228
|
+
int(member.get("usage_count", 0) or 0) for member in plan["members"]
|
|
229
|
+
)
|
|
230
|
+
updated = await self._store.update_skill(
|
|
231
|
+
uuid.UUID(primary_id),
|
|
232
|
+
content=merged_content,
|
|
233
|
+
tags=merged_tags,
|
|
234
|
+
usage_count=merged_usage,
|
|
235
|
+
quality_score=merged_quality,
|
|
236
|
+
embedding=self._embedder.embed(f"{primary['title']}\n{merged_content}"),
|
|
237
|
+
)
|
|
238
|
+
if updated is None:
|
|
239
|
+
raise ValueError(f"Memory not found during compaction: {primary_id}")
|
|
240
|
+
|
|
241
|
+
duplicate_ids = [
|
|
242
|
+
str(member["id"])
|
|
243
|
+
for member in plan["members"]
|
|
244
|
+
if str(member["id"]) != primary_id
|
|
245
|
+
]
|
|
246
|
+
for duplicate_id in duplicate_ids:
|
|
247
|
+
await self._store.delete_skill(uuid.UUID(duplicate_id))
|
|
248
|
+
deleted_count += 1
|
|
249
|
+
|
|
250
|
+
try:
|
|
251
|
+
await self._store.create_audit_log(
|
|
252
|
+
actor_type="system",
|
|
253
|
+
actor_id="minder",
|
|
254
|
+
event_type="skill.compacted",
|
|
255
|
+
resource_type="skill",
|
|
256
|
+
resource_id=primary_id,
|
|
257
|
+
outcome="success",
|
|
258
|
+
audit_metadata={
|
|
259
|
+
"merged_ids": duplicate_ids,
|
|
260
|
+
"similarity_threshold": similarity_threshold,
|
|
261
|
+
},
|
|
262
|
+
)
|
|
263
|
+
except Exception:
|
|
264
|
+
pass
|
|
265
|
+
|
|
266
|
+
compacted.append(
|
|
267
|
+
{
|
|
268
|
+
"primary_id": primary_id,
|
|
269
|
+
"merged_ids": duplicate_ids,
|
|
270
|
+
"merged_tags": merged_tags,
|
|
271
|
+
"usage_count": merged_usage,
|
|
272
|
+
"quality_score": round(merged_quality, 4),
|
|
273
|
+
}
|
|
274
|
+
)
|
|
275
|
+
|
|
276
|
+
result["compacted_count"] = len(compacted)
|
|
277
|
+
result["deleted_count"] = deleted_count
|
|
278
|
+
result["compacted"] = compacted
|
|
279
|
+
return result
|
|
280
|
+
|
|
281
|
+
@staticmethod
|
|
282
|
+
def _compaction_text(*, title: str, content: str) -> str:
|
|
283
|
+
normalized_content = str(content or "").strip()
|
|
284
|
+
if normalized_content:
|
|
285
|
+
return normalized_content
|
|
286
|
+
return str(title or "").strip()
|
|
287
|
+
|
|
288
|
+
@staticmethod
|
|
289
|
+
def _normalize_memory_ids(memory_ids: list[str]) -> list[str]:
|
|
290
|
+
normalized: list[str] = []
|
|
291
|
+
seen: set[str] = set()
|
|
292
|
+
for raw_id in memory_ids:
|
|
293
|
+
value = str(raw_id or "").strip()
|
|
294
|
+
if not value or value in seen:
|
|
295
|
+
continue
|
|
296
|
+
uuid.UUID(value)
|
|
297
|
+
seen.add(value)
|
|
298
|
+
normalized.append(value)
|
|
299
|
+
return normalized
|
|
300
|
+
|
|
301
|
+
@staticmethod
|
|
302
|
+
def _duplicate_groups(
|
|
303
|
+
records: list[dict[str, Any]], similarity_threshold: float
|
|
304
|
+
) -> list[list[dict[str, Any]]]:
|
|
305
|
+
adjacency: dict[str, set[str]] = {
|
|
306
|
+
str(record["id"]): set() for record in records
|
|
307
|
+
}
|
|
308
|
+
record_map = {str(record["id"]): record for record in records}
|
|
309
|
+
for index, left in enumerate(records):
|
|
310
|
+
left_embedding = list(left.get("embedding") or [])
|
|
311
|
+
for right in records[index + 1 :]:
|
|
312
|
+
right_embedding = list(right.get("embedding") or [])
|
|
313
|
+
similarity = MemoryTools._cosine_similarity(
|
|
314
|
+
left_embedding, right_embedding
|
|
315
|
+
)
|
|
316
|
+
if similarity < similarity_threshold:
|
|
317
|
+
continue
|
|
318
|
+
left_id = str(left["id"])
|
|
319
|
+
right_id = str(right["id"])
|
|
320
|
+
adjacency[left_id].add(right_id)
|
|
321
|
+
adjacency[right_id].add(left_id)
|
|
322
|
+
|
|
323
|
+
groups: list[list[dict[str, Any]]] = []
|
|
324
|
+
visited: set[str] = set()
|
|
325
|
+
for record in records:
|
|
326
|
+
record_id = str(record["id"])
|
|
327
|
+
if record_id in visited:
|
|
328
|
+
continue
|
|
329
|
+
stack = [record_id]
|
|
330
|
+
component: list[dict[str, Any]] = []
|
|
331
|
+
while stack:
|
|
332
|
+
current = stack.pop()
|
|
333
|
+
if current in visited:
|
|
334
|
+
continue
|
|
335
|
+
visited.add(current)
|
|
336
|
+
component.append(record_map[current])
|
|
337
|
+
stack.extend(sorted(adjacency[current] - visited))
|
|
338
|
+
groups.append(component)
|
|
339
|
+
return groups
|
|
340
|
+
|
|
341
|
+
def _build_compaction_plan(self, members: list[dict[str, Any]]) -> dict[str, Any]:
|
|
342
|
+
primary = max(members, key=self._primary_sort_key)
|
|
343
|
+
duplicate_ids = [
|
|
344
|
+
str(member["id"])
|
|
345
|
+
for member in members
|
|
346
|
+
if str(member["id"]) != str(primary["id"])
|
|
347
|
+
]
|
|
348
|
+
return {
|
|
349
|
+
"primary_id": str(primary["id"]),
|
|
350
|
+
"primary_title": str(primary["title"]),
|
|
351
|
+
"duplicate_ids": duplicate_ids,
|
|
352
|
+
"duplicate_titles": [
|
|
353
|
+
str(member["title"])
|
|
354
|
+
for member in members
|
|
355
|
+
if str(member["id"]) != str(primary["id"])
|
|
356
|
+
],
|
|
357
|
+
"members": members,
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
@staticmethod
|
|
361
|
+
def _primary_sort_key(member: dict[str, Any]) -> tuple[float, int, str, str, int]:
|
|
362
|
+
updated_at = member.get("updated_at")
|
|
363
|
+
created_at = member.get("created_at")
|
|
364
|
+
return (
|
|
365
|
+
float(member.get("quality_score", 0.0) or 0.0),
|
|
366
|
+
int(member.get("usage_count", 0) or 0),
|
|
367
|
+
str(updated_at or ""),
|
|
368
|
+
str(created_at or ""),
|
|
369
|
+
len(str(member.get("content", "") or "")),
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
@staticmethod
|
|
373
|
+
def _cosine_similarity(left: list[float], right: list[float]) -> float:
|
|
374
|
+
if not left or not right or len(left) != len(right):
|
|
375
|
+
return 0.0
|
|
376
|
+
numerator = sum(a * b for a, b in zip(left, right, strict=False))
|
|
377
|
+
left_norm = math.sqrt(sum(value * value for value in left))
|
|
378
|
+
right_norm = math.sqrt(sum(value * value for value in right))
|
|
379
|
+
if left_norm == 0 or right_norm == 0:
|
|
380
|
+
return 0.0
|
|
381
|
+
return numerator / (left_norm * right_norm)
|
minder/tools/query.py
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import AsyncGenerator
|
|
4
|
+
import uuid
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from minder.config import MinderConfig
|
|
9
|
+
from minder.embedding.local import LocalEmbeddingProvider
|
|
10
|
+
from minder.graph import GraphState, MinderGraph
|
|
11
|
+
from minder.graph.nodes.retriever import RetrieverNode
|
|
12
|
+
from minder.observability.metrics import (
|
|
13
|
+
record_continuity_packet,
|
|
14
|
+
record_query_prompt_render,
|
|
15
|
+
)
|
|
16
|
+
from minder.prompts import PromptRegistry
|
|
17
|
+
from minder.store.interfaces import IOperationalStore, IVectorStore
|
|
18
|
+
from minder.tools.graph import GraphTools
|
|
19
|
+
from minder.tools.ingest import IngestTools
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class QueryTools:
|
|
23
|
+
def __init__(
    self,
    store: IOperationalStore,
    config: MinderConfig,
    graph: MinderGraph | None = None,
    vector_store: IVectorStore | None = None,
    graph_tools: GraphTools | None = None,
) -> None:
    """Wire the query tools to their collaborators.

    A ``MinderGraph`` and a ``VectorStore`` (built from ``store``) are
    created when the corresponding argument is falsy. An embedding provider
    using the configured dimensions and an ``IngestTools`` instance sharing
    the store, provider and vector store are always created here.
    ``graph_tools`` is kept as given and may be ``None``.
    """
    from minder.store.vector import VectorStore

    self._store = store
    self._config = config

    if not graph:
        graph = MinderGraph(store, config)
    self._graph = graph

    if not vector_store:
        vector_store = VectorStore(store, store)
    self._vector_store = vector_store

    embedding_settings = config.embedding
    self._embedding_provider = LocalEmbeddingProvider(
        embedding_settings.model_path,
        dimensions=embedding_settings.dimensions,
        runtime="auto",
    )
    self._ingest_tools = IngestTools(
        self._store,
        self._embedding_provider,
        vector_store=self._vector_store,
    )
    self._graph_tools = graph_tools
|
|
48
|
+
|
|
49
|
+
async def minder_query(
    self,
    query: str,
    *,
    repo_path: str | None,
    session_id: uuid.UUID | None = None,
    user_id: uuid.UUID | None = None,
    repo_id: uuid.UUID | None = None,
    workflow_name: str | None = None,
    verification_payload: dict[str, Any] | None = None,
    max_attempts: int = 2,
    allowed_repo_scopes: list[str] | None = None,
) -> dict[str, Any]:
    """Run ``query`` through the graph and return the assembled result.

    When ``repo_path`` is given the directory is ingested first, using the
    final path component as the project name. If graph tools are configured,
    cross-repository context is built and attached to the workflow context.
    The ``query_reasoning`` prompt is resolved from the store and its name,
    template, defaults and source (``builtin``/``custom``) are passed to the
    graph through the state metadata.

    After the graph run, a continuity packet metric and a prompt-render
    metric (with the number of ``guard_failed`` transitions) are recorded.

    Returns:
        The dict produced by ``_result_from_state`` for the final state, the
        same shape that ``minder_query_stream`` emits as its final payload.
    """
    project_name = Path(repo_path).name if repo_path else None
    if repo_path:
        await self._ingest_tools.minder_ingest_directory(
            repo_path, project=project_name
        )
    workflow_context: dict[str, Any] = (
        {"workflow_name": workflow_name} if workflow_name else {}
    )
    if self._graph_tools is not None and repo_path:
        cross_repo_context, cross_repo_graph = (
            await self._graph_tools.build_cross_repo_context(
                query,
                repo_path=repo_path,
                repo_id=str(repo_id) if repo_id is not None else None,
                # Same value as Path(repo_path).name; repo_path is truthy here.
                repo_name=project_name,
                allowed_repo_scopes=allowed_repo_scopes,
            )
        )
        if cross_repo_context:
            workflow_context["cross_repo_context"] = cross_repo_context
        if cross_repo_graph is not None:
            workflow_context["cross_repo_graph"] = cross_repo_graph
    query_prompt = await PromptRegistry.resolve_prompt_model(
        "query_reasoning",
        self._store,
    )
    state = GraphState(
        query=query,
        session_id=session_id,
        user_id=user_id,
        repo_id=repo_id,
        repo_path=repo_path,
        workflow_context=workflow_context,
        metadata={
            "verification_payload": verification_payload,
            "max_attempts": max_attempts,
            "project_name": project_name,
            "query_prompt_name": getattr(query_prompt, "name", "query_reasoning"),
            "query_prompt_template": getattr(query_prompt, "content_template", ""),
            "query_prompt_defaults": dict(
                getattr(query_prompt, "defaults", {}) or {}
            ),
            "query_prompt_source": (
                "builtin"
                if bool(getattr(query_prompt, "is_builtin", False))
                else "custom"
            ),
        },
    )
    result = await self._graph.run(state)
    record_continuity_packet("query")
    record_query_prompt_render(
        str(
            result.metadata.get(
                "query_prompt_source",
                # Fall back to what was sent in when the graph dropped the key.
                state.metadata.get("query_prompt_source", "unknown"),
            )
        ),
        correction_retries=sum(
            1
            for item in result.transition_log
            if str(item.get("edge")) == "guard_failed"
        ),
    )
    # Single source of truth for the result shape, shared with the
    # streaming entry point.
    return self._result_from_state(result)
|
|
143
|
+
|
|
144
|
+
async def minder_query_stream(
    self,
    query: str,
    *,
    repo_path: str | None,
    session_id: uuid.UUID | None = None,
    user_id: uuid.UUID | None = None,
    repo_id: uuid.UUID | None = None,
    workflow_name: str | None = None,
    verification_payload: dict[str, Any] | None = None,
    max_attempts: int = 2,
    allowed_repo_scopes: list[str] | None = None,
) -> AsyncGenerator[dict[str, Any], None]:
    """Stream graph events for ``query``, rewriting the final one.

    Preparation is the same as for the non-streaming query: optional
    ingestion of ``repo_path``, optional cross-repository context, and
    resolution of the ``query_reasoning`` prompt into the state metadata.

    Events from the graph stream are yielded unchanged, except for an event
    of type ``final`` whose ``state`` is a ``GraphState``: for that one the
    metrics are recorded and ``{"type": "final", "payload": ...}`` is
    yielded instead, with the payload built by ``_result_from_state``.
    """
    project_name = Path(repo_path).name if repo_path else None
    if repo_path:
        await self._ingest_tools.minder_ingest_directory(
            repo_path, project=project_name
        )

    workflow_context: dict[str, Any] = {}
    if workflow_name:
        workflow_context["workflow_name"] = workflow_name

    if self._graph_tools is not None and repo_path:
        repo_id_text = str(repo_id) if repo_id is not None else None
        (
            cross_repo_context,
            cross_repo_graph,
        ) = await self._graph_tools.build_cross_repo_context(
            query,
            repo_path=repo_path,
            repo_id=repo_id_text,
            repo_name=Path(repo_path).name,
            allowed_repo_scopes=allowed_repo_scopes,
        )
        if cross_repo_context:
            workflow_context["cross_repo_context"] = cross_repo_context
        if cross_repo_graph is not None:
            workflow_context["cross_repo_graph"] = cross_repo_graph

    query_prompt = await PromptRegistry.resolve_prompt_model(
        "query_reasoning",
        self._store,
    )
    prompt_name = getattr(query_prompt, "name", "query_reasoning")
    prompt_template = getattr(query_prompt, "content_template", "")
    prompt_defaults = dict(getattr(query_prompt, "defaults", {}) or {})
    if bool(getattr(query_prompt, "is_builtin", False)):
        prompt_source = "builtin"
    else:
        prompt_source = "custom"

    state = GraphState(
        query=query,
        session_id=session_id,
        user_id=user_id,
        repo_id=repo_id,
        repo_path=repo_path,
        workflow_context=workflow_context,
        metadata={
            "verification_payload": verification_payload,
            "max_attempts": max_attempts,
            "project_name": project_name,
            "query_prompt_name": prompt_name,
            "query_prompt_template": prompt_template,
            "query_prompt_defaults": prompt_defaults,
            "query_prompt_source": prompt_source,
        },
    )

    async for event in self._graph.stream(state):
        final_state = None
        if str(event.get("type")) == "final":
            final_state = event.get("state")
        if not isinstance(final_state, GraphState):
            # Intermediate events, and final events without a GraphState,
            # pass through untouched.
            yield event
            continue

        payload = self._result_from_state(final_state)
        record_continuity_packet("query")
        rendered_source = str(
            final_state.metadata.get(
                "query_prompt_source",
                state.metadata.get("query_prompt_source", "unknown"),
            )
        )
        guard_failures = sum(
            1
            for item in final_state.transition_log
            if str(item.get("edge")) == "guard_failed"
        )
        record_query_prompt_render(
            rendered_source,
            correction_retries=guard_failures,
        )
        yield {"type": "final", "payload": payload}
|
|
229
|
+
|
|
230
|
+
def _result_from_state(self, result: GraphState) -> dict[str, Any]:
|
|
231
|
+
return {
|
|
232
|
+
"answer": result.llm_output.get("text", ""),
|
|
233
|
+
"sources": result.reasoning_output.get("sources", []),
|
|
234
|
+
"workflow": result.workflow_context,
|
|
235
|
+
"guard_result": result.guard_result,
|
|
236
|
+
"verification_result": result.verification_result,
|
|
237
|
+
"evaluation": result.evaluation,
|
|
238
|
+
"provider": result.llm_output.get("provider"),
|
|
239
|
+
"model": result.llm_output.get(
|
|
240
|
+
"model", result.llm_output.get("model_path")
|
|
241
|
+
),
|
|
242
|
+
"runtime": result.llm_output.get("runtime"),
|
|
243
|
+
"orchestration_runtime": result.metadata.get("orchestration_runtime"),
|
|
244
|
+
"transition_log": result.transition_log,
|
|
245
|
+
"edge": result.metadata.get("edge"),
|
|
246
|
+
"cross_repo_graph": result.workflow_context.get("cross_repo_graph"),
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
async def minder_search_code(
|
|
250
|
+
self, query: str, *, repo_path: str, limit: int = 5
|
|
251
|
+
) -> list[dict[str, Any]]:
|
|
252
|
+
await self._ingest_tools.minder_ingest_directory(
|
|
253
|
+
repo_path, project=Path(repo_path).name
|
|
254
|
+
)
|
|
255
|
+
project_name = Path(repo_path).name
|
|
256
|
+
semantic_code_hits = await self._vector_store.search_documents(
|
|
257
|
+
self._embedding_provider.embed(query),
|
|
258
|
+
project=project_name,
|
|
259
|
+
doc_types={"code"},
|
|
260
|
+
limit=limit,
|
|
261
|
+
score_threshold=0.0,
|
|
262
|
+
)
|
|
263
|
+
if semantic_code_hits:
|
|
264
|
+
return [
|
|
265
|
+
{
|
|
266
|
+
"path": doc["path"],
|
|
267
|
+
"title": doc["title"],
|
|
268
|
+
"score": doc["score"],
|
|
269
|
+
"source_type": doc.get("doc_type", "unknown"),
|
|
270
|
+
}
|
|
271
|
+
for doc in semantic_code_hits[:limit]
|
|
272
|
+
]
|
|
273
|
+
|
|
274
|
+
state = GraphState(
|
|
275
|
+
query=query,
|
|
276
|
+
repo_path=repo_path,
|
|
277
|
+
metadata={"project_name": project_name},
|
|
278
|
+
)
|
|
279
|
+
retriever = RetrieverNode(
|
|
280
|
+
top_k=limit,
|
|
281
|
+
embedding_provider=self._embedding_provider,
|
|
282
|
+
vector_store=self._vector_store,
|
|
283
|
+
score_threshold=self._config.retrieval.similarity_threshold,
|
|
284
|
+
)
|
|
285
|
+
state = await retriever.run(state)
|
|
286
|
+
code_docs = [
|
|
287
|
+
doc for doc in state.retrieved_docs if doc.get("doc_type") == "code"
|
|
288
|
+
]
|
|
289
|
+
docs_to_return = code_docs or state.retrieved_docs
|
|
290
|
+
return [
|
|
291
|
+
{
|
|
292
|
+
"path": doc["path"],
|
|
293
|
+
"title": doc["title"],
|
|
294
|
+
"score": doc["score"],
|
|
295
|
+
"source_type": doc.get("doc_type", "unknown"),
|
|
296
|
+
}
|
|
297
|
+
for doc in docs_to_return[:limit]
|
|
298
|
+
]
|
|
299
|
+
|
|
300
|
+
async def minder_search_errors(
|
|
301
|
+
self, query: str, *, limit: int = 5
|
|
302
|
+
) -> list[dict[str, Any]]:
|
|
303
|
+
return await self._store.search_errors(query, limit=limit)
|
|
304
|
+
|
|
305
|
+
@staticmethod
|
|
306
|
+
def discover_repo_files(repo_path: str) -> list[str]:
|
|
307
|
+
return [str(path) for path in Path(repo_path).rglob("*") if path.is_file()]
|