minder-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minder/__init__.py +12 -0
- minder/api/routers/prompts.py +177 -0
- minder/application/__init__.py +1 -0
- minder/application/admin/__init__.py +11 -0
- minder/application/admin/dto.py +453 -0
- minder/application/admin/jobs.py +327 -0
- minder/application/admin/use_cases.py +1895 -0
- minder/auth/__init__.py +12 -0
- minder/auth/context.py +26 -0
- minder/auth/middleware.py +70 -0
- minder/auth/principal.py +59 -0
- minder/auth/rate_limiter.py +89 -0
- minder/auth/rbac.py +60 -0
- minder/auth/service.py +541 -0
- minder/bootstrap/__init__.py +9 -0
- minder/bootstrap/providers.py +109 -0
- minder/bootstrap/transport.py +807 -0
- minder/cache/__init__.py +10 -0
- minder/cache/providers.py +140 -0
- minder/chunking/__init__.py +4 -0
- minder/chunking/code_splitter.py +184 -0
- minder/chunking/splitter.py +136 -0
- minder/cli.py +1542 -0
- minder/config.py +179 -0
- minder/continuity.py +363 -0
- minder/dev.py +160 -0
- minder/embedding/__init__.py +9 -0
- minder/embedding/base.py +7 -0
- minder/embedding/local.py +65 -0
- minder/embedding/openai.py +7 -0
- minder/graph/__init__.py +11 -0
- minder/graph/edges.py +13 -0
- minder/graph/executor.py +127 -0
- minder/graph/graph.py +263 -0
- minder/graph/nodes/__init__.py +27 -0
- minder/graph/nodes/evaluator.py +21 -0
- minder/graph/nodes/guard.py +64 -0
- minder/graph/nodes/llm.py +59 -0
- minder/graph/nodes/planning.py +30 -0
- minder/graph/nodes/reasoning.py +87 -0
- minder/graph/nodes/reranker.py +141 -0
- minder/graph/nodes/retriever.py +86 -0
- minder/graph/nodes/verification.py +230 -0
- minder/graph/nodes/workflow_planner.py +250 -0
- minder/graph/runtime.py +15 -0
- minder/graph/state.py +26 -0
- minder/llm/__init__.py +5 -0
- minder/llm/base.py +14 -0
- minder/llm/local.py +381 -0
- minder/llm/openai.py +89 -0
- minder/models/__init__.py +109 -0
- minder/models/base.py +10 -0
- minder/models/client.py +137 -0
- minder/models/document.py +34 -0
- minder/models/error.py +32 -0
- minder/models/graph.py +114 -0
- minder/models/history.py +32 -0
- minder/models/job.py +62 -0
- minder/models/prompt.py +41 -0
- minder/models/repository.py +62 -0
- minder/models/rule.py +68 -0
- minder/models/session.py +51 -0
- minder/models/skill.py +52 -0
- minder/models/user.py +41 -0
- minder/models/workflow.py +35 -0
- minder/observability/__init__.py +57 -0
- minder/observability/audit.py +243 -0
- minder/observability/logging.py +253 -0
- minder/observability/metrics.py +448 -0
- minder/observability/tracing.py +215 -0
- minder/presentation/__init__.py +1 -0
- minder/presentation/http/__init__.py +1 -0
- minder/presentation/http/admin/__init__.py +3 -0
- minder/presentation/http/admin/api.py +1309 -0
- minder/presentation/http/admin/context.py +94 -0
- minder/presentation/http/admin/dashboard.py +111 -0
- minder/presentation/http/admin/jobs.py +208 -0
- minder/presentation/http/admin/memories.py +185 -0
- minder/presentation/http/admin/prompts.py +219 -0
- minder/presentation/http/admin/routes.py +127 -0
- minder/presentation/http/admin/runtime.py +650 -0
- minder/presentation/http/admin/search.py +368 -0
- minder/presentation/http/admin/skills.py +230 -0
- minder/prompts/__init__.py +646 -0
- minder/prompts/formatter.py +142 -0
- minder/resources/__init__.py +318 -0
- minder/retrieval/__init__.py +5 -0
- minder/retrieval/hybrid.py +178 -0
- minder/retrieval/mmr.py +116 -0
- minder/retrieval/multi_hop.py +115 -0
- minder/runtime.py +15 -0
- minder/server.py +145 -0
- minder/store/__init__.py +64 -0
- minder/store/document.py +115 -0
- minder/store/error.py +82 -0
- minder/store/feedback.py +114 -0
- minder/store/graph.py +588 -0
- minder/store/history.py +57 -0
- minder/store/interfaces.py +512 -0
- minder/store/milvus/__init__.py +11 -0
- minder/store/milvus/client.py +26 -0
- minder/store/milvus/collections.py +15 -0
- minder/store/milvus/vector_store.py +232 -0
- minder/store/mongodb/__init__.py +11 -0
- minder/store/mongodb/client.py +49 -0
- minder/store/mongodb/indexes.py +90 -0
- minder/store/mongodb/operational_store.py +993 -0
- minder/store/relational.py +1087 -0
- minder/store/repo_state.py +58 -0
- minder/store/rule.py +93 -0
- minder/store/vector.py +79 -0
- minder/tools/__init__.py +47 -0
- minder/tools/auth.py +94 -0
- minder/tools/graph.py +839 -0
- minder/tools/ingest.py +353 -0
- minder/tools/memory.py +381 -0
- minder/tools/query.py +307 -0
- minder/tools/registry.py +269 -0
- minder/tools/repo_scanner.py +1266 -0
- minder/tools/search.py +15 -0
- minder/tools/session.py +316 -0
- minder/tools/skills.py +899 -0
- minder/tools/workflow.py +215 -0
- minder/transport/__init__.py +4 -0
- minder/transport/base.py +286 -0
- minder/transport/sse.py +252 -0
- minder/transport/stdio.py +29 -0
- minder_cli-0.2.0.dist-info/METADATA +318 -0
- minder_cli-0.2.0.dist-info/RECORD +132 -0
- minder_cli-0.2.0.dist-info/WHEEL +4 -0
- minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
- minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from minder.config import MinderConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(slots=True)
|
|
11
|
+
class PromptDraft:
|
|
12
|
+
name: str
|
|
13
|
+
title: str
|
|
14
|
+
description: str
|
|
15
|
+
content_template: str
|
|
16
|
+
arguments: list[str]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _normalize_arguments(arguments: list[str]) -> list[str]:
|
|
20
|
+
normalized: list[str] = []
|
|
21
|
+
seen: set[str] = set()
|
|
22
|
+
for argument in arguments:
|
|
23
|
+
value = str(argument).strip()
|
|
24
|
+
if not value or value in seen:
|
|
25
|
+
continue
|
|
26
|
+
seen.add(value)
|
|
27
|
+
normalized.append(value)
|
|
28
|
+
return normalized
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _humanize_name(name: str) -> str:
|
|
32
|
+
parts = [part for part in re.split(r"[_\-\s]+", name.strip()) if part]
|
|
33
|
+
return " ".join(part.capitalize() for part in parts) or "Prompt"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _heuristic_polish(draft: PromptDraft) -> PromptDraft:
    """Build a structured prompt from *draft* without calling any model.

    Missing fields are filled with defaults: the title falls back to a
    humanised form of the name, the description to a generic sentence, and
    the task to a generic instruction.  The returned template is laid out as
    ``Role`` / ``Inputs`` / ``Task`` / ``Output Requirements`` sections
    separated by blank lines.

    Args:
        draft: The raw draft to tidy up.

    Returns:
        A new ``PromptDraft`` with a stripped name, the resolved title and
        description, the sectioned template and the normalised arguments.
    """
    placeholders = _normalize_arguments(draft.arguments)

    heading = draft.title.strip()
    if not heading:
        heading = _humanize_name(draft.name)

    summary = draft.description.strip()
    if not summary:
        summary = f"Guide the model to act as {heading.lower()} with grounded, actionable output."

    # One bullet per placeholder, written as "- {placeholder}".
    if placeholders:
        inputs_block = "\n".join(f"- {{{placeholder}}}" for placeholder in placeholders)
    else:
        inputs_block = "- No named placeholders required."

    task_body = draft.content_template.strip()
    if task_body:
        task_block = "## Task\n" + task_body
    else:
        task_block = "## Task\nRespond with a concrete, well-structured answer tailored to the provided inputs."

    sections = (
        f"## Role\nYou are {heading}. {summary}",
        f"## Inputs\n{inputs_block}",
        task_block,
        "## Output Requirements\n- Be specific and practical.\n- Preserve important technical constraints.\n- Avoid filler and generic advice.",
    )
    polished_template = "\n\n".join(part for part in sections if part.strip())

    return PromptDraft(
        name=draft.name.strip(),
        title=heading,
        description=summary,
        content_template=polished_template,
        arguments=placeholders,
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _extract_json_object(raw: str) -> dict[str, object] | None:
|
|
72
|
+
if not raw.strip():
|
|
73
|
+
return None
|
|
74
|
+
candidates = re.findall(r"\{.*\}", raw, flags=re.DOTALL)
|
|
75
|
+
for candidate in candidates:
|
|
76
|
+
try:
|
|
77
|
+
value = json.loads(candidate)
|
|
78
|
+
except json.JSONDecodeError:
|
|
79
|
+
continue
|
|
80
|
+
if isinstance(value, dict):
|
|
81
|
+
return value
|
|
82
|
+
return None
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def polish_prompt_draft(
    draft: PromptDraft, config: MinderConfig
) -> tuple[PromptDraft, dict[str, str]]:
    """Polish *draft* heuristically, then let the local LLM refine it.

    The draft is first restructured by ``_heuristic_polish``.  That result is
    sent to a ``LocalModelLLM`` with an instruction to return JSON holding
    ``title``, ``description`` and ``content_template``.  Only non-blank
    string values from the response are accepted; anything else (missing
    keys, ``null``, numbers, lists) keeps the heuristic value instead of being
    stringified into the prompt.  The name and arguments always come from the
    heuristic result.

    Args:
        draft: The prompt draft to polish.
        config: Minder configuration; ``config.llm`` supplies the model path,
            context length, temperature and model name.

    Returns:
        A ``(draft, metadata)`` tuple.  ``metadata`` has the keys
        ``provider``, ``model`` and ``runtime``.  ``provider`` is
        ``"local_llm"`` when at least one field from the model response was
        used and ``"heuristic"`` otherwise.
    """
    from minder.llm.local import LocalModelLLM

    polished = _heuristic_polish(draft)
    llm = LocalModelLLM(
        config.llm.model_path,
        runtime="auto",
        context_length=config.llm.context_length,
    )
    runtime = llm.runtime

    def _metadata(provider: str) -> dict[str, str]:
        # Both outcomes report the configured model name so the metadata
        # always describes the model that was actually loaded.
        return {
            "provider": provider,
            "model": config.llm.model_name,
            "runtime": runtime,
        }

    instruction = """You are polishing an MCP prompt template for an engineering assistant.
Return only valid JSON with keys: title, description, content_template.
Keep placeholders exactly as provided, for example {repo_name} or {error}.
Do not invent new placeholders.
Make the prompt direct, structured, and useful for a coding workflow.
"""
    request_payload = {
        "name": polished.name,
        "title": polished.title,
        "description": polished.description,
        "arguments": polished.arguments,
        "content_template": polished.content_template,
    }
    llm_response = llm.complete_text(
        f"{instruction}\n\nDraft:\n{json.dumps(request_payload, ensure_ascii=True, indent=2)}",
        max_tokens=900,
        # Clamp the configured temperature into [0.05, 0.3].
        temperature=min(max(config.llm.temperature, 0.05), 0.3),
        fallback="",
    )
    parsed = _extract_json_object(llm_response)
    if not parsed:
        return polished, _metadata("heuristic")

    # Accept only real, non-blank strings from the model response.
    overrides: dict[str, str] = {}
    for key in ("title", "description", "content_template"):
        candidate = parsed.get(key)
        if isinstance(candidate, str) and candidate.strip():
            overrides[key] = candidate.strip()
    if not overrides:
        # The response was JSON but carried nothing usable.
        return polished, _metadata("heuristic")

    merged = PromptDraft(
        name=polished.name,
        title=overrides.get("title", polished.title),
        description=overrides.get("description", polished.description),
        content_template=overrides.get("content_template", polished.content_template),
        arguments=polished.arguments,
    )
    return merged, _metadata("local_llm")
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
"""MCP resource registration for Minder."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from collections import Counter
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from mcp.server.fastmcp import FastMCP
|
|
11
|
+
|
|
12
|
+
from minder.store.interfaces import IGraphRepository, IOperationalStore
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ResourceRegistry:
    """Registers all Minder MCP resources onto a :class:`FastMCP` app."""

    @staticmethod
    def register(
        app: FastMCP,
        store: IOperationalStore,
        graph_store: IGraphRepository | None = None,
    ) -> None:
        """Register core Minder resources, and graph resources when available.

        ``minder://skills``, ``minder://repos`` and ``minder://stats`` are
        always registered.  The per-repository resources under
        ``minder://repos/{repo_name}/`` (``structure``, ``todos``, ``routes``,
        ``dependencies``, ``symbols``) are registered only when *graph_store*
        is given.  Every resource returns a JSON document as a string.

        Args:
            app: The FastMCP application to register resources with.
            store: An initialised operational store used to fetch live data.
            graph_store: Optional graph repository backing the per-repository
                resources.  When ``None`` those resources are not registered.
        """

        # ------------------------------------------------------------------
        # minder://skills
        # ------------------------------------------------------------------

        @app.resource(
            "minder://skills",
            name="minder_skills",
            title="Minder Skills",
            description=(
                "List all stored skills with their id, title, language, and tags."
            ),
            mime_type="application/json",
        )
        async def skills_resource() -> str:
            skills = await store.list_skills()
            return json.dumps(
                [
                    {
                        "id": str(s.id),
                        "title": s.title,
                        "language": getattr(s, "language", ""),
                        "tags": list(s.tags) if s.tags else [],
                    }
                    for s in skills
                ],
                indent=2,
            )

        # ------------------------------------------------------------------
        # minder://repos
        # ------------------------------------------------------------------

        @app.resource(
            "minder://repos",
            name="minder_repos",
            title="Minder Repositories",
            description=(
                "List all repositories with their name, URL, and current workflow state."
            ),
            mime_type="application/json",
        )
        async def repos_resource() -> str:
            repos = await store.list_repositories()
            result: list[dict[str, Any]] = []
            for repo in repos:
                state = await store.get_workflow_state_by_repo(repo.id)
                workflow_info: dict[str, Any] | None = None
                if state is not None:
                    workflow_info = {
                        "current_step": state.current_step,
                        # Tolerate unset step lists the same way the skills
                        # resource tolerates unset tags.
                        "completed_steps": list(state.completed_steps or []),
                        "blocked_by": list(state.blocked_by or []),
                    }
                result.append(
                    {
                        "id": str(repo.id),
                        "name": repo.repo_name,
                        "url": getattr(repo, "repo_url", ""),
                        "workflow_state": workflow_info,
                    }
                )
            return json.dumps(result, indent=2)

        # ------------------------------------------------------------------
        # minder://stats
        # ------------------------------------------------------------------

        @app.resource(
            "minder://stats",
            name="minder_stats",
            title="Minder Statistics",
            description=(
                "Aggregated counts: total skills, repos, workflows, and recorded errors."
            ),
            mime_type="application/json",
        )
        async def stats_resource() -> str:
            skills = await store.list_skills()
            repos = await store.list_repositories()
            workflows = await store.list_workflows()
            errors = await store.list_errors()
            return json.dumps(
                {
                    "skill_count": len(skills),
                    "repo_count": len(repos),
                    "workflow_count": len(workflows),
                    "error_count": len(errors),
                },
                indent=2,
            )

        # Everything below needs the graph repository.
        if graph_store is None:
            return

        async def _node_inventory(
            repo_name: str,
            node_types: set[str],
            sort_key: Any,
        ) -> str:
            """Dump the repo's nodes of the given types as a sorted JSON inventory."""
            repo_nodes = await _repo_graph_nodes(graph_store, repo_name)
            items = [
                _serialize_graph_node(node)
                for node in repo_nodes
                if str(getattr(node, "node_type", "")) in node_types
            ]
            items.sort(key=sort_key)
            return json.dumps(
                {
                    "repo_name": repo_name,
                    "count": len(items),
                    "items": items,
                },
                indent=2,
            )

        def _todo_line(metadata: dict[str, Any]) -> int:
            """Return the ``line`` metadata as an int, or 0 when it is not numeric."""
            try:
                return int(metadata.get("line", 0) or 0)
            except (TypeError, ValueError):
                # A malformed line value must not break the whole listing.
                return 0

        @app.resource(
            "minder://repos/{repo_name}/structure",
            name="minder_repo_structure",
            title="Minder Repository Structure",
            description="Graph-backed structural summary for a repository, grouped by node type.",
            mime_type="application/json",
        )
        async def repo_structure_resource(repo_name: str) -> str:
            repo_nodes = await _repo_graph_nodes(graph_store, repo_name)
            counts = Counter(str(getattr(node, "node_type", "")) for node in repo_nodes)
            grouped: dict[str, list[dict[str, Any]]] = {}
            # Sorting by (type, name) first keeps each group's list ordered by name.
            for node in sorted(
                repo_nodes,
                key=lambda item: (
                    str(getattr(item, "node_type", "")),
                    str(getattr(item, "name", "")),
                ),
            ):
                item = _serialize_graph_node(node)
                grouped.setdefault(item["node_type"], []).append(item)
            return json.dumps(
                {
                    "repo_name": repo_name,
                    "counts": dict(counts),
                    "nodes": grouped,
                },
                indent=2,
            )

        @app.resource(
            "minder://repos/{repo_name}/todos",
            name="minder_repo_todos",
            title="Minder Repository TODOs",
            description="Graph-backed TODO items extracted for a repository.",
            mime_type="application/json",
        )
        async def repo_todos_resource(repo_name: str) -> str:
            return await _node_inventory(
                repo_name,
                {"todo"},
                lambda item: (
                    str(item["metadata"].get("path", "")),
                    _todo_line(item["metadata"]),
                    item["name"],
                ),
            )

        @app.resource(
            "minder://repos/{repo_name}/routes",
            name="minder_repo_routes",
            title="Minder Repository Routes",
            description="Graph-backed route inventory for a repository.",
            mime_type="application/json",
        )
        async def repo_routes_resource(repo_name: str) -> str:
            return await _node_inventory(
                repo_name,
                {"route"},
                lambda item: (
                    str(item["metadata"].get("method", "")),
                    str(item["metadata"].get("route_path", "")),
                    item["name"],
                ),
            )

        @app.resource(
            "minder://repos/{repo_name}/dependencies",
            name="minder_repo_dependencies",
            title="Minder Repository Dependencies",
            description="Graph-backed internal and external dependency summary for a repository.",
            mime_type="application/json",
        )
        async def repo_dependencies_resource(repo_name: str) -> str:
            repo_nodes = await _repo_graph_nodes(graph_store, repo_name)
            repo_node_ids = {str(getattr(node, "id")) for node in repo_nodes}
            services = [
                node
                for node in repo_nodes
                if str(getattr(node, "node_type", "")) == "service"
            ]
            internal_dependencies: list[dict[str, Any]] = []
            for service in services:
                neighbors = await graph_store.get_neighbors(
                    getattr(service, "id"), direction="out", relation="depends_on"
                )
                # Only neighbours that belong to this repository count as internal.
                targets = [
                    {
                        "id": str(getattr(neighbor, "id")),
                        "name": str(getattr(neighbor, "name", "")),
                        "node_type": str(getattr(neighbor, "node_type", "")),
                    }
                    for neighbor in neighbors
                    if str(getattr(neighbor, "id")) in repo_node_ids
                ]
                if targets:
                    internal_dependencies.append(
                        {
                            "service": str(getattr(service, "name", "")),
                            "depends_on": sorted(targets, key=lambda item: item["name"]),
                        }
                    )

            external_apis = [
                _serialize_graph_node(node)
                for node in repo_nodes
                if str(getattr(node, "node_type", "")) == "external_service_api"
            ]
            external_apis.sort(key=lambda item: item["name"])
            return json.dumps(
                {
                    "repo_name": repo_name,
                    "internal_dependencies": internal_dependencies,
                    "external_services": external_apis,
                },
                indent=2,
            )

        @app.resource(
            "minder://repos/{repo_name}/symbols",
            name="minder_repo_symbols",
            title="Minder Repository Symbols",
            description="Graph-backed symbol inventory for functions, classes, controllers, and interfaces within a repository.",
            mime_type="application/json",
        )
        async def repo_symbols_resource(repo_name: str) -> str:
            return await _node_inventory(
                repo_name,
                {"function", "class", "controller", "interface", "abstract_class", "module"},
                lambda item: (
                    item["node_type"],
                    str(item["metadata"].get("path", "")),
                    item["name"],
                ),
            )
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def _serialize_graph_node(node: Any) -> dict[str, Any]:
|
|
293
|
+
metadata = getattr(node, "node_metadata", {}) or {}
|
|
294
|
+
return {
|
|
295
|
+
"id": str(getattr(node, "id")),
|
|
296
|
+
"node_type": str(getattr(node, "node_type", "")),
|
|
297
|
+
"name": str(getattr(node, "name", "")),
|
|
298
|
+
"metadata": metadata if isinstance(metadata, dict) else {},
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
async def _repo_graph_nodes(graph_store: IGraphRepository, repo_name: str) -> list[Any]:
|
|
303
|
+
nodes = await graph_store.list_nodes()
|
|
304
|
+
selected: list[Any] = []
|
|
305
|
+
for node in nodes:
|
|
306
|
+
metadata = getattr(node, "node_metadata", {}) or {}
|
|
307
|
+
project = str(metadata.get("project", "") or "")
|
|
308
|
+
path_value = str(metadata.get("path", "") or "")
|
|
309
|
+
if project == repo_name:
|
|
310
|
+
selected.append(node)
|
|
311
|
+
continue
|
|
312
|
+
if path_value:
|
|
313
|
+
try:
|
|
314
|
+
if Path(path_value).name == repo_name:
|
|
315
|
+
selected.append(node)
|
|
316
|
+
except (TypeError, ValueError):
|
|
317
|
+
continue
|
|
318
|
+
return selected
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""
|
|
2
|
+
BM25 + Vector hybrid retrieval.
|
|
3
|
+
|
|
4
|
+
Combines normalised BM25 keyword scores with normalised vector similarity
|
|
5
|
+
scores using a configurable alpha blend:
|
|
6
|
+
|
|
7
|
+
combined = alpha * vector_score + (1 - alpha) * bm25_score
|
|
8
|
+
|
|
9
|
+
alpha = 1.0 → pure vector search
|
|
10
|
+
alpha = 0.0 → pure BM25
|
|
11
|
+
alpha = 0.5 → equal blend (default)
|
|
12
|
+
|
|
13
|
+
BM25 is implemented in pure Python (no external index server required).
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import math
|
|
19
|
+
from collections import Counter
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# BM25 helpers
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
# BM25 tuning constants read by _bm25_score: K1 scales the term-frequency
# saturation term, B weights the document-length normalisation.
_BM25_K1 = 1.5
|
|
28
|
+
_BM25_B = 0.75
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _tokenize(text: str) -> list[str]:
|
|
32
|
+
return [tok for tok in text.lower().split() if len(tok) > 1]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _bm25_score(
    query_terms: list[str],
    doc_tokens: list[str],
    doc_freq: dict[str, int],
    num_docs: int,
    avg_dl: float,
) -> float:
    """Compute the BM25 score of one tokenised document for a query.

    Args:
        query_terms: Tokens of the query; repeated terms contribute once per
            occurrence.
        doc_tokens: Tokens of the document being scored.
        doc_freq: Number of documents containing each term.
        num_docs: Total number of documents in the corpus.
        avg_dl: Average document length in tokens; values below 1.0 are
            treated as 1.0.

    Returns:
        The summed ``idf * tf_norm`` over query terms that occur in the
        document, or ``0.0`` when none occur.
    """
    doc_length = len(doc_tokens)
    term_counts: Counter[str] = Counter(doc_tokens)
    # Length-dependent part of the denominator; identical for every term.
    length_term = _BM25_K1 * (1.0 - _BM25_B + _BM25_B * doc_length / max(avg_dl, 1.0))

    total = 0.0
    for term in query_terms:
        occurrences = term_counts.get(term, 0)
        if not occurrences:
            continue
        containing_docs = doc_freq.get(term, 0)
        idf = math.log((num_docs - containing_docs + 0.5) / (containing_docs + 0.5) + 1.0)
        tf_norm = (occurrences * (_BM25_K1 + 1.0)) / (occurrences + length_term)
        total += idf * tf_norm
    return total
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _min_max_normalize(scores: list[float]) -> list[float]:
|
|
59
|
+
if not scores:
|
|
60
|
+
return scores
|
|
61
|
+
lo, hi = min(scores), max(scores)
|
|
62
|
+
if math.isclose(hi, lo):
|
|
63
|
+
return [1.0] * len(scores)
|
|
64
|
+
span = hi - lo
|
|
65
|
+
return [(s - lo) / span for s in scores]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
# HybridRetriever
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class HybridRetriever:
    """
    Merge vector-search results with BM25 scores computed over a corpus.

    Args:
        alpha: blend coefficient in [0, 1].
            1.0 = pure vector, 0.0 = pure BM25.

    Raises:
        ValueError: if *alpha* is outside [0.0, 1.0].
    """

    def __init__(self, alpha: float = 0.5) -> None:
        if not 0.0 <= alpha <= 1.0:
            raise ValueError(f"alpha must be in [0.0, 1.0], got {alpha}")
        self._alpha = alpha

    @property
    def alpha(self) -> float:
        """Blend coefficient given at construction time."""
        return self._alpha

    @staticmethod
    def _doc_key(doc: dict[str, Any], id_key: str) -> str:
        """Return the identity key used for score lookup and de-duplication.

        Documents that carry *id_key* are identified by its string value.
        Documents without it are identified by object identity, so the same
        dict object is still recognised across *vector_results* and *corpus*,
        while distinct id-less documents never collide with each other.
        """
        identifier = doc.get(id_key)
        if identifier is None:
            return f"__anonymous__:{id(doc)}"
        return str(identifier)

    def merge(
        self,
        query: str,
        vector_results: list[dict[str, Any]],
        corpus: list[dict[str, Any]],
        *,
        limit: int = 5,
        content_key: str = "content",
        id_key: str = "path",
    ) -> list[dict[str, Any]]:
        """
        Merge *vector_results* with BM25 scores computed over *corpus*.

        Both score sets are min-max normalised before blending.  A document
        that is missing from one side contributes ``0.0`` for that side.

        Args:
            query: original user query (used for BM25 term matching).
            vector_results: documents returned by vector search, each with a
                ``"score"`` field (float, already normalised or raw cosine).
                A missing or ``None`` score counts as ``0.0``.
            corpus: the full candidate set to build the BM25 index over.
                Should include all docs in *vector_results* plus any extra
                candidates.  May equal *vector_results* when no corpus is
                separately available; an empty corpus falls back to
                *vector_results*.
            limit: maximum number of merged results to return; values below
                zero are treated as zero.
            content_key: key in each doc dict that holds the text content.
            id_key: key used to de-duplicate documents across the two lists.

        Returns:
            Merged, sorted list of doc dicts enriched with ``"score"``,
            ``"vector_score"``, and ``"bm25_score"`` fields.
        """
        all_docs = corpus if corpus else vector_results
        if not all_docs:
            return []

        # ---- BM25 index ----
        tokenized = [_tokenize(str(doc.get(content_key, ""))) for doc in all_docs]
        avg_dl = sum(len(tokens) for tokens in tokenized) / max(len(tokenized), 1)
        doc_freq: Counter[str] = Counter()
        for tokens in tokenized:
            # set(): a term counts once per document, however often it repeats.
            doc_freq.update(set(tokens))

        query_terms = _tokenize(query)
        raw_bm25 = [
            _bm25_score(query_terms, tokens, doc_freq, len(all_docs), avg_dl)
            for tokens in tokenized
        ]
        bm25_norm = _min_max_normalize(raw_bm25)
        bm25_map: dict[str, float] = {
            self._doc_key(doc, id_key): score
            for doc, score in zip(all_docs, bm25_norm)
        }

        # ---- Vector score map ----
        raw_vec = [float(doc.get("score") or 0.0) for doc in vector_results]
        vec_norm = _min_max_normalize(raw_vec)
        vec_map: dict[str, float] = {
            self._doc_key(doc, id_key): score
            for doc, score in zip(vector_results, vec_norm)
        }

        # ---- Union merge ----
        # Vector hits come first so that, for a document present in both
        # lists, the vector-search copy of the dict is the one returned.
        seen: set[str] = set()
        merged: list[dict[str, Any]] = []
        for doc in [*vector_results, *all_docs]:
            key = self._doc_key(doc, id_key)
            if key in seen:
                continue
            seen.add(key)
            v = vec_map.get(key, 0.0)
            b = bm25_map.get(key, 0.0)
            combined = round(self._alpha * v + (1.0 - self._alpha) * b, 6)
            merged.append(
                {
                    **doc,
                    "score": combined,
                    "vector_score": round(v, 6),
                    "bm25_score": round(b, 6),
                }
            )

        merged.sort(key=lambda d: float(d["score"]), reverse=True)
        return merged[: max(limit, 0)]
|