minder-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minder/__init__.py +12 -0
- minder/api/routers/prompts.py +177 -0
- minder/application/__init__.py +1 -0
- minder/application/admin/__init__.py +11 -0
- minder/application/admin/dto.py +453 -0
- minder/application/admin/jobs.py +327 -0
- minder/application/admin/use_cases.py +1895 -0
- minder/auth/__init__.py +12 -0
- minder/auth/context.py +26 -0
- minder/auth/middleware.py +70 -0
- minder/auth/principal.py +59 -0
- minder/auth/rate_limiter.py +89 -0
- minder/auth/rbac.py +60 -0
- minder/auth/service.py +541 -0
- minder/bootstrap/__init__.py +9 -0
- minder/bootstrap/providers.py +109 -0
- minder/bootstrap/transport.py +807 -0
- minder/cache/__init__.py +10 -0
- minder/cache/providers.py +140 -0
- minder/chunking/__init__.py +4 -0
- minder/chunking/code_splitter.py +184 -0
- minder/chunking/splitter.py +136 -0
- minder/cli.py +1542 -0
- minder/config.py +179 -0
- minder/continuity.py +363 -0
- minder/dev.py +160 -0
- minder/embedding/__init__.py +9 -0
- minder/embedding/base.py +7 -0
- minder/embedding/local.py +65 -0
- minder/embedding/openai.py +7 -0
- minder/graph/__init__.py +11 -0
- minder/graph/edges.py +13 -0
- minder/graph/executor.py +127 -0
- minder/graph/graph.py +263 -0
- minder/graph/nodes/__init__.py +27 -0
- minder/graph/nodes/evaluator.py +21 -0
- minder/graph/nodes/guard.py +64 -0
- minder/graph/nodes/llm.py +59 -0
- minder/graph/nodes/planning.py +30 -0
- minder/graph/nodes/reasoning.py +87 -0
- minder/graph/nodes/reranker.py +141 -0
- minder/graph/nodes/retriever.py +86 -0
- minder/graph/nodes/verification.py +230 -0
- minder/graph/nodes/workflow_planner.py +250 -0
- minder/graph/runtime.py +15 -0
- minder/graph/state.py +26 -0
- minder/llm/__init__.py +5 -0
- minder/llm/base.py +14 -0
- minder/llm/local.py +381 -0
- minder/llm/openai.py +89 -0
- minder/models/__init__.py +109 -0
- minder/models/base.py +10 -0
- minder/models/client.py +137 -0
- minder/models/document.py +34 -0
- minder/models/error.py +32 -0
- minder/models/graph.py +114 -0
- minder/models/history.py +32 -0
- minder/models/job.py +62 -0
- minder/models/prompt.py +41 -0
- minder/models/repository.py +62 -0
- minder/models/rule.py +68 -0
- minder/models/session.py +51 -0
- minder/models/skill.py +52 -0
- minder/models/user.py +41 -0
- minder/models/workflow.py +35 -0
- minder/observability/__init__.py +57 -0
- minder/observability/audit.py +243 -0
- minder/observability/logging.py +253 -0
- minder/observability/metrics.py +448 -0
- minder/observability/tracing.py +215 -0
- minder/presentation/__init__.py +1 -0
- minder/presentation/http/__init__.py +1 -0
- minder/presentation/http/admin/__init__.py +3 -0
- minder/presentation/http/admin/api.py +1309 -0
- minder/presentation/http/admin/context.py +94 -0
- minder/presentation/http/admin/dashboard.py +111 -0
- minder/presentation/http/admin/jobs.py +208 -0
- minder/presentation/http/admin/memories.py +185 -0
- minder/presentation/http/admin/prompts.py +219 -0
- minder/presentation/http/admin/routes.py +127 -0
- minder/presentation/http/admin/runtime.py +650 -0
- minder/presentation/http/admin/search.py +368 -0
- minder/presentation/http/admin/skills.py +230 -0
- minder/prompts/__init__.py +646 -0
- minder/prompts/formatter.py +142 -0
- minder/resources/__init__.py +318 -0
- minder/retrieval/__init__.py +5 -0
- minder/retrieval/hybrid.py +178 -0
- minder/retrieval/mmr.py +116 -0
- minder/retrieval/multi_hop.py +115 -0
- minder/runtime.py +15 -0
- minder/server.py +145 -0
- minder/store/__init__.py +64 -0
- minder/store/document.py +115 -0
- minder/store/error.py +82 -0
- minder/store/feedback.py +114 -0
- minder/store/graph.py +588 -0
- minder/store/history.py +57 -0
- minder/store/interfaces.py +512 -0
- minder/store/milvus/__init__.py +11 -0
- minder/store/milvus/client.py +26 -0
- minder/store/milvus/collections.py +15 -0
- minder/store/milvus/vector_store.py +232 -0
- minder/store/mongodb/__init__.py +11 -0
- minder/store/mongodb/client.py +49 -0
- minder/store/mongodb/indexes.py +90 -0
- minder/store/mongodb/operational_store.py +993 -0
- minder/store/relational.py +1087 -0
- minder/store/repo_state.py +58 -0
- minder/store/rule.py +93 -0
- minder/store/vector.py +79 -0
- minder/tools/__init__.py +47 -0
- minder/tools/auth.py +94 -0
- minder/tools/graph.py +839 -0
- minder/tools/ingest.py +353 -0
- minder/tools/memory.py +381 -0
- minder/tools/query.py +307 -0
- minder/tools/registry.py +269 -0
- minder/tools/repo_scanner.py +1266 -0
- minder/tools/search.py +15 -0
- minder/tools/session.py +316 -0
- minder/tools/skills.py +899 -0
- minder/tools/workflow.py +215 -0
- minder/transport/__init__.py +4 -0
- minder/transport/base.py +286 -0
- minder/transport/sse.py +252 -0
- minder/transport/stdio.py +29 -0
- minder_cli-0.2.0.dist-info/METADATA +318 -0
- minder_cli-0.2.0.dist-info/RECORD +132 -0
- minder_cli-0.2.0.dist-info/WHEEL +4 -0
- minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
- minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from minder.graph.state import GraphState
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class PlanningNode:
    """Graph node that derives a small execution plan from the user query."""

    def run(self, state: GraphState) -> GraphState:
        """Classify ``state.query`` and store the plan on ``state.plan``.

        The query is lower-cased and matched against keyword groups by plain
        substring search (so ``"prefix"`` matches ``"fix"``). The first group
        with a hit decides the intent; with no hit the intent is ``"explain"``.

        Args:
            state: Graph state; ``query`` is read and ``plan`` is overwritten.

        Returns:
            The same ``state`` object, with ``plan`` populated.
        """
        text = state.query.lower()

        # Ordered rules: earlier entries take precedence over later ones.
        intent_rules = (
            ("code_gen", ("fix", "implement", "write", "generate")),
            ("debug", ("debug", "bug", "trace", "error")),
            ("refactor", ("refactor",)),
            ("search", ("search", "find", "look up")),
        )
        intent = next(
            (
                label
                for label, keywords in intent_rules
                if any(keyword in text for keyword in keywords)
            ),
            "explain",
        )

        # Queries longer than twelve whitespace-separated words count as "high".
        word_count = len(text.split())

        state.plan = {
            "intent": intent,
            "knowledge_layer": "repository",
            "retrieval_strategy": "lexical" if intent == "search" else "hybrid",
            "complexity": "high" if word_count > 12 else "medium",
        }
        return state
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
from minder.graph.state import GraphState
|
|
6
|
+
from minder.prompts import PromptRegistry
|
|
7
|
+
from minder.tools.registry import tool_capability_manifest, tool_data_access_policy
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ReasoningNode:
    """Graph node that assembles the reasoning prompt and its source list."""

    def run(self, state: GraphState) -> GraphState:
        """Render the query-reasoning prompt and store it on the state.

        Reads ``state.reranked_docs``, ``state.workflow_context``,
        ``state.metadata``, ``state.repo_path`` and ``state.query``; writes
        ``state.reasoning_output`` with the keys ``prompt``, ``sources``,
        ``workflow_instruction`` and ``prompt_name``.

        Args:
            state: Graph state. Every reranked document must provide the
                ``path``, ``title``, ``score`` and ``content`` keys used below.

        Returns:
            The same ``state`` object, with ``reasoning_output`` populated.
        """

        def as_json(value):
            # Falsy values (None, empty dict, ...) are rendered as a literal "{}".
            return json.dumps(value, indent=2, sort_keys=True) if value else "{}"

        docs = state.reranked_docs
        sources = [
            {key: doc[key] for key in ("path", "title", "score")} for doc in docs
        ]
        # Only the first three documents contribute text, 240 characters each.
        snippets = [
            f"Source: {doc['path']}\n{str(doc['content']).strip()[:240]}"
            for doc in docs[:3]
        ]

        workflow = state.workflow_context
        guidance = workflow.get("guidance", "")
        instruction_envelope = workflow.get("instruction_envelope", {})
        continuity_brief = workflow.get("continuity_brief", {})
        retry_reason = str(state.metadata.get("retry_reason", "") or "").strip()

        # A template supplied through metadata wins over the built-in definition.
        template = state.metadata.get("query_prompt_template")
        if not template:
            builtin = PromptRegistry.get_builtin_definition("query_reasoning") or {}
            template = builtin.get("content_template", "")
        prompt_template = str(template)
        prompt_defaults = dict(state.metadata.get("query_prompt_defaults", {}) or {})

        # "continuity_packet" carries the same payload as "continuity_brief".
        continuity_json = as_json(continuity_brief)

        if state.repo_path:
            repository_note = "Repository context is available for repo-scoped reasoning."
        else:
            repository_note = "No repository is currently selected. Minder can still describe its built-in tools and internal data capabilities, but repo-scoped code and graph inspection tools need repository context first."

        if snippets:
            retrieved_context = "\n\n".join(snippets)
        else:
            retrieved_context = "No repository context found."

        variables = {
            "workflow_instruction": guidance,
            "instruction_envelope": as_json(instruction_envelope),
            "continuity_brief": continuity_json,
            "continuity_packet": continuity_json,
            "tool_capabilities": tool_capability_manifest(),
            "data_access_policy": tool_data_access_policy(),
            "repository_context_note": repository_note,
            "user_query": state.query,
            "retrieved_context": retrieved_context,
            "correction_required": retry_reason,
        }
        prompt = PromptRegistry.render_content_template(
            prompt_template,
            variables,
            defaults=prompt_defaults,
        )

        state.reasoning_output = {
            "prompt": prompt,
            "sources": sources,
            "workflow_instruction": guidance,
            "prompt_name": state.metadata.get("query_prompt_name", "query_reasoning"),
        }
        return state
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""
|
|
2
|
+
RerankerNode — re-ranks state.retrieved_docs into state.reranked_docs.
|
|
3
|
+
|
|
4
|
+
Runtime strategy (tried in order):
|
|
5
|
+
1. ``sentence_transformers.CrossEncoder`` — if the package is installed and
|
|
6
|
+
the model loads successfully.
|
|
7
|
+
2. MMR with document embeddings — if an ``embedding_provider`` is supplied.
|
|
8
|
+
3. Passthrough — sort by existing score, no re-scoring.
|
|
9
|
+
|
|
10
|
+
The node always writes ``state.reranked_docs`` and records
|
|
11
|
+
``state.metadata["reranker_runtime"]`` with the strategy used.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from minder.embedding.base import EmbeddingProvider
|
|
19
|
+
from minder.graph.state import GraphState
|
|
20
|
+
from minder.retrieval.mmr import mmr_rerank
|
|
21
|
+
from minder.runtime import load_attr, module_available
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class RerankerNode:
|
|
25
|
+
"""
|
|
26
|
+
Args:
|
|
27
|
+
top_k: maximum number of documents to keep after re-ranking.
|
|
28
|
+
lambda_mult: MMR trade-off (0 = max diversity, 1 = max relevance).
|
|
29
|
+
cross_encoder_model: HuggingFace model id used when
|
|
30
|
+
``sentence_transformers`` is available.
|
|
31
|
+
embedding_provider: optional embedder used for MMR fallback.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
*,
|
|
37
|
+
top_k: int = 5,
|
|
38
|
+
lambda_mult: float = 0.5,
|
|
39
|
+
cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2",
|
|
40
|
+
embedding_provider: EmbeddingProvider | None = None,
|
|
41
|
+
) -> None:
|
|
42
|
+
self._top_k = top_k
|
|
43
|
+
self._lambda_mult = lambda_mult
|
|
44
|
+
self._cross_encoder_model = cross_encoder_model
|
|
45
|
+
self._embedding_provider = embedding_provider
|
|
46
|
+
self._cross_encoder: Any | None = None
|
|
47
|
+
|
|
48
|
+
# ------------------------------------------------------------------
|
|
49
|
+
# Runtime detection
|
|
50
|
+
# ------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
@property
|
|
53
|
+
def runtime(self) -> str:
|
|
54
|
+
if module_available("sentence_transformers"):
|
|
55
|
+
return "cross_encoder"
|
|
56
|
+
if self._embedding_provider is not None:
|
|
57
|
+
return "mmr"
|
|
58
|
+
return "passthrough"
|
|
59
|
+
|
|
60
|
+
def _load_cross_encoder(self) -> Any | None:
|
|
61
|
+
if self._cross_encoder is not None:
|
|
62
|
+
return self._cross_encoder
|
|
63
|
+
ce_cls = load_attr("sentence_transformers", "CrossEncoder")
|
|
64
|
+
if ce_cls is None:
|
|
65
|
+
return None
|
|
66
|
+
try:
|
|
67
|
+
self._cross_encoder = ce_cls(self._cross_encoder_model)
|
|
68
|
+
except Exception: # noqa: BLE001
|
|
69
|
+
return None
|
|
70
|
+
return self._cross_encoder
|
|
71
|
+
|
|
72
|
+
# ------------------------------------------------------------------
|
|
73
|
+
# Main entry point
|
|
74
|
+
# ------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
async def run(self, state: GraphState) -> GraphState:
|
|
77
|
+
docs = list(state.retrieved_docs)
|
|
78
|
+
if not docs:
|
|
79
|
+
state.reranked_docs = []
|
|
80
|
+
state.metadata["reranker_runtime"] = "passthrough"
|
|
81
|
+
return state
|
|
82
|
+
|
|
83
|
+
# ---- Strategy 1: cross-encoder ----
|
|
84
|
+
ce = self._load_cross_encoder()
|
|
85
|
+
if ce is not None:
|
|
86
|
+
reranked = self._run_cross_encoder(ce, state.query, docs)
|
|
87
|
+
if reranked is not None:
|
|
88
|
+
state.reranked_docs = reranked
|
|
89
|
+
state.metadata["reranker_runtime"] = "cross_encoder"
|
|
90
|
+
return state
|
|
91
|
+
|
|
92
|
+
# ---- Strategy 2: MMR with embeddings ----
|
|
93
|
+
if self._embedding_provider is not None:
|
|
94
|
+
state.reranked_docs = self._run_mmr(state.query, docs)
|
|
95
|
+
state.metadata["reranker_runtime"] = "mmr"
|
|
96
|
+
return state
|
|
97
|
+
|
|
98
|
+
# ---- Strategy 3: passthrough (score sort) ----
|
|
99
|
+
state.reranked_docs = sorted(
|
|
100
|
+
docs, key=lambda d: float(d.get("score", 0.0)), reverse=True
|
|
101
|
+
)[: self._top_k]
|
|
102
|
+
state.metadata["reranker_runtime"] = "passthrough"
|
|
103
|
+
return state
|
|
104
|
+
|
|
105
|
+
# ------------------------------------------------------------------
|
|
106
|
+
# Internals
|
|
107
|
+
# ------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
def _run_cross_encoder(
|
|
110
|
+
self, ce: Any, query: str, docs: list[dict[str, Any]]
|
|
111
|
+
) -> list[dict[str, Any]] | None:
|
|
112
|
+
try:
|
|
113
|
+
pairs = [[query, str(doc.get("content", ""))] for doc in docs]
|
|
114
|
+
scores = ce.predict(pairs)
|
|
115
|
+
scored = [
|
|
116
|
+
{**doc, "score": float(score)}
|
|
117
|
+
for doc, score in zip(docs, scores, strict=False)
|
|
118
|
+
]
|
|
119
|
+
scored.sort(key=lambda d: float(d["score"]), reverse=True)
|
|
120
|
+
return scored[: self._top_k]
|
|
121
|
+
except Exception: # noqa: BLE001
|
|
122
|
+
return None
|
|
123
|
+
|
|
124
|
+
def _run_mmr(self, query: str, docs: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
125
|
+
assert self._embedding_provider is not None # guarded by caller
|
|
126
|
+
query_emb = self._embedding_provider.embed(query)
|
|
127
|
+
enriched: list[dict[str, Any]] = []
|
|
128
|
+
for doc in docs:
|
|
129
|
+
d = dict(doc)
|
|
130
|
+
if not isinstance(d.get("embedding"), list):
|
|
131
|
+
# Embed truncated content to stay within model context
|
|
132
|
+
d["embedding"] = self._embedding_provider.embed(
|
|
133
|
+
str(d.get("content", ""))[:512]
|
|
134
|
+
)
|
|
135
|
+
enriched.append(d)
|
|
136
|
+
return mmr_rerank(
|
|
137
|
+
query_emb,
|
|
138
|
+
enriched,
|
|
139
|
+
top_k=self._top_k,
|
|
140
|
+
lambda_mult=self._lambda_mult,
|
|
141
|
+
)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
from typing import cast
|
|
6
|
+
|
|
7
|
+
from minder.embedding.base import EmbeddingProvider
|
|
8
|
+
from minder.graph.state import GraphState
|
|
9
|
+
from minder.store.interfaces import IVectorStore
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RetrieverNode:
    """Graph node that fills ``state.retrieved_docs`` for the current query.

    Vector search is used when both an embedding provider and a vector store
    are configured and the search returns hits. Otherwise the node falls back
    to a lexical scan of the files under ``state.repo_path``.

    Args:
        top_k: maximum number of documents to return.
        embedding_provider: optional embedder for the query text.
        vector_store: optional store queried with the embedded query.
        score_threshold: forwarded unchanged to the vector store search.
    """

    # File types considered by the lexical fallback scan.
    _LEXICAL_SUFFIXES = frozenset({".py", ".md", ".txt", ".json"})

    def __init__(
        self,
        top_k: int = 5,
        *,
        embedding_provider: EmbeddingProvider | None = None,
        vector_store: IVectorStore | None = None,
        score_threshold: float = 0.0,
    ) -> None:
        self._top_k = top_k
        self._embedding_provider = embedding_provider
        self._vector_store = vector_store
        self._score_threshold = score_threshold

    async def run(self, state: GraphState) -> GraphState:
        """Retrieve documents and record the mode in ``state.metadata``.

        ``state.metadata["retrieval_mode"]`` is set to ``"none"`` (no repository
        and no project name), ``"vector"`` or ``"lexical"``. Both
        ``state.retrieved_docs`` and ``state.reranked_docs`` are written.

        Returns:
            The same ``state`` object.
        """
        project = state.metadata.get("project_name")
        if state.repo_path is None and not isinstance(project, str):
            state.retrieved_docs = []
            state.reranked_docs = []
            state.metadata["retrieval_mode"] = "none"
            return state

        if self._embedding_provider is not None and self._vector_store is not None:
            embedded = self._embedding_provider.embed(state.query)
            semantic_hits = await self._vector_store.search_documents(
                embedded,
                project=str(project) if isinstance(project, str) else None,
                limit=self._top_k,
                score_threshold=self._score_threshold,
            )
            # An empty vector result falls through to the lexical scan below.
            if semantic_hits:
                state.retrieved_docs = semantic_hits
                state.reranked_docs = list(semantic_hits)
                state.metadata["retrieval_mode"] = "vector"
                return state

        candidates = self._scan_repository(Path(state.repo_path or "."), state.query)
        ranked = sorted(
            candidates,
            key=lambda item: cast(float, item["score"]),
            reverse=True,
        )
        state.retrieved_docs = ranked[: self._top_k]
        state.reranked_docs = list(state.retrieved_docs)
        state.metadata["retrieval_mode"] = "lexical"
        return state

    def _scan_repository(self, repo_path: Path, query: str) -> list[dict[str, Any]]:
        """Score text files under ``repo_path`` by query-term occurrence counts.

        Query terms are the lower-cased whitespace tokens longer than two
        characters. Files scoring zero are dropped unless there are no terms.
        """
        query_terms = {term for term in query.lower().split() if len(term) > 2}
        candidates: list[dict[str, Any]] = []
        if not repo_path.exists():
            return candidates

        for path in repo_path.rglob("*"):
            if path.suffix not in self._LEXICAL_SUFFIXES:
                continue
            # Judge hidden-ness on the path *inside* the repository only. Using
            # the full path would discard every file when the repository lives
            # under a dot-directory or is addressed through "..".
            inner_parts = path.relative_to(repo_path).parts
            if any(part.startswith(".") and part != ".minder" for part in inner_parts):
                continue
            if not path.is_file():
                continue
            try:
                content = path.read_text(encoding="utf-8")
            except (UnicodeDecodeError, OSError):
                # Binary or unreadable files are skipped; the scan is best-effort.
                continue
            lowered = content.lower()
            score = sum(lowered.count(term) for term in query_terms)
            if score == 0 and query_terms:
                continue
            candidates.append(
                {
                    "title": path.name,
                    "path": str(path),
                    "content": content,
                    "score": float(score),
                    "doc_type": "code" if path.suffix == ".py" else "markdown",
                }
            )
        return candidates
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
import subprocess
|
|
5
|
+
import sys
|
|
6
|
+
import tempfile
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
from typing import Protocol
|
|
10
|
+
|
|
11
|
+
from minder.graph.state import GraphState
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class VerificationRunner(Protocol):
    """Structural interface for objects that can execute a Python snippet."""

    def run_python(
        self,
        code: str,
        timeout_seconds: int,
        repo_path: str | None,
    ) -> dict[str, object]:
        """Execute ``code`` and describe the outcome as a mapping.

        Args:
            code: Python source text to execute.
            timeout_seconds: time limit for the execution.
            repo_path: optional repository directory, or ``None``.

        Returns:
            A result mapping; the runners in this module use keys such as
            ``passed``, ``returncode``, ``stdout`` and ``stderr``.
        """
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SubprocessVerificationRunner:
    """Run a snippet with the current interpreter in a child process.

    The snippet is written to a temporary directory and executed with
    ``sys.executable``; the working directory is ``repo_path`` (or ``"."``).
    """

    @staticmethod
    def _to_text(stream: object, fallback: str = "") -> str:
        """Coerce captured output to ``str``, using ``fallback`` when empty.

        ``subprocess.TimeoutExpired`` may carry ``bytes`` even when the child
        was started with ``text=True``, so timeout output is decoded here.
        """
        if not stream:
            return fallback
        if isinstance(stream, bytes):
            return stream.decode("utf-8", errors="replace")
        return str(stream)

    def run_python(
        self,
        code: str,
        timeout_seconds: int,
        repo_path: str | None,
    ) -> dict[str, object]:
        """Execute ``code`` and report the outcome.

        Args:
            code: Python source text written to ``snippet.py`` and executed.
            timeout_seconds: limit passed to ``subprocess.run``.
            repo_path: working directory for the child; ``"."`` when falsy.

        Returns:
            A dict with ``passed``, ``returncode``, ``stdout``, ``stderr``,
            ``runner``, ``timeout_seconds``, ``failure_kind`` and ``retryable``.
            A timeout is reported with return code 124 and
            ``failure_kind == "timeout"``; ``stdout``/``stderr`` are always text.
        """
        cwd = repo_path or "."
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                script_path = Path(temp_dir) / "snippet.py"
                script_path.write_text(code, encoding="utf-8")
                completed = subprocess.run(
                    [sys.executable, str(script_path)],
                    capture_output=True,
                    text=True,
                    cwd=cwd,
                    timeout=timeout_seconds,
                    check=False,
                )
        except subprocess.TimeoutExpired as exc:
            return {
                "passed": False,
                "returncode": 124,
                "stdout": self._to_text(exc.stdout),
                "stderr": self._to_text(exc.stderr, "subprocess verification timed out"),
                "runner": "subprocess",
                "timeout_seconds": timeout_seconds,
                "failure_kind": "timeout",
                "retryable": False,
            }

        succeeded = completed.returncode == 0
        return {
            "passed": succeeded,
            "returncode": completed.returncode,
            "stdout": completed.stdout,
            "stderr": completed.stderr,
            "runner": "subprocess",
            "timeout_seconds": timeout_seconds,
            "failure_kind": None if succeeded else "runtime_error",
            "retryable": False,
        }
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class DockerSandboxRunner:
    """Run a snippet inside a Docker container started from ``image``.

    The container is launched with ``--network none`` and ``--read-only``; the
    snippet directory is mounted read-only at ``/workspace``.
    """

    def __init__(self, image: str = "minder-sandbox:latest") -> None:
        self._image = image

    @staticmethod
    def _to_text(stream: object, fallback: str = "") -> str:
        """Coerce captured output to ``str``, using ``fallback`` when empty.

        ``subprocess.TimeoutExpired`` may carry ``bytes`` even when the child
        was started with ``text=True``, so timeout output is decoded here.
        """
        if not stream:
            return fallback
        if isinstance(stream, bytes):
            return stream.decode("utf-8", errors="replace")
        return str(stream)

    def run_python(
        self,
        code: str,
        timeout_seconds: int,
        repo_path: str | None,
    ) -> dict[str, object]:
        """Execute ``code`` in the sandbox image and report the outcome.

        Args:
            code: Python source text written to ``snippet.py`` and executed.
            timeout_seconds: limit passed to ``subprocess.run`` for the
                ``docker run`` call (the image inspection has no limit).
            repo_path: working directory for the docker CLI; ``"."`` when falsy.

        Returns:
            A dict with ``passed``, ``returncode``, ``stdout``, ``stderr``,
            ``runner``, ``timeout_seconds``, ``failure_kind`` and ``retryable``
            (plus ``repo_path`` once the container has run). ``failure_kind``
            is one of ``docker_unavailable``, ``image_missing``,
            ``docker_daemon_unavailable``, ``timeout``, ``container_error`` or
            ``None``.
        """
        docker_binary = shutil.which("docker")
        if docker_binary is None:
            return {
                "passed": False,
                "returncode": 127,
                "stdout": "",
                "stderr": "docker binary not available",
                "runner": "docker",
                "timeout_seconds": timeout_seconds,
                "failure_kind": "docker_unavailable",
                "retryable": False,
            }

        cwd = repo_path or "."
        image_check = subprocess.run(
            [docker_binary, "image", "inspect", self._image],
            capture_output=True,
            text=True,
            cwd=cwd,
            check=False,
        )
        if image_check.returncode != 0:
            stderr = image_check.stderr or f"docker image '{self._image}' not available"
            lowered = stderr.lower()
            # Tell an unreachable daemon apart from a genuinely missing image.
            daemon_problem = (
                "permission denied" in lowered
                or "cannot connect" in lowered
                or "daemon" in lowered
            )
            return {
                "passed": False,
                "returncode": image_check.returncode,
                "stdout": image_check.stdout,
                "stderr": stderr,
                "runner": "docker",
                "timeout_seconds": timeout_seconds,
                "failure_kind": (
                    "docker_daemon_unavailable" if daemon_problem else "image_missing"
                ),
                "retryable": False,
            }

        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                script_path = Path(temp_dir) / "snippet.py"
                script_path.write_text(code, encoding="utf-8")
                completed = subprocess.run(
                    [
                        docker_binary,
                        "run",
                        "--rm",
                        "--network",
                        "none",
                        "--read-only",
                        "-v",
                        f"{temp_dir}:/workspace:ro",
                        "-w",
                        "/workspace",
                        self._image,
                        "python",
                        "snippet.py",
                    ],
                    capture_output=True,
                    text=True,
                    cwd=cwd,
                    timeout=timeout_seconds,
                    check=False,
                )
        except subprocess.TimeoutExpired as exc:
            # NOTE(review): the timeout kills the docker CLI client; the
            # container itself may keep running — confirm and consider naming
            # the container so it can be stopped explicitly.
            return {
                "passed": False,
                "returncode": 124,
                "stdout": self._to_text(exc.stdout),
                "stderr": self._to_text(exc.stderr, "docker verification timed out"),
                "runner": "docker",
                "timeout_seconds": timeout_seconds,
                "failure_kind": "timeout",
                "retryable": False,
            }

        succeeded = completed.returncode == 0
        return {
            "passed": succeeded,
            "returncode": completed.returncode,
            "stdout": completed.stdout,
            "stderr": completed.stderr,
            "runner": "docker",
            "timeout_seconds": timeout_seconds,
            "repo_path": repo_path,
            "failure_kind": None if succeeded else "container_error",
            "retryable": False,
        }
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class VerificationNode:
    """Graph node that executes a verification payload through a runner.

    Args:
        sandbox: ``"subprocess"`` selects the subprocess runner; any other
            value selects the docker runner.
        timeout_seconds: limit handed to the selected runner.
        docker_runner: runner used for docker execution; built from ``image``
            when not supplied.
        subprocess_runner: runner used for subprocess execution; a default
            one is built when not supplied.
        image: image name for the default docker runner.
    """

    def __init__(
        self,
        sandbox: str = "docker",
        timeout_seconds: int = 30,
        docker_runner: VerificationRunner | None = None,
        subprocess_runner: VerificationRunner | None = None,
        image: str = "minder-sandbox:latest",
    ) -> None:
        self._sandbox = sandbox
        self._timeout_seconds = timeout_seconds
        self._docker_runner = (
            docker_runner if docker_runner else DockerSandboxRunner(image=image)
        )
        self._subprocess_runner = (
            subprocess_runner if subprocess_runner else SubprocessVerificationRunner()
        )

    def run(self, state: GraphState) -> GraphState:
        """Verify ``state.metadata["verification_payload"]`` if one is present.

        Without a payload the step is recorded as passed and skipped. A payload
        whose ``language`` is not ``"python"`` is recorded as a failure.
        Otherwise the payload's ``code`` is run and the normalized runner
        result is stored.

        Returns:
            The same ``state`` object, with ``verification_result`` populated.
        """
        payload = state.metadata.get("verification_payload")

        if payload is None:
            outcome: dict[str, object] = {
                "passed": True,
                "returncode": 0,
                "stdout": "",
                "stderr": "",
                "runner": self._sandbox,
                "skipped": True,
                "timeout_seconds": self._timeout_seconds,
                "failure_kind": None,
                "retryable": False,
            }
        elif payload.get("language") != "python":
            outcome = {
                "passed": False,
                "returncode": 1,
                "stdout": "",
                "runner": self._sandbox,
                "stderr": "Unsupported verification language",
                "timeout_seconds": self._timeout_seconds,
                "failure_kind": "unsupported_language",
                "retryable": False,
            }
        else:
            source = str(payload.get("code", ""))
            runner = (
                self._subprocess_runner
                if self._sandbox == "subprocess"
                else self._docker_runner
            )
            raw = runner.run_python(source, self._timeout_seconds, state.repo_path)
            outcome = self._normalize_result(raw)

        state.verification_result = outcome
        return state

    @staticmethod
    def _normalize_result(result: dict[str, object]) -> dict[str, object]:
        """Return a copy of ``result`` with any missing standard keys filled in.

        Existing keys are never overwritten. Defaults that depend on success
        follow the truthiness of ``result["passed"]``.
        """
        succeeded = bool(result.get("passed"))
        fallbacks: dict[str, Any] = {
            "failure_kind": None if succeeded else "runtime_error",
            "retryable": False,
            "stdout": "",
            "stderr": "",
            "returncode": 0 if succeeded else 1,
        }
        normalized: dict[str, Any] = dict(result)
        for key, value in fallbacks.items():
            normalized.setdefault(key, value)
        return normalized
|