minder-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minder/__init__.py +12 -0
- minder/api/routers/prompts.py +177 -0
- minder/application/__init__.py +1 -0
- minder/application/admin/__init__.py +11 -0
- minder/application/admin/dto.py +453 -0
- minder/application/admin/jobs.py +327 -0
- minder/application/admin/use_cases.py +1895 -0
- minder/auth/__init__.py +12 -0
- minder/auth/context.py +26 -0
- minder/auth/middleware.py +70 -0
- minder/auth/principal.py +59 -0
- minder/auth/rate_limiter.py +89 -0
- minder/auth/rbac.py +60 -0
- minder/auth/service.py +541 -0
- minder/bootstrap/__init__.py +9 -0
- minder/bootstrap/providers.py +109 -0
- minder/bootstrap/transport.py +807 -0
- minder/cache/__init__.py +10 -0
- minder/cache/providers.py +140 -0
- minder/chunking/__init__.py +4 -0
- minder/chunking/code_splitter.py +184 -0
- minder/chunking/splitter.py +136 -0
- minder/cli.py +1542 -0
- minder/config.py +179 -0
- minder/continuity.py +363 -0
- minder/dev.py +160 -0
- minder/embedding/__init__.py +9 -0
- minder/embedding/base.py +7 -0
- minder/embedding/local.py +65 -0
- minder/embedding/openai.py +7 -0
- minder/graph/__init__.py +11 -0
- minder/graph/edges.py +13 -0
- minder/graph/executor.py +127 -0
- minder/graph/graph.py +263 -0
- minder/graph/nodes/__init__.py +27 -0
- minder/graph/nodes/evaluator.py +21 -0
- minder/graph/nodes/guard.py +64 -0
- minder/graph/nodes/llm.py +59 -0
- minder/graph/nodes/planning.py +30 -0
- minder/graph/nodes/reasoning.py +87 -0
- minder/graph/nodes/reranker.py +141 -0
- minder/graph/nodes/retriever.py +86 -0
- minder/graph/nodes/verification.py +230 -0
- minder/graph/nodes/workflow_planner.py +250 -0
- minder/graph/runtime.py +15 -0
- minder/graph/state.py +26 -0
- minder/llm/__init__.py +5 -0
- minder/llm/base.py +14 -0
- minder/llm/local.py +381 -0
- minder/llm/openai.py +89 -0
- minder/models/__init__.py +109 -0
- minder/models/base.py +10 -0
- minder/models/client.py +137 -0
- minder/models/document.py +34 -0
- minder/models/error.py +32 -0
- minder/models/graph.py +114 -0
- minder/models/history.py +32 -0
- minder/models/job.py +62 -0
- minder/models/prompt.py +41 -0
- minder/models/repository.py +62 -0
- minder/models/rule.py +68 -0
- minder/models/session.py +51 -0
- minder/models/skill.py +52 -0
- minder/models/user.py +41 -0
- minder/models/workflow.py +35 -0
- minder/observability/__init__.py +57 -0
- minder/observability/audit.py +243 -0
- minder/observability/logging.py +253 -0
- minder/observability/metrics.py +448 -0
- minder/observability/tracing.py +215 -0
- minder/presentation/__init__.py +1 -0
- minder/presentation/http/__init__.py +1 -0
- minder/presentation/http/admin/__init__.py +3 -0
- minder/presentation/http/admin/api.py +1309 -0
- minder/presentation/http/admin/context.py +94 -0
- minder/presentation/http/admin/dashboard.py +111 -0
- minder/presentation/http/admin/jobs.py +208 -0
- minder/presentation/http/admin/memories.py +185 -0
- minder/presentation/http/admin/prompts.py +219 -0
- minder/presentation/http/admin/routes.py +127 -0
- minder/presentation/http/admin/runtime.py +650 -0
- minder/presentation/http/admin/search.py +368 -0
- minder/presentation/http/admin/skills.py +230 -0
- minder/prompts/__init__.py +646 -0
- minder/prompts/formatter.py +142 -0
- minder/resources/__init__.py +318 -0
- minder/retrieval/__init__.py +5 -0
- minder/retrieval/hybrid.py +178 -0
- minder/retrieval/mmr.py +116 -0
- minder/retrieval/multi_hop.py +115 -0
- minder/runtime.py +15 -0
- minder/server.py +145 -0
- minder/store/__init__.py +64 -0
- minder/store/document.py +115 -0
- minder/store/error.py +82 -0
- minder/store/feedback.py +114 -0
- minder/store/graph.py +588 -0
- minder/store/history.py +57 -0
- minder/store/interfaces.py +512 -0
- minder/store/milvus/__init__.py +11 -0
- minder/store/milvus/client.py +26 -0
- minder/store/milvus/collections.py +15 -0
- minder/store/milvus/vector_store.py +232 -0
- minder/store/mongodb/__init__.py +11 -0
- minder/store/mongodb/client.py +49 -0
- minder/store/mongodb/indexes.py +90 -0
- minder/store/mongodb/operational_store.py +993 -0
- minder/store/relational.py +1087 -0
- minder/store/repo_state.py +58 -0
- minder/store/rule.py +93 -0
- minder/store/vector.py +79 -0
- minder/tools/__init__.py +47 -0
- minder/tools/auth.py +94 -0
- minder/tools/graph.py +839 -0
- minder/tools/ingest.py +353 -0
- minder/tools/memory.py +381 -0
- minder/tools/query.py +307 -0
- minder/tools/registry.py +269 -0
- minder/tools/repo_scanner.py +1266 -0
- minder/tools/search.py +15 -0
- minder/tools/session.py +316 -0
- minder/tools/skills.py +899 -0
- minder/tools/workflow.py +215 -0
- minder/transport/__init__.py +4 -0
- minder/transport/base.py +286 -0
- minder/transport/sse.py +252 -0
- minder/transport/stdio.py +29 -0
- minder_cli-0.2.0.dist-info/METADATA +318 -0
- minder_cli-0.2.0.dist-info/RECORD +132 -0
- minder_cli-0.2.0.dist-info/WHEEL +4 -0
- minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
- minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
from collections.abc import Awaitable
|
|
7
|
+
|
|
8
|
+
from minder.continuity import build_continuity_brief, build_instruction_envelope
|
|
9
|
+
from minder.graph.state import GraphState
|
|
10
|
+
from minder.store.interfaces import IGraphRepository, IOperationalStore
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _explicit_attribute(value: Any, name: str) -> Any | None:
    """Return attribute *name* of *value*, preferring the instance ``__dict__``.

    Args:
        value: Any object, or ``None``.
        name: Attribute name to look up.

    Returns:
        ``None`` when *value* is ``None``; the entry stored under *name* in
        ``value.__dict__`` when present; otherwise ``getattr(value, name, None)``.
    """
    if value is None:
        return None

    # A sentinel distinguishes "object has no __dict__" from any real value.
    no_dict = object()
    own_attrs = getattr(value, "__dict__", no_dict)
    if own_attrs is not no_dict and name in own_attrs:
        return own_attrs[name]

    return getattr(value, name, None)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class WorkflowPlannerNode:
    """Plan the workflow step and optionally enrich guidance with graph context.

    Args:
        store: Operational store for workflow / session data.
        graph_store: Optional knowledge-graph store.  When provided the node
                     queries the graph for cross-service dependencies of the
                     service matching the current repo and appends that
                     context (plus any recorded failing tests) to the
                     ``guidance`` string.
                     Omitting it (or passing ``None``) restores the original
                     behaviour and is fully backwards-compatible.
    """

    def __init__(
        self,
        store: IOperationalStore,
        *,
        graph_store: IGraphRepository | None = None,
    ) -> None:
        self._store = store
        self._graph_store = graph_store

    async def _load_session(self, session_id: Any) -> Any:
        """Fetch a session, awaiting the store result only when it is awaitable."""
        result = self._store.get_session_by_id(session_id)
        if isinstance(result, Awaitable):
            return await result
        return result

    async def _resolve_workflow(self, workflow_name: Any) -> Any | None:
        """Pick the workflow: by name, else the repo default, else the first listed."""
        workflow = None
        if workflow_name:
            workflow = await self._store.get_workflow_by_name(workflow_name)

        if workflow is None:
            workflows = await self._store.list_workflows()
            workflow = next(
                (item for item in workflows if item.default_for_repo), None
            )
            if workflow is None and workflows:
                workflow = workflows[0]
        return workflow

    async def run(self, state: GraphState) -> GraphState:
        """Populate ``state.workflow_context`` with the planned step and guidance.

        Reads ``workflow_name`` and ``cross_repo_context`` from
        ``state.workflow_context`` plus ``state.repo_id``, ``state.session_id``
        and ``state.repo_path``; writes the workflow name, steps, current/next
        step, completed/blocked steps, artifacts, guidance, instruction
        envelope and continuity brief back into ``state.workflow_context``.

        Returns:
            The same *state* object, mutated in place.
        """
        workflow = await self._resolve_workflow(
            state.workflow_context.get("workflow_name")
        )

        workflow_steps: list[dict[str, Any]] = []
        if workflow is not None and isinstance(workflow.steps, list):
            # Steps without a "name" cannot be planned; dropping them avoids a
            # KeyError further down.
            workflow_steps = [
                step
                for step in workflow.steps
                if isinstance(step, dict) and "name" in step
            ]

        workflow_state = None
        if state.repo_id is not None:
            workflow_state = await self._store.get_workflow_state_by_repo(state.repo_id)

        if not workflow_steps:
            workflow_steps = [{"name": "Test Writing"}, {"name": "Implementation"}]

        current_step = workflow_steps[0]["name"]
        completed_steps: list[str] = []
        blocked_by: list[str] = []
        artifacts: dict[str, Any] = {}
        if workflow_state is not None:
            # Tolerate unset (None / empty) fields on the stored state instead
            # of failing on ``list(None)`` or ``None.lower()``.
            current_step = workflow_state.current_step or current_step
            completed_steps = list(workflow_state.completed_steps or [])
            blocked_by = list(workflow_state.blocked_by or [])
            artifacts = dict(workflow_state.artifacts or {})

        next_step = self._next_step(current_step, workflow_steps)
        guidance = self._guidance_for_step(current_step)
        instruction_envelope: dict[str, Any] | None = None
        continuity_brief: dict[str, Any] | None = None

        # The session named on the graph state wins; the workflow state's
        # session id is only consulted as a fallback.
        session_id = state.session_id
        if session_id is None:
            session_id = _explicit_attribute(workflow_state, "session_id")
        session = None
        if session_id is not None:
            session = await self._load_session(session_id)

        if workflow is not None and workflow_state is not None:
            instruction_envelope = build_instruction_envelope(
                workflow=workflow,
                workflow_state=workflow_state,
            )
            if session is not None:
                continuity_brief = build_continuity_brief(
                    session=session,
                    workflow_state=workflow_state,
                    workflow=workflow,
                )
        guidance = self._format_guidance(
            guidance,
            instruction_envelope=instruction_envelope,
            continuity_brief=continuity_brief,
        )

        # ------------------------------------------------------------------
        # Graph-enriched guidance (P3-T12)
        # ------------------------------------------------------------------
        if self._graph_store is not None and state.repo_path:
            graph_guidance = await self._build_graph_guidance(
                state.repo_path, artifacts
            )
            if graph_guidance:
                guidance = guidance + "\n\n" + graph_guidance

        cross_repo_context = str(
            state.workflow_context.get("cross_repo_context", "") or ""
        ).strip()
        if cross_repo_context:
            guidance = guidance + "\n\n" + cross_repo_context

        state.workflow_context.update(
            {
                "workflow_name": workflow.name if workflow is not None else "default",
                "workflow_steps": workflow_steps,
                "current_step": current_step,
                "next_step": next_step,
                "completed_steps": completed_steps,
                "blocked_by": blocked_by,
                "artifacts": artifacts,
                "guidance": guidance,
                "instruction_envelope": instruction_envelope or {},
                "continuity_brief": continuity_brief or {},
            }
        )
        return state

    # ------------------------------------------------------------------
    # Graph guidance helper
    # ------------------------------------------------------------------

    @staticmethod
    def _matches_repo(node: Any, repo_name: str, repo_path: str) -> bool:
        """Return True when *node* is named *repo_name* or lives under *repo_path*."""
        if getattr(node, "name", None) == repo_name:
            return True
        metadata = getattr(node, "node_metadata", None)
        lookup = getattr(metadata, "get", None)
        if not callable(lookup):
            # Missing or ``None`` metadata simply means "no path match".
            return False
        node_path = lookup("path", "")
        return isinstance(node_path, str) and node_path.startswith(repo_path)

    async def _build_graph_guidance(
        self, repo_path: str, artifacts: dict[str, Any]
    ) -> str:
        """Query the graph store for dependency context for *repo_path*.

        Returns a non-empty string with additional guidance lines, or an
        empty string when no graph store is configured, the query fails, or
        no service node matches the repo.  Exceptions from the graph store
        are caught and logged so that a graph query failure never breaks the
        workflow pipeline.
        """
        graph_store = self._graph_store
        if graph_store is None:
            return ""

        repo_name = Path(repo_path).name
        lines: list[str] = []

        try:
            service_nodes = await graph_store.query_by_type("service")
        except Exception as exc:  # pragma: no cover
            logger.debug("graph_store.query_by_type failed: %s", exc)
            return ""

        # Find the first service node that matches this repo path.
        service = next(
            (
                node
                for node in service_nodes or []
                if self._matches_repo(node, repo_name, repo_path)
            ),
            None,
        )
        if service is None:
            return ""

        # Cross-service dependencies via ``depends_on`` edges.
        try:
            dep_neighbors = await graph_store.get_neighbors(
                service.id,
                direction="out",
                relation="depends_on",
            )
        except Exception as exc:  # pragma: no cover
            logger.debug("graph_store.get_neighbors failed: %s", exc)
            dep_neighbors = []

        if dep_neighbors:
            dep_names = sorted({n.name for n in dep_neighbors})
            lines.append(
                f"Cross-service dependencies detected: {', '.join(dep_names)}. "
                "Ensure changes do not break these dependency contracts."
            )

        # Failing tests recorded in the last workflow artefact run.
        failing = artifacts.get("failing_tests")
        if failing:
            lines.append(f"Failing tests from last run: {failing}")

        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Static helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _next_step(current_step: str, steps: list[dict[str, Any]]) -> str | None:
        """Return the name of the step after *current_step*.

        Falls back to the first step name when *current_step* is unknown, and
        returns ``None`` when *current_step* is last or there are no named
        steps.  Entries without a ``"name"`` key are ignored.
        """
        names = [
            step["name"] for step in steps if isinstance(step, dict) and "name" in step
        ]
        if current_step not in names:
            return names[0] if names else None
        current_index = names.index(current_step)
        if current_index + 1 >= len(names):
            return None
        return names[current_index + 1]

    @staticmethod
    def _guidance_for_step(current_step: str) -> str:
        """Return the base guidance sentence for *current_step* (keyword match)."""
        lowered = current_step.lower()
        if "test" in lowered:
            return "Current step: Test Writing. Write tests before implementation."
        if "implement" in lowered:
            return "Current step: Implementation. Use existing failing tests as the contract."
        if "review" in lowered:
            return (
                "Current step: Review. Focus on correctness, regressions, and coverage."
            )
        return f"Current step: {current_step}. Follow the workflow and do not skip prerequisites."

    @staticmethod
    def _format_guidance(
        base_guidance: str,
        *,
        instruction_envelope: dict[str, Any] | None,
        continuity_brief: dict[str, Any] | None,
    ) -> str:
        """Join the base guidance with the optional envelope and brief sections.

        Empty or ``None`` sections are omitted so the guidance never contains
        a literal ``"Instruction envelope: None"``.
        """
        sections = [base_guidance]
        if instruction_envelope:
            sections.append(f"Instruction envelope: {instruction_envelope}")
        if continuity_brief:
            sections.append(f"Continuity brief: {continuity_brief}")
        return "\n\n".join(sections)
|
minder/graph/runtime.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from minder.runtime import load_attr, module_available
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def graph_runtime_name(preferred: str = "langgraph") -> str:
    """Return the graph runtime to use: ``"langgraph"`` or ``"internal"``.

    ``"langgraph"`` is chosen only when it is the preferred runtime and
    ``module_available("langgraph")`` reports true; every other case falls
    back to ``"internal"``.
    """
    if preferred != "langgraph":
        return "internal"
    return "langgraph" if module_available("langgraph") else "internal"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def load_langgraph_state_graph() -> Any | None:
    """Return whatever ``load_attr`` resolves for ``langgraph.graph.StateGraph``.

    NOTE(review): the ``Any | None`` return type suggests ``load_attr`` yields
    ``None`` when the module or attribute is unavailable -- confirm against
    ``minder.runtime``.
    """
    state_graph_cls = load_attr("langgraph.graph", "StateGraph")
    return state_graph_cls
|
minder/graph/state.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import uuid
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GraphState(BaseModel):
    """Pydantic model carrying a query and the per-stage data attached to it.

    Only ``query`` is required; identifiers default to ``None``, containers
    default to fresh empty instances, and ``retry_count`` starts at ``0``.
    """

    # --- Request identity -------------------------------------------------
    query: str
    session_id: uuid.UUID | None = None
    user_id: uuid.UUID | None = None
    repo_id: uuid.UUID | None = None
    repo_path: str | None = None

    # --- Per-stage payloads (each defaults to its own empty container) ----
    plan: dict[str, Any] = Field(default_factory=dict)
    retrieved_docs: list[dict[str, Any]] = Field(default_factory=list)
    reranked_docs: list[dict[str, Any]] = Field(default_factory=list)
    workflow_context: dict[str, Any] = Field(default_factory=dict)
    reasoning_output: dict[str, Any] = Field(default_factory=dict)
    llm_output: dict[str, Any] = Field(default_factory=dict)
    guard_result: dict[str, Any] = Field(default_factory=dict)
    verification_result: dict[str, Any] = Field(default_factory=dict)
    evaluation: dict[str, Any] = Field(default_factory=dict)

    # --- Bookkeeping ------------------------------------------------------
    transition_log: list[dict[str, Any]] = Field(default_factory=list)
    retry_count: int = 0
    metadata: dict[str, Any] = Field(default_factory=dict)
|
minder/llm/__init__.py
ADDED
minder/llm/base.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Generator
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
|
|
6
|
+
from minder.graph.state import GraphState
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LLMClient(Protocol):
    """Structural interface for LLM backends that operate on a ``GraphState``."""

    def generate(self, state: GraphState) -> dict[str, object]:
        """Produce a complete result dictionary for *state* in one call."""
        ...

    def stream_generate(
        self, state: GraphState
    ) -> Generator[dict[str, object], None, None]:
        """Yield result dictionaries for *state* incrementally."""
        ...
|
minder/llm/local.py
ADDED
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Generator
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from minder.graph.state import GraphState
|
|
8
|
+
from minder.runtime import load_attr, module_available
|
|
9
|
+
|
|
10
|
+
# Substrings identifying llama.cpp runtime log lines; lines or deltas that
# contain any of them are removed from model output.
_RUNTIME_LOG_MARKERS = (
    "Using chat eos_token:",
    "Using chat bos_token:",
    "llama_perf_context_print:",
    "~llama_context:",
    "ggml_metal_free:",
)

# Substrings identifying chat-template fragments; lines or deltas that
# contain any of them are removed from model output.
_CHAT_TEMPLATE_MARKERS = (
    "{{- '<turn|>\\n' -}}",
    "{%- if add_generation_prompt -%}",
    "{%- endif -%}",
    "{%- endfor -%}",
)


class LocalModelLLM:
    """LLM client backed by a local model file, with a deterministic mock mode.

    Args:
        model_path: Path to the model file (``~`` is expanded).
        fail: When true, every generation call raises ``RuntimeError``.
        runtime: ``"mock"``, ``"llama_cpp"`` or ``"auto"``.  ``"auto"``
            resolves to ``"llama_cpp"`` when the model file exists and the
            ``llama_cpp`` module is available, otherwise to ``"mock"``.
        context_length: Requested context size; values below 512 are raised
            to 512.
    """

    # Model identifier reported in every result payload.
    _MODEL_NAME = "gemma-4-e2b-it"

    def __init__(
        self,
        model_path: str,
        fail: bool = False,
        runtime: str = "mock",
        context_length: int = 131072,
    ) -> None:
        self._model_path = model_path
        self._fail = fail
        self._runtime = runtime
        self._context_length = max(512, context_length)
        self._client: Any | None = None

    @property
    def runtime(self) -> str:
        """Return the effective runtime name, resolving ``"auto"`` on access."""
        if self._runtime != "auto":
            # Explicit runtimes need no filesystem probe.
            return self._runtime
        model_exists = Path(self._model_path).expanduser().exists()
        if model_exists and module_available("llama_cpp"):
            return "llama_cpp"
        return "mock"

    # ------------------------------------------------------------------
    # Shared builders
    # ------------------------------------------------------------------

    @staticmethod
    def _source_paths(state: GraphState) -> list[str]:
        """Return the paths of the first three reranked docs that have one."""
        paths: list[str] = []
        for doc in state.reranked_docs[:3]:
            path = doc.get("path") if isinstance(doc, dict) else None
            if path:
                paths.append(str(path))
        return paths

    @staticmethod
    def _fallback_text(state: GraphState, source_paths: list[str]) -> str:
        """Build the deterministic text used when no model output is available."""
        guidance = state.workflow_context.get("guidance", "")
        return (
            f"{guidance}\n"
            f"Plan intent: {state.plan.get('intent', 'unknown')}.\n"
            f"Answer: grounded response for '{state.query}'.\n"
            f"Sources: {', '.join(source_paths) if source_paths else 'none'}."
        )

    def _result_payload(
        self,
        *,
        text: str,
        sources: list[str],
        runtime: str,
        stream: list[str],
    ) -> dict[str, object]:
        """Assemble the result dictionary shared by all generation paths."""
        return {
            "text": text,
            "sources": sources,
            "provider": "local_llm",
            "model": self._MODEL_NAME,
            "model_path": self._model_path,
            "runtime": runtime,
            "stream": stream,
        }

    def _fallback_events(
        self, *, fallback: str, sources: list[str], runtime: str
    ) -> Generator[dict[str, object], None, None]:
        """Yield the chunk/result event pair for a fallback-only response."""
        if fallback:
            yield {"type": "chunk", "delta": fallback}
        yield {
            "type": "result",
            "result": self._result_payload(
                text=fallback,
                sources=sources,
                runtime=runtime,
                stream=[fallback] if fallback else [],
            ),
        }

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def generate(self, state: GraphState) -> dict[str, object]:
        """Return a complete result payload for *state*.

        Raises:
            RuntimeError: When the instance was created with ``fail=True``.
        """
        if self._fail:
            raise RuntimeError("Local model unavailable")

        runtime = self.runtime
        source_paths = self._source_paths(state)
        text = self._fallback_text(state, source_paths)
        if runtime == "llama_cpp":
            text = self._generate_with_llama_cpp(state, fallback=text)

        return self._result_payload(
            text=text,
            sources=source_paths,
            runtime=runtime,
            stream=[line for line in text.splitlines() if line],
        )

    def stream_generate(
        self, state: GraphState
    ) -> Generator[dict[str, object], None, None]:
        """Yield ``{"type": "chunk"}`` events followed by one ``{"type": "result"}``.

        Raises:
            RuntimeError: On first iteration when created with ``fail=True``.
        """
        if self._fail:
            raise RuntimeError("Local model unavailable")

        runtime = self.runtime
        source_paths = self._source_paths(state)
        fallback = self._fallback_text(state, source_paths)

        client = self._llama_client() if runtime == "llama_cpp" else None
        if client is None:
            yield from self._fallback_events(
                fallback=fallback, sources=source_paths, runtime=runtime
            )
            return

        reasoning_output = getattr(state, "reasoning_output", {}) or {}
        prompt = str(reasoning_output.get("prompt") or state.query)
        deltas: list[str] = []
        try:
            for delta in self._stream_with_llama_cpp(
                client,
                prompt=prompt,
                max_tokens=256,
                temperature=0.1,
            ):
                cleaned_delta = self._clean_stream_delta(delta)
                if not cleaned_delta:
                    continue
                deltas.append(cleaned_delta)
                yield {"type": "chunk", "delta": cleaned_delta}
        except Exception:
            # Best-effort: any streaming failure degrades to the fallback
            # text, which also replaces partial deltas in the final result.
            if fallback:
                yield {"type": "chunk", "delta": fallback}
            deltas = [fallback] if fallback else []

        text = self._clean_generated_text("".join(deltas)) or fallback
        yield {
            "type": "result",
            "result": self._result_payload(
                text=text,
                sources=source_paths,
                runtime=runtime,
                stream=deltas if deltas else ([text] if text else []),
            ),
        }

    def complete_text(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.1,
        fallback: str = "",
    ) -> str:
        """Complete *prompt* with the local model, or return *fallback*.

        *fallback* is returned when the runtime is not ``"llama_cpp"``, the
        client cannot be created, the completion call raises, or the cleaned
        output is empty.

        Raises:
            RuntimeError: When the instance was created with ``fail=True``.
        """
        if self._fail:
            raise RuntimeError("Local model unavailable")

        if self.runtime != "llama_cpp":
            return fallback

        client = self._llama_client()
        if client is None:
            return fallback

        try:
            response = self._complete_with_llama_cpp(
                client,
                prompt=prompt,
                max_tokens=max_tokens,
                temperature=temperature,
            )
        except Exception:
            return fallback

        text = self._response_text(response, fallback=fallback)
        cleaned = self._clean_generated_text(text)
        return cleaned or fallback

    # ------------------------------------------------------------------
    # llama.cpp plumbing
    # ------------------------------------------------------------------

    def _generate_with_llama_cpp(self, state: GraphState, *, fallback: str) -> str:
        """Complete the reasoning prompt (or the raw query) for *state*."""
        reasoning_output = getattr(state, "reasoning_output", {}) or {}
        prompt = reasoning_output.get("prompt") or state.query
        return self.complete_text(
            str(prompt),
            max_tokens=256,
            temperature=0.1,
            fallback=fallback,
        )

    def _complete_with_llama_cpp(
        self,
        client: Any,
        *,
        prompt: str,
        max_tokens: int,
        temperature: float,
    ) -> Any:
        """Call the chat-completion API when the client has one, else plain completion."""
        chat_completion = getattr(client, "create_chat_completion", None)
        if callable(chat_completion):
            return chat_completion(
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                temperature=temperature,
            )
        return client.create_completion(
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temperature,
        )

    def _stream_with_llama_cpp(
        self,
        client: Any,
        *,
        prompt: str,
        max_tokens: int,
        temperature: float,
    ) -> Generator[str, None, None]:
        """Yield text deltas from a streaming (chat) completion call."""
        chat_completion = getattr(client, "create_chat_completion", None)
        if callable(chat_completion):
            response = chat_completion(
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                temperature=temperature,
                stream=True,
            )
        else:
            response = client.create_completion(
                prompt=prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                stream=True,
            )
        yield from self._extract_stream_deltas(response)

    def _extract_stream_deltas(self, response: Any) -> Generator[str, None, None]:
        """Yield the non-empty text delta of each chunk in *response*."""
        for chunk in response:
            delta = self._response_delta(chunk)
            if delta:
                yield delta

    def _response_delta(self, chunk: Any) -> str:
        """Extract the text of the first choice of a streaming *chunk*.

        Handles both mapping-style and attribute-style chunks; returns ``""``
        when no string ``delta.content`` or ``text`` is present.
        """
        if isinstance(chunk, dict):
            choices = chunk.get("choices", [])
        else:
            choices = getattr(chunk, "choices", [])
        if not choices:
            return ""

        first = choices[0]
        if isinstance(first, dict):
            delta = first.get("delta")
            if isinstance(delta, dict):
                content = delta.get("content")
                if isinstance(content, str):
                    return content
            text = first.get("text")
            return text if isinstance(text, str) else ""

        delta = getattr(first, "delta", None)
        content = getattr(delta, "content", None) if delta is not None else None
        if isinstance(content, str):
            return content
        text = getattr(first, "text", None)
        return text if isinstance(text, str) else ""

    def _response_text(self, response: Any, *, fallback: str) -> str:
        """Extract the stripped text of the first choice of *response*.

        Handles mapping-style responses (string or list-of-parts message
        content, or a ``text`` field) and attribute-style responses; returns
        *fallback* when nothing usable is found.
        """
        if isinstance(response, dict):
            choices = response.get("choices", [])
        else:
            choices = getattr(response, "choices", [])
        if not choices:
            return fallback

        first = choices[0]
        if isinstance(first, dict):
            message = first.get("message")
            if isinstance(message, dict):
                content = message.get("content")
                if isinstance(content, str):
                    return content.strip() or fallback
                if isinstance(content, list):
                    parts = [
                        str(item.get("text", "")).strip()
                        for item in content
                        if isinstance(item, dict) and str(item.get("text", "")).strip()
                    ]
                    if parts:
                        return "\n".join(parts)
            text = first.get("text")
            if isinstance(text, str):
                return text.strip() or fallback
            return fallback

        message = getattr(first, "message", None)
        content = getattr(message, "content", None) if message is not None else None
        if isinstance(content, str):
            return content.strip() or fallback
        text = getattr(first, "text", None)
        if isinstance(text, str):
            return text.strip() or fallback
        return fallback

    # ------------------------------------------------------------------
    # Output cleaning
    # ------------------------------------------------------------------

    def _clean_generated_text(self, text: str) -> str:
        """Drop runtime-log / template lines and collapse runs of blank lines."""
        if not text:
            return ""

        kept_lines: list[str] = []
        for line in text.splitlines():
            stripped = line.strip()
            if not stripped:
                # Keep at most one blank line, and never a leading one.
                if kept_lines and kept_lines[-1] != "":
                    kept_lines.append("")
                continue
            if any(marker in stripped for marker in _RUNTIME_LOG_MARKERS):
                continue
            if any(marker in stripped for marker in _CHAT_TEMPLATE_MARKERS):
                continue
            kept_lines.append(line)

        return "\n".join(kept_lines).strip()

    def _clean_stream_delta(self, delta: str) -> str:
        """Return *delta* unchanged unless it carries a log or template marker.

        Whitespace-only deltas are passed through untouched so spacing between
        streamed tokens is preserved.
        """
        stripped = delta.strip()
        if not stripped:
            return delta
        if any(marker in stripped for marker in _RUNTIME_LOG_MARKERS):
            return ""
        if any(marker in stripped for marker in _CHAT_TEMPLATE_MARKERS):
            return ""
        return delta

    def _llama_client(self) -> Any | None:
        """Return the cached llama.cpp client, creating it on first use.

        Construction is first attempted with ``flash_attn=True`` and retried
        without it on ``TypeError``.  Returns ``None`` when the ``Llama``
        class cannot be loaded or construction fails.
        """
        if self._client is not None:
            return self._client
        llama_cls = load_attr("llama_cpp", "Llama")
        if llama_cls is None:
            return None
        base_kwargs = {
            "model_path": str(Path(self._model_path).expanduser()),
            "verbose": False,
            "n_ctx": self._context_length,
        }
        try:
            self._client = llama_cls(
                **base_kwargs,
                flash_attn=True,
            )
        except TypeError:
            # Presumably a Llama build whose constructor rejects flash_attn.
            try:
                self._client = llama_cls(**base_kwargs)
            except Exception:
                return None
        except Exception:
            return None
        return self._client
|