minder-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. minder/__init__.py +12 -0
  2. minder/api/routers/prompts.py +177 -0
  3. minder/application/__init__.py +1 -0
  4. minder/application/admin/__init__.py +11 -0
  5. minder/application/admin/dto.py +453 -0
  6. minder/application/admin/jobs.py +327 -0
  7. minder/application/admin/use_cases.py +1895 -0
  8. minder/auth/__init__.py +12 -0
  9. minder/auth/context.py +26 -0
  10. minder/auth/middleware.py +70 -0
  11. minder/auth/principal.py +59 -0
  12. minder/auth/rate_limiter.py +89 -0
  13. minder/auth/rbac.py +60 -0
  14. minder/auth/service.py +541 -0
  15. minder/bootstrap/__init__.py +9 -0
  16. minder/bootstrap/providers.py +109 -0
  17. minder/bootstrap/transport.py +807 -0
  18. minder/cache/__init__.py +10 -0
  19. minder/cache/providers.py +140 -0
  20. minder/chunking/__init__.py +4 -0
  21. minder/chunking/code_splitter.py +184 -0
  22. minder/chunking/splitter.py +136 -0
  23. minder/cli.py +1542 -0
  24. minder/config.py +179 -0
  25. minder/continuity.py +363 -0
  26. minder/dev.py +160 -0
  27. minder/embedding/__init__.py +9 -0
  28. minder/embedding/base.py +7 -0
  29. minder/embedding/local.py +65 -0
  30. minder/embedding/openai.py +7 -0
  31. minder/graph/__init__.py +11 -0
  32. minder/graph/edges.py +13 -0
  33. minder/graph/executor.py +127 -0
  34. minder/graph/graph.py +263 -0
  35. minder/graph/nodes/__init__.py +27 -0
  36. minder/graph/nodes/evaluator.py +21 -0
  37. minder/graph/nodes/guard.py +64 -0
  38. minder/graph/nodes/llm.py +59 -0
  39. minder/graph/nodes/planning.py +30 -0
  40. minder/graph/nodes/reasoning.py +87 -0
  41. minder/graph/nodes/reranker.py +141 -0
  42. minder/graph/nodes/retriever.py +86 -0
  43. minder/graph/nodes/verification.py +230 -0
  44. minder/graph/nodes/workflow_planner.py +250 -0
  45. minder/graph/runtime.py +15 -0
  46. minder/graph/state.py +26 -0
  47. minder/llm/__init__.py +5 -0
  48. minder/llm/base.py +14 -0
  49. minder/llm/local.py +381 -0
  50. minder/llm/openai.py +89 -0
  51. minder/models/__init__.py +109 -0
  52. minder/models/base.py +10 -0
  53. minder/models/client.py +137 -0
  54. minder/models/document.py +34 -0
  55. minder/models/error.py +32 -0
  56. minder/models/graph.py +114 -0
  57. minder/models/history.py +32 -0
  58. minder/models/job.py +62 -0
  59. minder/models/prompt.py +41 -0
  60. minder/models/repository.py +62 -0
  61. minder/models/rule.py +68 -0
  62. minder/models/session.py +51 -0
  63. minder/models/skill.py +52 -0
  64. minder/models/user.py +41 -0
  65. minder/models/workflow.py +35 -0
  66. minder/observability/__init__.py +57 -0
  67. minder/observability/audit.py +243 -0
  68. minder/observability/logging.py +253 -0
  69. minder/observability/metrics.py +448 -0
  70. minder/observability/tracing.py +215 -0
  71. minder/presentation/__init__.py +1 -0
  72. minder/presentation/http/__init__.py +1 -0
  73. minder/presentation/http/admin/__init__.py +3 -0
  74. minder/presentation/http/admin/api.py +1309 -0
  75. minder/presentation/http/admin/context.py +94 -0
  76. minder/presentation/http/admin/dashboard.py +111 -0
  77. minder/presentation/http/admin/jobs.py +208 -0
  78. minder/presentation/http/admin/memories.py +185 -0
  79. minder/presentation/http/admin/prompts.py +219 -0
  80. minder/presentation/http/admin/routes.py +127 -0
  81. minder/presentation/http/admin/runtime.py +650 -0
  82. minder/presentation/http/admin/search.py +368 -0
  83. minder/presentation/http/admin/skills.py +230 -0
  84. minder/prompts/__init__.py +646 -0
  85. minder/prompts/formatter.py +142 -0
  86. minder/resources/__init__.py +318 -0
  87. minder/retrieval/__init__.py +5 -0
  88. minder/retrieval/hybrid.py +178 -0
  89. minder/retrieval/mmr.py +116 -0
  90. minder/retrieval/multi_hop.py +115 -0
  91. minder/runtime.py +15 -0
  92. minder/server.py +145 -0
  93. minder/store/__init__.py +64 -0
  94. minder/store/document.py +115 -0
  95. minder/store/error.py +82 -0
  96. minder/store/feedback.py +114 -0
  97. minder/store/graph.py +588 -0
  98. minder/store/history.py +57 -0
  99. minder/store/interfaces.py +512 -0
  100. minder/store/milvus/__init__.py +11 -0
  101. minder/store/milvus/client.py +26 -0
  102. minder/store/milvus/collections.py +15 -0
  103. minder/store/milvus/vector_store.py +232 -0
  104. minder/store/mongodb/__init__.py +11 -0
  105. minder/store/mongodb/client.py +49 -0
  106. minder/store/mongodb/indexes.py +90 -0
  107. minder/store/mongodb/operational_store.py +993 -0
  108. minder/store/relational.py +1087 -0
  109. minder/store/repo_state.py +58 -0
  110. minder/store/rule.py +93 -0
  111. minder/store/vector.py +79 -0
  112. minder/tools/__init__.py +47 -0
  113. minder/tools/auth.py +94 -0
  114. minder/tools/graph.py +839 -0
  115. minder/tools/ingest.py +353 -0
  116. minder/tools/memory.py +381 -0
  117. minder/tools/query.py +307 -0
  118. minder/tools/registry.py +269 -0
  119. minder/tools/repo_scanner.py +1266 -0
  120. minder/tools/search.py +15 -0
  121. minder/tools/session.py +316 -0
  122. minder/tools/skills.py +899 -0
  123. minder/tools/workflow.py +215 -0
  124. minder/transport/__init__.py +4 -0
  125. minder/transport/base.py +286 -0
  126. minder/transport/sse.py +252 -0
  127. minder/transport/stdio.py +29 -0
  128. minder_cli-0.2.0.dist-info/METADATA +318 -0
  129. minder_cli-0.2.0.dist-info/RECORD +132 -0
  130. minder_cli-0.2.0.dist-info/WHEEL +4 -0
  131. minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
  132. minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,250 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from pathlib import Path
5
+ from typing import Any
6
+ from collections.abc import Awaitable
7
+
8
+ from minder.continuity import build_continuity_brief, build_instruction_envelope
9
+ from minder.graph.state import GraphState
10
+ from minder.store.interfaces import IGraphRepository, IOperationalStore
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ def _explicit_attribute(value: Any, name: str) -> Any | None:
16
+ if value is None:
17
+ return None
18
+ if hasattr(value, "__dict__") and name in value.__dict__:
19
+ return value.__dict__[name]
20
+ return getattr(value, name, None)
21
+
22
+
23
class WorkflowPlannerNode:
    """Plan the workflow step and optionally enrich guidance with graph context.

    ``run`` resolves the active workflow and the repository's persisted
    workflow state, derives the current / next step plus a guidance string,
    and writes the result into ``state.workflow_context``.

    Args:
        store: Operational store for workflow / session data.
        graph_store: Optional knowledge-graph store. When provided the node
            queries the graph for cross-service dependencies and
            failing-test artefacts belonging to the current repo and
            appends that context to the ``guidance`` string.
            Omitting it (or passing ``None``) restores the original
            behaviour and is fully backwards-compatible.
    """

    def __init__(
        self,
        store: IOperationalStore,
        *,
        graph_store: IGraphRepository | None = None,
    ) -> None:
        self._store = store
        self._graph_store = graph_store

    async def _load_session(self, session_id: Any) -> Any:
        """Fetch a session by id from a store whose getter may be sync or async."""
        result = self._store.get_session_by_id(session_id)
        if isinstance(result, Awaitable):
            return await result
        return result

    async def run(self, state: GraphState) -> GraphState:
        """Populate ``state.workflow_context`` with the planned workflow step.

        Resolution order for the workflow: the name already present in
        ``workflow_context["workflow_name"]``, then the first stored workflow
        flagged ``default_for_repo``, then the first stored workflow. When no
        usable steps are found a two-step default
        (``Test Writing`` -> ``Implementation``) is used.

        Returns:
            The same *state* object, mutated in place.
        """
        workflow = None
        requested_name = state.workflow_context.get("workflow_name")
        if requested_name:
            workflow = await self._store.get_workflow_by_name(requested_name)

        if workflow is None:
            workflows = await self._store.list_workflows()
            workflow = next((item for item in workflows if item.default_for_repo), None)
            if workflow is None and workflows:
                workflow = workflows[0]

        workflow_steps: list[dict[str, Any]] = []
        if workflow is not None:
            raw_steps: list[Any] = (
                workflow.steps if isinstance(workflow.steps, list) else []
            )
            # Only dict steps that actually carry a "name" can be planned;
            # a nameless step previously raised KeyError further down.
            workflow_steps = [
                step
                for step in raw_steps
                if isinstance(step, dict) and "name" in step
            ]

        workflow_state = None
        if state.repo_id is not None:
            workflow_state = await self._store.get_workflow_state_by_repo(state.repo_id)

        if not workflow_steps:
            workflow_steps = [{"name": "Test Writing"}, {"name": "Implementation"}]

        # Defaults for a repo without persisted workflow state.
        current_step = workflow_steps[0]["name"]
        completed_steps: list[str] = []
        blocked_by: list[str] = []
        artifacts: dict[str, Any] = {}
        if workflow_state is not None:
            current_step = workflow_state.current_step
            # Copy so later mutation of the context cannot alias stored state.
            completed_steps = list(workflow_state.completed_steps)
            blocked_by = list(workflow_state.blocked_by)
            artifacts = dict(workflow_state.artifacts)

        next_step = self._next_step(current_step, workflow_steps)
        guidance = self._guidance_for_step(current_step)
        instruction_envelope: dict[str, Any] | None = None
        continuity_brief: dict[str, Any] | None = None
        session = None
        workflow_state_session_id = _explicit_attribute(workflow_state, "session_id")
        # The session on the graph state takes precedence over the one
        # recorded on the workflow state.
        if state.session_id is not None:
            session = await self._load_session(state.session_id)
        elif workflow_state_session_id is not None:
            session = await self._load_session(workflow_state_session_id)

        if workflow is not None and workflow_state is not None:
            instruction_envelope = build_instruction_envelope(
                workflow=workflow,
                workflow_state=workflow_state,
            )
            if session is not None:
                continuity_brief = build_continuity_brief(
                    session=session,
                    workflow_state=workflow_state,
                    workflow=workflow,
                )
            guidance = self._format_guidance(
                guidance,
                instruction_envelope=instruction_envelope,
                continuity_brief=continuity_brief,
            )

        # ------------------------------------------------------------------
        # Graph-enriched guidance (P3-T12)
        # ------------------------------------------------------------------
        if self._graph_store is not None and state.repo_path:
            graph_guidance = await self._build_graph_guidance(
                state.repo_path, artifacts
            )
            if graph_guidance:
                guidance = guidance + "\n\n" + graph_guidance

        cross_repo_context = str(
            state.workflow_context.get("cross_repo_context", "") or ""
        ).strip()
        if cross_repo_context:
            guidance = guidance + "\n\n" + cross_repo_context

        state.workflow_context.update(
            {
                "workflow_name": workflow.name if workflow is not None else "default",
                "workflow_steps": workflow_steps,
                "current_step": current_step,
                "next_step": next_step,
                "completed_steps": completed_steps,
                "blocked_by": blocked_by,
                "artifacts": artifacts,
                "guidance": guidance,
                "instruction_envelope": instruction_envelope or {},
                "continuity_brief": continuity_brief or {},
            }
        )
        return state

    # ------------------------------------------------------------------
    # Graph guidance helper
    # ------------------------------------------------------------------

    async def _build_graph_guidance(
        self, repo_path: str, artifacts: dict[str, Any]
    ) -> str:
        """Query the graph store for dependency context for *repo_path*.

        Returns a non-empty string with additional guidance lines, or an
        empty string when no relevant graph data is found. All exceptions
        from the graph store are caught and logged so that a graph query
        failure never breaks the workflow pipeline. Service nodes with
        missing or malformed metadata are simply treated as non-matching.
        """
        repo_name = Path(repo_path).name
        lines: list[str] = []

        try:
            service_nodes = await self._graph_store.query_by_type("service")  # type: ignore[union-attr]
        except Exception as exc:  # pragma: no cover
            logger.debug("graph_store.query_by_type failed: %s", exc)
            return ""

        # Find the service node that best matches this repo path.
        matching = [
            node
            for node in service_nodes
            if self._matches_repo(node, repo_name, repo_path)
        ]

        if not matching:
            return ""

        service = matching[0]

        # Cross-service dependencies via ``depends_on`` edges.
        try:
            dep_neighbors = await self._graph_store.get_neighbors(  # type: ignore[union-attr]
                service.id,
                direction="out",
                relation="depends_on",
            )
        except Exception as exc:  # pragma: no cover
            logger.debug("graph_store.get_neighbors failed: %s", exc)
            dep_neighbors = []

        if dep_neighbors:
            dep_names = sorted({n.name for n in dep_neighbors})
            lines.append(
                f"Cross-service dependencies detected: {', '.join(dep_names)}. "
                "Ensure changes do not break these dependency contracts."
            )

        # Failing tests recorded in the last workflow artefact run.
        failing = artifacts.get("failing_tests")
        if failing:
            lines.append(f"Failing tests from last run: {failing}")

        return "\n".join(lines)

    @staticmethod
    def _matches_repo(node: Any, repo_name: str, repo_path: str) -> bool:
        """Return True when a service node is named after, or located under, the repo.

        A node matches if its ``name`` equals *repo_name*, or if its
        ``node_metadata["path"]`` is a string starting with *repo_path*.
        Absent / ``None`` metadata and non-string paths never match instead
        of raising.
        """
        if getattr(node, "name", None) == repo_name:
            return True
        metadata = getattr(node, "node_metadata", None)
        node_path = metadata.get("path") if metadata else None
        return isinstance(node_path, str) and node_path.startswith(repo_path)

    # ------------------------------------------------------------------
    # Static helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _next_step(current_step: str, steps: list[dict[str, Any]]) -> str | None:
        """Return the name of the step following *current_step*.

        Steps without a ``"name"`` key are ignored. When *current_step* is
        not among the named steps the first step name is returned (or
        ``None`` for an empty list); when it is the last step the result is
        ``None``.
        """
        names = [step["name"] for step in steps if "name" in step]
        if current_step not in names:
            return names[0] if names else None
        current_index = names.index(current_step)
        if current_index + 1 >= len(names):
            return None
        return names[current_index + 1]

    @staticmethod
    def _guidance_for_step(current_step: str) -> str:
        """Map a step name to its base guidance sentence.

        Matching is by case-insensitive substring, checked in the order
        ``test``, ``implement``, ``review``; anything else gets a generic
        message that echoes the step name.
        """
        lowered = current_step.lower()
        if "test" in lowered:
            return "Current step: Test Writing. Write tests before implementation."
        if "implement" in lowered:
            return "Current step: Implementation. Use existing failing tests as the contract."
        if "review" in lowered:
            return (
                "Current step: Review. Focus on correctness, regressions, and coverage."
            )
        return f"Current step: {current_step}. Follow the workflow and do not skip prerequisites."

    @staticmethod
    def _format_guidance(
        base_guidance: str,
        *,
        instruction_envelope: dict[str, Any],
        continuity_brief: dict[str, Any] | None,
    ) -> str:
        """Join the base guidance, the envelope and (if any) the continuity brief.

        Sections are separated by a blank line. The instruction envelope is
        always included; the continuity brief only when it is non-empty.
        """
        sections = [base_guidance]
        sections.append(f"Instruction envelope: {instruction_envelope}")
        if continuity_brief:
            sections.append(f"Continuity brief: {continuity_brief}")
        return "\n\n".join(sections)
@@ -0,0 +1,15 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from minder.runtime import load_attr, module_available
6
+
7
+
8
def graph_runtime_name(preferred: str = "langgraph") -> str:
    """Choose the graph runtime identifier to use.

    ``"langgraph"`` is returned only when it is the preferred runtime and the
    ``langgraph`` module is reported as available; every other case yields
    ``"internal"``.
    """
    wants_langgraph = preferred == "langgraph"
    # Availability is only probed when langgraph was actually requested.
    if not wants_langgraph or not module_available("langgraph"):
        return "internal"
    return "langgraph"
12
+
13
+
14
def load_langgraph_state_graph() -> Any | None:
    """Look up ``StateGraph`` in ``langgraph.graph`` via ``load_attr``.

    The value produced by ``load_attr`` is passed through unchanged.
    """
    state_graph_cls = load_attr("langgraph.graph", "StateGraph")
    return state_graph_cls
minder/graph/state.py ADDED
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+
3
+ import uuid
4
+ from typing import Any
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
class GraphState(BaseModel):
    """State object passed between graph nodes and LLM clients.

    Nodes receive an instance, read the fields they need and write their
    output back onto it (for example ``WorkflowPlannerNode.run`` updates
    ``workflow_context`` in place and returns the same object). Only
    ``query`` is required; every other field has an empty default.
    """

    # The request text; used as the prompt when no reasoning prompt is set.
    query: str
    # Optional identifiers for the surrounding session / user / repository.
    session_id: uuid.UUID | None = None
    user_id: uuid.UUID | None = None
    repo_id: uuid.UUID | None = None
    # Repository path; the workflow planner uses it for graph lookups.
    repo_path: str | None = None
    # Planning output; LLM clients read its "intent" entry.
    plan: dict[str, Any] = Field(default_factory=dict)
    # Documents before / after reranking. LLM clients cite the "path" of the
    # first three reranked documents as sources.
    retrieved_docs: list[dict[str, Any]] = Field(default_factory=list)
    reranked_docs: list[dict[str, Any]] = Field(default_factory=list)
    # Workflow planning data (workflow_name, steps, guidance, artifacts, ...).
    workflow_context: dict[str, Any] = Field(default_factory=dict)
    # Reasoning output; its "prompt" entry, when set, is sent to the model.
    reasoning_output: dict[str, Any] = Field(default_factory=dict)
    # Outputs of the later pipeline stages -- presumably one dict per node of
    # the same name (llm, guard, verification, evaluator); confirm there.
    llm_output: dict[str, Any] = Field(default_factory=dict)
    guard_result: dict[str, Any] = Field(default_factory=dict)
    verification_result: dict[str, Any] = Field(default_factory=dict)
    evaluation: dict[str, Any] = Field(default_factory=dict)
    # NOTE(review): looks like executor bookkeeping (node transitions and
    # retry counter) -- verify against the graph executor.
    transition_log: list[dict[str, Any]] = Field(default_factory=list)
    retry_count: int = 0
    # Free-form extra data.
    metadata: dict[str, Any] = Field(default_factory=dict)
minder/llm/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .base import LLMClient
2
+ from .local import LocalModelLLM
3
+ from .openai import OpenAIFallbackLLM
4
+
5
+ __all__ = ["LLMClient", "LocalModelLLM", "OpenAIFallbackLLM"]
minder/llm/base.py ADDED
@@ -0,0 +1,14 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Generator
4
+ from typing import Protocol
5
+
6
+ from minder.graph.state import GraphState
7
+
8
+
9
class LLMClient(Protocol):
    """Structural interface for LLM backends driven by a ``GraphState``.

    Any object exposing both methods with compatible signatures satisfies
    the protocol; no inheritance is required.
    """

    def generate(self, state: GraphState) -> dict[str, object]:
        """Produce a single, complete result mapping for *state*."""
        ...

    def stream_generate(
        self, state: GraphState
    ) -> Generator[dict[str, object], None, None]:
        """Produce the result for *state* incrementally as a generator of event dicts."""
        ...
minder/llm/local.py ADDED
@@ -0,0 +1,381 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Generator
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from minder.graph.state import GraphState
8
+ from minder.runtime import load_attr, module_available
9
+
10
# Substrings that identify runtime log output (they look like llama.cpp
# diagnostics). Any generated line or streamed delta containing one of them
# is dropped by LocalModelLLM's cleaning helpers.
_RUNTIME_LOG_MARKERS = (
    "Using chat eos_token:",
    "Using chat bos_token:",
    "llama_perf_context_print:",
    "~llama_context:",
    "ggml_metal_free:",
)

# Fragments of chat-template source text. Output containing one of them is
# treated as leaked template markup and filtered out the same way.
_CHAT_TEMPLATE_MARKERS = (
    "{{- '<turn|>\\n' -}}",
    "{%- if add_generation_prompt -%}",
    "{%- endif -%}",
    "{%- endfor -%}",
)
24
+
25
+
26
class LocalModelLLM:
    """LLM client for a local model file, with a deterministic mock fallback.

    With runtime ``"llama_cpp"`` (or ``"auto"`` resolving to it) prompts are
    sent to a lazily created ``llama_cpp.Llama`` client. In every other case,
    and whenever the llama client cannot be created or fails, a templated
    "grounded response" text built from the graph state is returned instead.

    Args:
        model_path: Path to the model file (``~`` is expanded).
        fail: When true, every public generation method raises
            ``RuntimeError`` (simulates an unavailable model).
        runtime: ``"mock"``, ``"llama_cpp"`` or ``"auto"``.
        context_length: Requested context window; clamped to at least 512.
    """

    def __init__(
        self,
        model_path: str,
        fail: bool = False,
        runtime: str = "mock",
        context_length: int = 131072,
    ) -> None:
        self._model_path = model_path
        self._fail = fail
        self._runtime = runtime
        self._context_length = max(512, context_length)
        # Created on first use by ``_llama_client``.
        self._client: Any | None = None

    @property
    def runtime(self) -> str:
        """Effective runtime name.

        ``"auto"`` resolves to ``"llama_cpp"`` when the model file exists and
        the ``llama_cpp`` module is available, otherwise to ``"mock"``. Any
        other configured value is returned as-is, without touching the
        filesystem.
        """
        if self._runtime != "auto":
            return self._runtime
        model_exists = Path(self._model_path).expanduser().exists()
        if model_exists and module_available("llama_cpp"):
            return "llama_cpp"
        return "mock"

    # ------------------------------------------------------------------
    # Shared payload helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _source_paths(state: GraphState) -> list[Any]:
        """Return the ``path`` of the first three reranked documents."""
        return [doc["path"] for doc in state.reranked_docs[:3]]

    @staticmethod
    def _fallback_text(state: GraphState, source_paths: list[Any]) -> str:
        """Build the templated answer used when no real model output exists.

        The text always contains literal content, so it is never empty.
        """
        guidance = state.workflow_context.get("guidance", "")
        return (
            f"{guidance}\n"
            f"Plan intent: {state.plan.get('intent', 'unknown')}.\n"
            f"Answer: grounded response for '{state.query}'.\n"
            f"Sources: {', '.join(source_paths) if source_paths else 'none'}."
        )

    def _build_result(
        self,
        *,
        text: str,
        source_paths: list[Any],
        runtime: str,
        stream: list[str],
    ) -> dict[str, object]:
        """Assemble the result mapping shared by all generation paths."""
        return {
            "text": text,
            "sources": source_paths,
            "provider": "local_llm",
            "model": "gemma-4-e2b-it",
            "model_path": self._model_path,
            "runtime": runtime,
            "stream": stream,
        }

    # ------------------------------------------------------------------
    # Public generation API
    # ------------------------------------------------------------------

    def generate(self, state: GraphState) -> dict[str, object]:
        """Generate a complete result for *state*.

        Returns:
            A mapping with ``text``, ``sources``, ``provider``, ``model``,
            ``model_path``, ``runtime`` and ``stream`` (the non-empty lines
            of ``text``).

        Raises:
            RuntimeError: If the instance was created with ``fail=True``.
        """
        if self._fail:
            raise RuntimeError("Local model unavailable")

        runtime = self.runtime
        source_paths = self._source_paths(state)
        text = self._fallback_text(state, source_paths)
        if runtime == "llama_cpp":
            text = self._generate_with_llama_cpp(state, fallback=text)

        return self._build_result(
            text=text,
            source_paths=source_paths,
            runtime=runtime,
            stream=[line for line in text.splitlines() if line],
        )

    def stream_generate(
        self, state: GraphState
    ) -> Generator[dict[str, object], None, None]:
        """Generate a result for *state* as a stream of events.

        Yields zero or more ``{"type": "chunk", "delta": str}`` events
        followed by exactly one ``{"type": "result", "result": {...}}``
        event whose payload has the same shape as ``generate``'s return.

        Raises:
            RuntimeError: On first iteration, if created with ``fail=True``.
        """
        if self._fail:
            raise RuntimeError("Local model unavailable")

        runtime = self.runtime
        source_paths = self._source_paths(state)
        fallback = self._fallback_text(state, source_paths)

        # Both "not a llama runtime" and "llama client unavailable" end in
        # the same single-chunk fallback stream.
        client = self._llama_client() if runtime == "llama_cpp" else None
        if client is None:
            yield {"type": "chunk", "delta": fallback}
            yield {
                "type": "result",
                "result": self._build_result(
                    text=fallback,
                    source_paths=source_paths,
                    runtime=runtime,
                    stream=[fallback],
                ),
            }
            return

        reasoning_output = getattr(state, "reasoning_output", {}) or {}
        prompt = str(reasoning_output.get("prompt") or state.query)
        deltas: list[str] = []
        try:
            for delta in self._stream_with_llama_cpp(
                client,
                prompt=prompt,
                max_tokens=256,
                temperature=0.1,
            ):
                cleaned_delta = self._clean_stream_delta(delta)
                if not cleaned_delta:
                    continue
                deltas.append(cleaned_delta)
                yield {"type": "chunk", "delta": cleaned_delta}
        except Exception:
            # Best effort: on any model failure emit the fallback text and
            # report it as the whole stream (chunks already sent stay sent).
            yield {"type": "chunk", "delta": fallback}
            deltas = [fallback]

        text = self._clean_generated_text("".join(deltas)) or fallback
        yield {
            "type": "result",
            "result": self._build_result(
                text=text,
                source_paths=source_paths,
                runtime=runtime,
                stream=deltas or [text],
            ),
        }

    def complete_text(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.1,
        fallback: str = "",
    ) -> str:
        """Complete *prompt* with the local model, returning cleaned text.

        *fallback* is returned when the runtime is not ``llama_cpp``, the
        client cannot be created, the model call raises, or the cleaned
        output is empty.

        Raises:
            RuntimeError: If the instance was created with ``fail=True``.
        """
        if self._fail:
            raise RuntimeError("Local model unavailable")

        if self.runtime != "llama_cpp":
            return fallback

        client = self._llama_client()
        if client is None:
            return fallback

        try:
            response = self._complete_with_llama_cpp(
                client,
                prompt=prompt,
                max_tokens=max_tokens,
                temperature=temperature,
            )
        except Exception:
            return fallback

        text = self._response_text(response, fallback=fallback)
        cleaned = self._clean_generated_text(text)
        return cleaned or fallback

    # ------------------------------------------------------------------
    # llama.cpp plumbing
    # ------------------------------------------------------------------

    def _generate_with_llama_cpp(self, state: GraphState, *, fallback: str) -> str:
        """Run ``complete_text`` on the reasoning prompt (or the raw query)."""
        reasoning_output = getattr(state, "reasoning_output", {}) or {}
        prompt = reasoning_output.get("prompt") or state.query
        return self.complete_text(
            str(prompt),
            max_tokens=256,
            temperature=0.1,
            fallback=fallback,
        )

    def _complete_with_llama_cpp(
        self,
        client: Any,
        *,
        prompt: str,
        max_tokens: int,
        temperature: float,
    ) -> Any:
        """Call the chat-completion API when the client has one, else plain completion."""
        chat_completion = getattr(client, "create_chat_completion", None)
        if callable(chat_completion):
            return chat_completion(
                messages=[
                    {
                        "role": "user",
                        "content": prompt,
                    }
                ],
                max_tokens=max_tokens,
                temperature=temperature,
            )
        return client.create_completion(
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temperature,
        )

    def _stream_with_llama_cpp(
        self,
        client: Any,
        *,
        prompt: str,
        max_tokens: int,
        temperature: float,
    ) -> Generator[str, None, None]:
        """Streaming counterpart of ``_complete_with_llama_cpp``; yields text deltas."""
        chat_completion = getattr(client, "create_chat_completion", None)
        if callable(chat_completion):
            response = chat_completion(
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                temperature=temperature,
                stream=True,
            )
        else:
            response = client.create_completion(
                prompt=prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                stream=True,
            )
        yield from self._extract_stream_deltas(response)

    def _extract_stream_deltas(self, response: Any) -> Generator[str, None, None]:
        """Yield the non-empty text delta of each chunk in *response*."""
        for chunk in response:
            delta = self._response_delta(chunk)
            if delta:
                yield delta

    @staticmethod
    def _first_choice(payload: Any) -> Any | None:
        """Return the first entry of ``choices`` from a dict or object, or ``None``."""
        if isinstance(payload, dict):
            choices = payload.get("choices", [])
        else:
            choices = getattr(payload, "choices", [])
        return choices[0] if choices else None

    def _response_delta(self, chunk: Any) -> str:
        """Extract the text delta from one streamed chunk (``""`` if none).

        Handles dict-shaped and attribute-shaped chunks, looking first at
        ``delta.content`` (chat streaming) and then at ``text`` (plain
        completion streaming).
        """
        first = self._first_choice(chunk)
        if first is None:
            return ""

        if isinstance(first, dict):
            delta = first.get("delta")
            if isinstance(delta, dict):
                content = delta.get("content")
                if isinstance(content, str):
                    return content
            text = first.get("text")
            if isinstance(text, str):
                return text
        delta = getattr(first, "delta", None)
        content = getattr(delta, "content", None) if delta is not None else None
        if isinstance(content, str):
            return content
        text = getattr(first, "text", None)
        if isinstance(text, str):
            return text
        return ""

    def _response_text(self, response: Any, *, fallback: str) -> str:
        """Extract the stripped answer text from a completion response.

        Looks at ``message.content`` (a string, or a list of ``{"text": ...}``
        parts joined by newlines) and then at ``text``, for both dict-shaped
        and attribute-shaped responses. *fallback* is returned when nothing
        usable is found or the text is blank.
        """
        first = self._first_choice(response)
        if first is None:
            return fallback

        if isinstance(first, dict):
            message = first.get("message")
            if isinstance(message, dict):
                content = message.get("content")
                if isinstance(content, str):
                    return content.strip() or fallback
                if isinstance(content, list):
                    parts = [
                        str(item.get("text", "")).strip()
                        for item in content
                        if isinstance(item, dict) and str(item.get("text", "")).strip()
                    ]
                    if parts:
                        return "\n".join(parts)
            text = first.get("text")
            if isinstance(text, str):
                return text.strip() or fallback

        message = getattr(first, "message", None)
        content = getattr(message, "content", None) if message is not None else None
        if isinstance(content, str):
            return content.strip() or fallback
        text = getattr(first, "text", None)
        if isinstance(text, str):
            return text.strip() or fallback
        return fallback

    # ------------------------------------------------------------------
    # Output cleaning
    # ------------------------------------------------------------------

    def _clean_generated_text(self, text: str) -> str:
        """Drop runtime-log / chat-template lines and collapse blank-line runs.

        Leading blank lines are removed, consecutive blank lines are reduced
        to one, and the final result is stripped.
        """
        if not text:
            return ""

        kept_lines: list[str] = []
        for line in text.splitlines():
            stripped = line.strip()
            if not stripped:
                # Keep at most one blank line, and never a leading one.
                if kept_lines and kept_lines[-1] != "":
                    kept_lines.append("")
                continue
            if any(marker in stripped for marker in _RUNTIME_LOG_MARKERS):
                continue
            if any(marker in stripped for marker in _CHAT_TEMPLATE_MARKERS):
                continue
            kept_lines.append(line)

        return "\n".join(kept_lines).strip()

    def _clean_stream_delta(self, delta: str) -> str:
        """Return *delta* unchanged unless it contains a log / template marker.

        Whitespace-only deltas are passed through untouched so that spacing
        between streamed tokens survives; marked deltas become ``""``.
        """
        stripped = delta.strip()
        if not stripped:
            return delta
        if any(marker in stripped for marker in _RUNTIME_LOG_MARKERS):
            return ""
        if any(marker in stripped for marker in _CHAT_TEMPLATE_MARKERS):
            return ""
        return delta

    def _llama_client(self) -> Any | None:
        """Return the cached ``llama_cpp.Llama`` client, creating it on first use.

        Returns ``None`` when ``llama_cpp`` cannot be loaded or the model
        fails to initialise; nothing is cached in that case, so the next
        call tries again.
        """
        if self._client is not None:
            return self._client
        llama_cls = load_attr("llama_cpp", "Llama")
        if llama_cls is None:
            return None
        base_kwargs = {
            "model_path": str(Path(self._model_path).expanduser()),
            "verbose": False,
            "n_ctx": self._context_length,
        }
        try:
            self._client = llama_cls(
                **base_kwargs,
                flash_attn=True,
            )
        except TypeError:
            # Presumably a build that does not accept ``flash_attn``;
            # retry without it.
            try:
                self._client = llama_cls(**base_kwargs)
            except Exception:
                return None
        except Exception:
            return None
        return self._client