PyPI - minder-cli - Versions diffs - 0.6.3__tar.gz → 0.6.4__tar.gz - Mend

minder-cli 0.6.3tar.gz → 0.6.4tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (167) hide show

{minder_cli-0.6.3 → minder_cli-0.6.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: minder-cli
-Version: 0.6.3
+Version: 0.6.4
 Summary: Minder CLI is the command-line interface for the Minder self-hosted MCP platform.
 Project-URL: Homepage, https://github.com/hiimtrung/minder
 Project-URL: Repository, https://github.com/hiimtrung/minder

{minder_cli-0.6.3 → minder_cli-0.6.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "minder-cli"
-version = "0.6.3"
+version = "0.6.4"
 description = "Minder CLI is the command-line interface for the Minder self-hosted MCP platform."
 readme = "README.md"
 requires-python = ">=3.14"

{minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/config.py RENAMED Viewed

@@ -11,6 +11,7 @@ class ServerConfig(BaseModel):
     host: str = "0.0.0.0"
     port: int = 8800
     log_level: str = "info"
+    http_timeout_keep_alive: int = 10  # uvicorn keep-alive timeout (seconds)
 class DashboardConfig(BaseModel):
@@ -48,6 +49,8 @@ class LLMConfig(BaseModel):
     temperature: float = 0.1
     openai_api_key: Optional[str] = None
     openai_model: str = "gpt-4o-mini"
+    timeout_seconds: float = 120.0  # wall-clock budget per LLM call
+    max_concurrent: int = 1  # max simultaneous LLM inferences
 class VectorStoreConfig(BaseModel):

minder_cli-0.6.4/src/minder/graph/concurrency.py ADDED Viewed

@@ -0,0 +1,169 @@
+"""Concurrency utilities for CPU-bound graph inference.
+LLM inference (llama.cpp) is CPU-bound and can run for 5–30 s.  Running it
+directly on the asyncio event loop starves every other in-flight request.
+This module provides:
+  - An asyncio.Semaphore that caps simultaneous LLM inferences.
+  - ``run_in_thread`` — runs a blocking callable on the dedicated inference
+    thread pool, with an optional timeout, so the event loop stays responsive.
+  - ``stream_sync_generator`` — converts a blocking sync generator (e.g.
+    LLM token stream) into an async generator via a thread + queue, allowing
+    real token-by-token streaming without blocking the event loop.
+"""
+from __future__ import annotations
+import asyncio
+import logging
+from collections.abc import AsyncGenerator, Generator
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, Callable, TypeVar
+logger = logging.getLogger(__name__)
+# Module-wide limiter state.  ``_semaphore`` stays ``None`` until ``configure``
+# runs or ``_get_semaphore`` creates it lazily on first use.
+_semaphore: asyncio.Semaphore | None = None
+# Number of permits given to the semaphore when it is (re)created.
+_max_concurrent: int = 1
+# Default timeout applied by ``run_in_thread`` / ``stream_sync_generator`` when
+# the caller passes no timeout and asks for the LLM semaphore.
+_timeout_seconds: float = 120.0
+# Dedicated thread pool for LLM / embedding inference.
+# Using a bounded pool prevents runaway thread creation under load.
+_INFERENCE_POOL = ThreadPoolExecutor(max_workers=4, thread_name_prefix="minder-inference")
+# Return type of the callable handed to ``run_in_thread``.
+T = TypeVar("T")
+# Unique end-of-stream marker queued by the ``stream_sync_generator`` producer.
+_SENTINEL = object()
+def configure(*, max_concurrent: int = 1, timeout_seconds: float = 120.0) -> None:
+    """Install the module-wide inference limits; meant to be called once at startup.
+    Args:
+        max_concurrent: Requested number of simultaneous LLM inferences.
+            Values below 1 are raised to 1.
+        timeout_seconds: Requested default timeout per LLM call.  Values
+            below 10 seconds are raised to 10.0.
+    A fresh semaphore sized to the effective concurrency replaces any
+    previously created one.
+    """
+    global _max_concurrent, _timeout_seconds, _semaphore
+    # Apply the floors explicitly instead of through max().
+    concurrency_cap = max_concurrent if max_concurrent > 1 else 1
+    time_budget = timeout_seconds if timeout_seconds > 10.0 else 10.0
+    _max_concurrent = concurrency_cap
+    _timeout_seconds = time_budget
+    _semaphore = asyncio.Semaphore(concurrency_cap)
+def _get_semaphore() -> asyncio.Semaphore:
+    """Return the shared LLM semaphore, creating it on first use.
+    When ``configure`` has not created one yet, a semaphore sized to the
+    current ``_max_concurrent`` value is built and cached.
+    """
+    global _semaphore
+    existing = _semaphore
+    if existing is not None:
+        return existing
+    _semaphore = asyncio.Semaphore(_max_concurrent)
+    return _semaphore
+async def run_in_thread(
+    fn: Callable[..., T],
+    /,
+    *args: Any,
+    timeout: float | None = None,
+    use_llm_semaphore: bool = False,
+) -> T:
+    """Execute a blocking callable on the inference thread pool and await it.
+    Args:
+        fn: Blocking callable to execute.
+        *args: Positional arguments passed through to ``fn``.
+        timeout: Maximum seconds to wait for the result.  A falsy value
+            (``None`` or ``0``) selects the configured global timeout when
+            ``use_llm_semaphore`` is True, and no timeout otherwise.
+        use_llm_semaphore: Hold the global LLM semaphore for the duration of
+            the call so at most ``max_concurrent`` such calls are awaited at
+            once.
+    Returns:
+        Whatever ``fn(*args)`` returns.
+    Raises:
+        asyncio.TimeoutError: The timeout elapsed first.  Only the semaphore
+            path logs a warning before re-raising.
+    """
+    if timeout:
+        effective_timeout = timeout
+    elif use_llm_semaphore:
+        effective_timeout = _timeout_seconds
+    else:
+        effective_timeout = None
+    async def _submit() -> T:
+        running_loop = asyncio.get_running_loop()
+        return await running_loop.run_in_executor(_INFERENCE_POOL, fn, *args)
+    async def _bounded() -> T:
+        # Apply the timeout only when one is in effect.
+        if not effective_timeout:
+            return await _submit()
+        return await asyncio.wait_for(_submit(), timeout=effective_timeout)
+    if not use_llm_semaphore:
+        return await _bounded()
+    limiter = _get_semaphore()
+    try:
+        async with limiter:
+            return await _bounded()
+    except asyncio.TimeoutError:
+        logger.warning("LLM inference timed out after %.0f s", effective_timeout)
+        raise
+async def stream_sync_generator(
+    gen_fn: Callable[..., Generator[Any, None, None]],
+    /,
+    *args: Any,
+    timeout: float | None = None,
+    use_llm_semaphore: bool = True,
+) -> AsyncGenerator[Any, None]:
+    """Adapt a blocking sync generator into an async generator.
+    The generator runs inside the inference thread pool so the asyncio event
+    loop is never blocked.  Items are forwarded through a bounded
+    asyncio.Queue so consumers receive them as they are produced.
+    Args:
+        gen_fn: Callable returning the blocking generator to drive.
+        *args: Positional arguments passed through to ``gen_fn``.
+        timeout: Budget in seconds for the stream.  A falsy value selects the
+            configured global timeout when ``use_llm_semaphore`` is True and
+            no timeout otherwise.
+        use_llm_semaphore: Hold the global LLM semaphore while streaming.
+    Yields:
+        Each item produced by ``gen_fn(*args)``, in order.
+    Raises:
+        Exception: Any ``Exception`` raised by the generator is re-raised in
+            the consumer.  A timeout ends the stream quietly after logging a
+            warning; it does not raise.
+    Usage::
+        async for event in stream_sync_generator(llm_node.stream, state):
+            yield event
+    """
+    loop = asyncio.get_running_loop()
+    queue: asyncio.Queue[Any] = asyncio.Queue(maxsize=64)
+    effective_timeout = timeout or (_timeout_seconds if use_llm_semaphore else None)
+    # Set by the consumer once it stops reading (finished, timed out, closed
+    # early or failed).  The producer thread polls it so it stops generating
+    # instead of refilling the queue and blocking on a reader that is gone.
+    stop_requested = False
+    def _put(item: Any) -> None:
+        # Blocks the worker thread until the event loop has enqueued ``item``.
+        asyncio.run_coroutine_threadsafe(queue.put(item), loop).result()
+    def _producer() -> None:
+        try:
+            for item in gen_fn(*args):
+                _put(item)
+                if stop_requested:
+                    break
+        except Exception as exc:
+            # Forward the failure so the consumer can re-raise it.
+            _put(exc)
+        finally:
+            _put(_SENTINEL)
+    def _discard_outcome(done: asyncio.Future[Any]) -> None:
+        # Retrieve the result so an unawaited failure is not reported later
+        # as "exception was never retrieved".
+        if not done.cancelled():
+            done.exception()
+    async def _generate() -> AsyncGenerator[Any, None]:
+        nonlocal stop_requested
+        future = loop.run_in_executor(_INFERENCE_POOL, _producer)
+        future.add_done_callback(_discard_outcome)
+        deadline = (
+            loop.time() + effective_timeout if effective_timeout else None
+        )
+        timed_out = False
+        try:
+            while True:
+                # The 0.1 s floor means that once the budget is spent the
+                # stream ends only when the next item takes longer than that.
+                remaining = (
+                    max(0.1, deadline - loop.time())
+                    if deadline is not None
+                    else None
+                )
+                try:
+                    item = await asyncio.wait_for(
+                        queue.get(), timeout=remaining
+                    )
+                except asyncio.TimeoutError:
+                    logger.warning(
+                        "LLM stream timed out after %.0f s", effective_timeout
+                    )
+                    timed_out = True
+                    return
+                if item is _SENTINEL:
+                    break
+                if isinstance(item, Exception):
+                    raise item
+                yield item
+        finally:
+            stop_requested = True
+            # Free every slot so a producer blocked on a full queue wakes up.
+            # After the stop flag it enqueues at most one more item plus the
+            # sentinel, which always fits, so it can no longer block.
+            while not queue.empty():
+                try:
+                    queue.get_nowait()
+                except asyncio.QueueEmpty:
+                    break
+            # A running worker thread cannot be cancelled, and cancelling the
+            # wrapping future would only make awaiting it raise
+            # CancelledError.  After a timeout, return right away and let the
+            # thread wind down by itself; otherwise wait for it to finish.
+            if not timed_out:
+                try:
+                    await future
+                except Exception:
+                    logger.debug(
+                        "Inference stream producer failed", exc_info=True
+                    )
+    sem = _get_semaphore() if use_llm_semaphore else None
+    if sem is not None:
+        await sem.acquire()
+    inner = _generate()
+    try:
+        async for item in inner:
+            yield item
+    finally:
+        try:
+            # Close the inner generator now so its cleanup runs before the
+            # semaphore is released, even when the consumer stops early.
+            await inner.aclose()
+        finally:
+            if sem is not None:
+                sem.release()

{minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/executor.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import asyncio
 from dataclasses import dataclass, field
 import inspect
 import uuid
@@ -7,9 +8,11 @@ from typing import Any
 from minder.config import MinderConfig
 from minder.graph.checkpoint import MinderCheckpointSaver
+from minder.graph.concurrency import run_in_thread
 from minder.graph.edges import determine_next_edge
 from minder.graph.nodes import (
     ClarificationNode,
+    ContextEnricherNode,
     EvaluatorNode,
     GuardNode,
     LLMNode,
@@ -41,6 +44,7 @@ class GraphNodes:
     evaluator: EvaluatorNode
     reranker: RerankerNode | None = field(default=None)
     reflection: ReflectionNode | None = field(default=None)
+    context_enricher: ContextEnricherNode | None = field(default=None)
 class InternalGraphExecutor:
@@ -52,22 +56,32 @@ class InternalGraphExecutor:
         state.metadata.setdefault("attempt_failures", [])
         state.metadata["orchestration_runtime"] = "internal"
         state = await self._nodes.workflow_planner.run(state)
-        state = self._nodes.planning.run(state)
-        state = self._nodes.clarification.run(state)
+        # Fast sync nodes — run in thread to yield control to the event loop
+        state = await run_in_thread(self._nodes.planning.run, state)
+        state = await run_in_thread(self._nodes.clarification.run, state)
         if state.metadata.get("needs_clarification"):
             return state
         state = await self._nodes.retriever.run(state)
         if self._nodes.reranker is not None:
             state = await self._nodes.reranker.run(state)
+        if self._nodes.context_enricher is not None:
+            state = await self._nodes.context_enricher.run(state)
         attempt = 0
         while True:
             attempt += 1
             state.retry_count = attempt - 1
-            state = self._nodes.reasoning.run(state)
-            state = self._nodes.llm.run(state)
-            state = self._nodes.guard.run(state)
-            state = self._nodes.verification.run(state)
+            # reasoning builds the prompt (CPU-bound string work)
+            state = await run_in_thread(self._nodes.reasoning.run, state)
+            # LLM inference is the main bottleneck — run in dedicated thread
+            # with semaphore + timeout so other requests keep moving
+            state = await run_in_thread(
+                self._nodes.llm.run,
+                state,
+                use_llm_semaphore=True,
+            )
+            state = await run_in_thread(self._nodes.guard.run, state)
+            state = await run_in_thread(self._nodes.verification.run, state)
             edge = determine_next_edge(state)
             state.transition_log.append(
                 {
@@ -101,7 +115,7 @@ class InternalGraphExecutor:
             )
             state.metadata["retry_reason"] = retry_reason
-        state = self._nodes.evaluator.run(state)
+        state = await run_in_thread(self._nodes.evaluator.run, state)
         state.metadata["edge"] = determine_next_edge(state)
         if self._nodes.reflection is not None:
@@ -228,6 +242,12 @@ class LangGraphExecutorAdapter:
                 "reranker", self._wrap_state_handler(self._nodes.reranker.run)
             )
+        if self._nodes.context_enricher is not None:
+            workflow.add_node(
+                "context_enricher",
+                self._wrap_state_handler(self._nodes.context_enricher.run),
+            )
         workflow.add_node(
             "reasoning", self._wrap_state_handler(self._node_reasoning_wrapper)
         )
@@ -295,11 +315,18 @@ class LangGraphExecutorAdapter:
         else:
             retrieval_end_node = "retriever"
+        has_enricher = self._nodes.context_enricher is not None
         if self._nodes.reranker is not None:
             workflow.add_edge(retrieval_end_node, "reranker")
-            workflow.add_edge("reranker", "reasoning")
+            post_retrieval_node = "reranker"
         else:
-            workflow.add_edge(retrieval_end_node, "reasoning")
+            post_retrieval_node = retrieval_end_node
+        if has_enricher:
+            workflow.add_edge(post_retrieval_node, "context_enricher")
+            workflow.add_edge("context_enricher", "reasoning")
+        else:
+            workflow.add_edge(post_retrieval_node, "reasoning")
         workflow.add_edge("reasoning", "llm")
         workflow.add_edge("llm", "guard")
@@ -444,11 +471,16 @@ class LangGraphExecutorAdapter:
     @staticmethod
     def _wrap_state_handler(handler):  # noqa: ANN001
+        is_async = inspect.iscoroutinefunction(handler)
         async def wrapped(state):  # noqa: ANN001
             graph_state = GraphState.model_validate(state)
-            result = handler(graph_state)
-            if inspect.isawaitable(result):
-                result = await result
+            if is_async:
+                result = await handler(graph_state)
+            else:
+                # Run blocking sync handlers in a thread pool to avoid
+                # stalling the event loop during CPU-bound LLM inference.
+                result = await asyncio.to_thread(handler, graph_state)
             if isinstance(result, GraphState):
                 return dict(result)
             return result

{minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/graph.py RENAMED Viewed

@@ -6,6 +6,8 @@ from time import perf_counter
 from minder.config import MinderConfig
 from minder.embedding.local import LocalEmbeddingProvider
+from minder.graph import concurrency as _concurrency
+from minder.graph.concurrency import run_in_thread, stream_sync_generator
 from minder.graph.edges import determine_next_edge
 from minder.graph.executor import (
     GraphNodes,
@@ -14,6 +16,7 @@ from minder.graph.executor import (
 )
 from minder.graph.nodes import (
     ClarificationNode,
+    ContextEnricherNode,
     EvaluatorNode,
     GuardNode,
     LLMNode,
@@ -45,6 +48,7 @@ class MinderGraph:
         clarification: ClarificationNode | None = None,
         retriever: RetrieverNode | None = None,
         reranker: RerankerNode | None = None,
+        context_enricher: ContextEnricherNode | None = None,
         reasoning: ReasoningNode | None = None,
         llm: LLMNode | None = None,
         guard: GuardNode | None = None,
@@ -75,6 +79,7 @@ class MinderGraph:
             score_threshold=config.retrieval.similarity_threshold,
         )
         self._reranker = reranker  # None by default; pass RerankerNode(...) to activate
+        self._context_enricher = context_enricher or ContextEnricherNode(store)
         self._reasoning = reasoning or ReasoningNode()
         self._llm = llm or LLMNode(
             primary=create_llm(config.llm),
@@ -94,12 +99,18 @@ class MinderGraph:
         self._error_store = error_store or store
         self._graph_tools = graph_tools
         self._cached_executor: InternalGraphExecutor | LangGraphExecutorAdapter | None = None
+        # Apply LLM concurrency and timeout settings from config
+        _concurrency.configure(
+            max_concurrent=config.llm.max_concurrent,
+            timeout_seconds=config.llm.timeout_seconds,
+        )
         self._nodes = GraphNodes(
             workflow_planner=self._workflow_planner,
             planning=self._planning,
             clarification=self._clarification,
             retriever=self._retriever,
             reranker=self._reranker,
+            context_enricher=self._context_enricher,
             reasoning=self._reasoning,
             llm=self._llm,
             guard=self._guard,
@@ -144,19 +155,32 @@ class MinderGraph:
         state = await self._nodes.retriever.run(state)
         if self._nodes.reranker is not None:
             state = await self._nodes.reranker.run(state)
+        if self._nodes.context_enricher is not None:
+            state = await self._nodes.context_enricher.run(state)
         attempt = 0
         while True:
             attempt += 1
             state.retry_count = attempt - 1
-            state = self._nodes.reasoning.run(state)
+            state = await run_in_thread(self._nodes.reasoning.run, state)
             yield {"type": "attempt", "attempt": attempt}
-            for event in self._nodes.llm.stream(state):
+            # Stream LLM tokens without blocking the event loop.
+            # stream_sync_generator runs the sync generator in the inference
+            # thread pool and forwards items through an asyncio.Queue.
+            async for event in stream_sync_generator(
+                self._nodes.llm.stream,
+                state,
+                use_llm_semaphore=True,
+            ):
                 if str(event.get("type")) == "result":
+                    # Capture the final LLM output written back to state
+                    result_data = dict(event.get("result", {}) or {})
+                    if result_data:
+                        state.llm_output = result_data
                     continue
                 yield {**event, "attempt": attempt}
-            state = self._nodes.guard.run(state)
-            state = self._nodes.verification.run(state)
+            state = await run_in_thread(self._nodes.guard.run, state)
+            state = await run_in_thread(self._nodes.verification.run, state)
             edge = determine_next_edge(state)
             state.transition_log.append(
                 {
@@ -196,7 +220,7 @@ class MinderGraph:
                 "edge": edge,
             }
-        state = self._nodes.evaluator.run(state)
+        state = await run_in_thread(self._nodes.evaluator.run, state)
         state.metadata["edge"] = determine_next_edge(state)
         await self._persist_history(state)
         await self._persist_error_if_needed(state)

{minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/__init__.py RENAMED Viewed

@@ -1,4 +1,5 @@
 from .clarification import ClarificationNode
+from .context_enricher import ContextEnricherNode
 from .evaluator import EvaluatorNode
 from .guard import GuardNode
 from .llm import LLMNode
@@ -17,6 +18,7 @@ from .workflow_planner import WorkflowPlannerNode
 __all__ = [
     "ClarificationNode",
+    "ContextEnricherNode",
     "DockerSandboxRunner",
     "EvaluatorNode",
     "GuardNode",

minder_cli-0.6.4/src/minder/graph/nodes/context_enricher.py ADDED Viewed

@@ -0,0 +1,186 @@
+from __future__ import annotations
+import logging
+from typing import Any
+from minder.graph.state import GraphState
+from minder.store.interfaces import IOperationalStore
+logger = logging.getLogger(__name__)
+# Nouns that identify a data type the user is asking about.
+# ``_hits`` matches these as substrings of the lowercased query, so a keyword
+# also matches inside longer words.  English and Vietnamese terms are mixed.
+_SKILL_NOUNS = frozenset({
+    "skill", "skills", "kỹ năng", "snippet", "snippets", "function",
+    "method", "pattern", "utility", "helper", "code pattern",
+})
+_MEMORY_NOUNS = frozenset({
+    "memory", "memories", "note", "notes", "tài liệu", "kiến thức",
+    "ghi chú", "lưu ý", "reminder", "fact", "facts",
+})
+_ERROR_NOUNS = frozenset({
+    "error", "errors", "bug", "bugs", "exception", "lỗi", "issue",
+    "issues", "problem", "problems", "crash",
+})
+# Verbs / phrases that signal the user wants analysis / enumeration
+# NOTE(review): not referenced by the code visible in this module;
+# ``ContextEnricherNode.run`` decides on the noun sets alone — confirm whether
+# this set is still needed.
+_ANALYSIS_VERBS = frozenset({
+    "analyze", "analysis", "phân tích", "tóm tắt", "summarize", "summary",
+    "list", "liệt kê", "show", "give me", "tôi có", "what", "how many",
+    "xem", "tất cả", "all", "overview", "review", "explain",
+    "mô tả", "kể", "nêu", "describe", "tell me", "breakdown",
+})
+# Tech tags that can appear as query words
+# ``_extract_tag_hints`` returns matches in the order they are listed here.
+_KNOWN_TAGS = [
+    "backend", "frontend", "api", "database", "auth", "authentication",
+    "authorization", "testing", "deployment", "ci", "cd", "docker",
+    "kubernetes", "k8s", "python", "javascript", "typescript", "react",
+    "fastapi", "django", "flask", "sqlalchemy", "redis", "postgresql",
+    "sqlite", "mongodb", "async", "microservice", "security", "logging",
+    "monitoring", "refactor", "pattern", "utility", "helper", "caching",
+]
+# Upper bound on entries returned per item type (skills, memories, errors).
+_MAX_ENRICHED_ITEMS = 30
+# Skill / memory content is truncated to this many characters.
+_MAX_CONTENT_CHARS = 1200
+def _query_lower(state: GraphState) -> str:
+    """Return the state's query as a lowercase string ("" when it is falsy)."""
+    raw_query = state.query or ""
+    return str(raw_query).lower()
+def _hits(query: str, keywords: frozenset[str]) -> bool:
+    """Report whether at least one keyword occurs in ``query`` as a substring."""
+    for keyword in keywords:
+        if keyword in query:
+            return True
+    return False
+def _extract_tag_hints(query: str) -> list[str]:
+    """Return the known tech tags mentioned in ``query`` as whole words.
+    Args:
+        query: Query text; callers pass it already lowercased.
+    Returns:
+        Tags from ``_KNOWN_TAGS``, in list order, that occur in ``query`` as a
+        standalone word, optionally followed by a plural "s".
+    Matching on word boundaries avoids false hints from raw substring tests
+    ("ci" inside "specific", "api" inside "rapid", "auth" inside "author").
+    Any hint makes ``_format_items`` prefer tag-matching entries, so a false
+    hint silently narrows the results.
+    """
+    import re  # local import: the module's import block does not provide it
+    return [
+        tag
+        for tag in _KNOWN_TAGS
+        if re.search(rf"\b{re.escape(tag)}s?\b", query)
+    ]
+class ContextEnricherNode:
+    """Add stored skills, memories and errors to the graph state on request.
+    The vector retriever only searches ingested code documents.  Skills and
+    memories live in a separate table and are never seen by the LLM unless
+    explicitly fetched here.  When the query names one of those data types,
+    this node loads the matching records and stores them under
+    ``state.metadata["enriched_context"]`` before the reasoning node builds
+    the LLM prompt.
+    """
+    def __init__(self, store: IOperationalStore) -> None:
+        self._store = store
+    async def run(self, state: GraphState) -> GraphState:
+        """Populate ``enriched_context`` when the query mentions a data type.
+        The state is returned unchanged when no skill, memory or error noun
+        appears in the query, or when nothing could be fetched.
+        """
+        lowered = _query_lower(state)
+        skills_requested = _hits(lowered, _SKILL_NOUNS)
+        memories_requested = _hits(lowered, _MEMORY_NOUNS)
+        errors_requested = _hits(lowered, _ERROR_NOUNS)
+        # An explicit data-type noun is required — analysis verbs alone would
+        # trigger on general questions (e.g. "what is X?").
+        if not any((skills_requested, memories_requested, errors_requested)):
+            return state
+        hints = _extract_tag_hints(lowered)
+        collected: list[dict[str, Any]] = []
+        if skills_requested:
+            collected.extend(await self._fetch_skills(state, hints))
+        if memories_requested:
+            collected.extend(await self._fetch_memories(state, hints))
+        if errors_requested:
+            collected.extend(await self._fetch_errors())
+        if not collected:
+            return state
+        state.metadata["enriched_context"] = collected
+        logger.debug(
+            "ContextEnricher: %d items fetched for query %r",
+            len(collected),
+            state.query[:80],
+        )
+        return state
+    async def _list_by_kind(
+        self,
+        state: GraphState,
+        tag_hints: list[str],
+        *,
+        is_memory: bool,
+        item_type: str,
+        failure_message: str,
+    ) -> list[dict[str, Any]]:
+        """Load the user's skill-table rows of one kind and format them.
+        Any store failure is logged at debug level with ``failure_message``
+        and yields an empty list (best-effort enrichment).
+        """
+        try:
+            records = await self._store.list_skills_by_kind(
+                is_memory=is_memory,
+                owner_id=state.user_id,
+            )
+        except Exception as exc:
+            logger.debug(failure_message, exc)
+            return []
+        return _format_items(records, tag_hints, item_type=item_type)
+    async def _fetch_skills(
+        self, state: GraphState, tag_hints: list[str]
+    ) -> list[dict[str, Any]]:
+        """Return the user's skills as formatted context entries."""
+        return await self._list_by_kind(
+            state,
+            tag_hints,
+            is_memory=False,
+            item_type="skill",
+            failure_message="ContextEnricher.list_skills failed: %s",
+        )
+    async def _fetch_memories(
+        self, state: GraphState, tag_hints: list[str]
+    ) -> list[dict[str, Any]]:
+        """Return the user's memories as formatted context entries."""
+        return await self._list_by_kind(
+            state,
+            tag_hints,
+            is_memory=True,
+            item_type="memory",
+            failure_message="ContextEnricher.list_memories failed: %s",
+        )
+    async def _fetch_errors(self) -> list[dict[str, Any]]:
+        """Return up to ``_MAX_ENRICHED_ITEMS`` stored errors as context entries.
+        A store failure is logged at debug level and yields an empty list.
+        """
+        try:
+            records = await self._store.list_errors()
+        except Exception as exc:
+            logger.debug("ContextEnricher.list_errors failed: %s", exc)
+            return []
+        entries: list[dict[str, Any]] = []
+        for record in records[:_MAX_ENRICHED_ITEMS]:
+            entries.append(
+                {
+                    "type": "error",
+                    "title": str(getattr(record, "error_code", "") or ""),
+                    "content": str(getattr(record, "error_message", "") or ""),
+                    "tags": [],
+                    "quality_score": 0.0,
+                    "language": "",
+                }
+            )
+        return entries
+def _relevance(item: Any, tag_hints: list[str]) -> float:
+    """Score an item: 1.5 per hint found among its tags, plus its quality score.
+    Tags are compared case-insensitively; a missing or falsy ``tags`` /
+    ``quality_score`` attribute counts as empty / zero.
+    """
+    lowered_tags = [tag.lower() for tag in (getattr(item, "tags", None) or [])]
+    matched_hints = [hint for hint in tag_hints if hint in lowered_tags]
+    quality = float(getattr(item, "quality_score", 0) or 0)
+    return 1.5 * len(matched_hints) + quality
+def _format_items(
+    items: list[Any], tag_hints: list[str], *, item_type: str
+) -> list[dict[str, Any]]:
+    """Rank store items and convert the best ones into plain context dicts.
+    Items are ordered by ``_relevance`` (highest first).  When tag hints are
+    given and at least one item carries a hinted tag, only those items are
+    kept; otherwise the full ranking is used.  At most
+    ``_MAX_ENRICHED_ITEMS`` entries are returned and each ``content`` is cut
+    to ``_MAX_CONTENT_CHARS`` characters.
+    """
+    ranked = sorted(
+        items,
+        key=lambda candidate: _relevance(candidate, tag_hints),
+        reverse=True,
+    )
+    def _carries_hint(candidate: Any) -> bool:
+        lowered = [t.lower() for t in (getattr(candidate, "tags", None) or [])]
+        return any(hint in lowered for hint in tag_hints)
+    selected = ranked
+    if tag_hints:
+        # Prefer tag-matching items; fall back to everything if none match.
+        tagged = [candidate for candidate in ranked if _carries_hint(candidate)]
+        if tagged:
+            selected = tagged
+    formatted: list[dict[str, Any]] = []
+    for entry in selected[:_MAX_ENRICHED_ITEMS]:
+        formatted.append(
+            {
+                "type": item_type,
+                "title": str(getattr(entry, "title", "") or ""),
+                "content": str(getattr(entry, "content", "") or "")[:_MAX_CONTENT_CHARS],
+                "tags": list(getattr(entry, "tags", None) or []),
+                "quality_score": float(getattr(entry, "quality_score", 0) or 0),
+                "language": str(getattr(entry, "language", "") or ""),
+            }
+        )
+    return formatted

minder-cli 0.6.3__tar.gz → 0.6.4__tar.gz

minder-cli 0.6.3tar.gz → 0.6.4tar.gz