minder-cli 0.6.3__tar.gz → 0.6.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. {minder_cli-0.6.3 → minder_cli-0.6.4}/PKG-INFO +1 -1
  2. {minder_cli-0.6.3 → minder_cli-0.6.4}/pyproject.toml +1 -1
  3. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/config.py +3 -0
  4. minder_cli-0.6.4/src/minder/graph/concurrency.py +169 -0
  5. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/executor.py +44 -12
  6. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/graph.py +29 -5
  7. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/__init__.py +2 -0
  8. minder_cli-0.6.4/src/minder/graph/nodes/context_enricher.py +186 -0
  9. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/reasoning.py +83 -9
  10. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/retriever.py +5 -1
  11. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/workflow_planner.py +9 -2
  12. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/admin/runtime.py +12 -0
  13. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/query.py +7 -2
  14. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/transport/sse.py +6 -0
  15. {minder_cli-0.6.3 → minder_cli-0.6.4}/.gitignore +0 -0
  16. {minder_cli-0.6.3 → minder_cli-0.6.4}/LICENSE +0 -0
  17. {minder_cli-0.6.3 → minder_cli-0.6.4}/README-pypi.md +0 -0
  18. {minder_cli-0.6.3 → minder_cli-0.6.4}/README.md +0 -0
  19. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/__init__.py +0 -0
  20. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/api/routers/prompts.py +0 -0
  21. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/application/__init__.py +0 -0
  22. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/application/admin/__init__.py +0 -0
  23. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/application/admin/dto.py +0 -0
  24. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/application/admin/jobs.py +0 -0
  25. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/application/admin/use_cases.py +0 -0
  26. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/auth/__init__.py +0 -0
  27. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/auth/context.py +0 -0
  28. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/auth/middleware.py +0 -0
  29. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/auth/principal.py +0 -0
  30. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/auth/rate_limiter.py +0 -0
  31. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/auth/rbac.py +0 -0
  32. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/auth/service.py +0 -0
  33. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/bootstrap/__init__.py +0 -0
  34. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/bootstrap/agent_seeder.py +0 -0
  35. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/bootstrap/providers.py +0 -0
  36. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/bootstrap/transport.py +0 -0
  37. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/bootstrap/workflow_seeder.py +0 -0
  38. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/cache/__init__.py +0 -0
  39. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/cache/providers.py +0 -0
  40. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/chunking/__init__.py +0 -0
  41. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/chunking/code_splitter.py +0 -0
  42. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/chunking/splitter.py +0 -0
  43. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/cli.py +0 -0
  44. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/context_compactor.py +0 -0
  45. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/continuity.py +0 -0
  46. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/dev.py +0 -0
  47. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/embedding/__init__.py +0 -0
  48. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/embedding/base.py +0 -0
  49. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/embedding/local.py +0 -0
  50. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/embedding/openai.py +0 -0
  51. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/__init__.py +0 -0
  52. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/checkpoint.py +0 -0
  53. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/edges.py +0 -0
  54. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/memory_graph.py +0 -0
  55. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/clarification.py +0 -0
  56. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/evaluator.py +0 -0
  57. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/guard.py +0 -0
  58. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/llm.py +0 -0
  59. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/parallel_retriever.py +0 -0
  60. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/planning.py +0 -0
  61. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/reflection.py +0 -0
  62. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/reranker.py +0 -0
  63. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/nodes/verification.py +0 -0
  64. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/runtime.py +0 -0
  65. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/session_graph.py +0 -0
  66. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/state.py +0 -0
  67. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/graph/supervisor.py +0 -0
  68. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/learning/__init__.py +0 -0
  69. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/learning/error_learner.py +0 -0
  70. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/learning/pattern_extractor.py +0 -0
  71. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/learning/quality_optimizer.py +0 -0
  72. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/learning/skill_synthesizer.py +0 -0
  73. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/llm/__init__.py +0 -0
  74. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/llm/base.py +0 -0
  75. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/llm/factory.py +0 -0
  76. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/llm/llama_cpp_llm.py +0 -0
  77. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/llm/openai.py +0 -0
  78. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/model_bootstrap.py +0 -0
  79. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/__init__.py +0 -0
  80. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/agent.py +0 -0
  81. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/base.py +0 -0
  82. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/checkpoint.py +0 -0
  83. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/client.py +0 -0
  84. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/document.py +0 -0
  85. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/error.py +0 -0
  86. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/graph.py +0 -0
  87. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/history.py +0 -0
  88. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/job.py +0 -0
  89. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/prompt.py +0 -0
  90. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/repository.py +0 -0
  91. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/rule.py +0 -0
  92. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/session.py +0 -0
  93. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/skill.py +0 -0
  94. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/user.py +0 -0
  95. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/models/workflow.py +0 -0
  96. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/observability/__init__.py +0 -0
  97. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/observability/audit.py +0 -0
  98. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/observability/logging.py +0 -0
  99. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/observability/metrics.py +0 -0
  100. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/observability/tracing.py +0 -0
  101. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/__init__.py +0 -0
  102. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/cli/__init__.py +0 -0
  103. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/cli/commands/agent.py +0 -0
  104. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/cli/commands/auth.py +0 -0
  105. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/cli/commands/mcp.py +0 -0
  106. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/cli/commands/sync.py +0 -0
  107. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/cli/commands/update.py +0 -0
  108. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/cli/main.py +0 -0
  109. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/cli/utils/common.py +0 -0
  110. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/cli/utils/config.py +0 -0
  111. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/cli/utils/git.py +0 -0
  112. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/cli/utils/version.py +0 -0
  113. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/__init__.py +0 -0
  114. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/admin/__init__.py +0 -0
  115. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/admin/agents.py +0 -0
  116. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/admin/api.py +0 -0
  117. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/admin/context.py +0 -0
  118. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/admin/dashboard.py +0 -0
  119. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/admin/jobs.py +0 -0
  120. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/admin/memories.py +0 -0
  121. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/admin/prompts.py +0 -0
  122. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/admin/routes.py +0 -0
  123. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/admin/search.py +0 -0
  124. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/presentation/http/admin/skills.py +0 -0
  125. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/prompts/__init__.py +0 -0
  126. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/prompts/formatter.py +0 -0
  127. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/resources/__init__.py +0 -0
  128. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/retrieval/__init__.py +0 -0
  129. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/retrieval/hybrid.py +0 -0
  130. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/retrieval/mmr.py +0 -0
  131. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/retrieval/multi_hop.py +0 -0
  132. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/runtime.py +0 -0
  133. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/server.py +0 -0
  134. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/__init__.py +0 -0
  135. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/document.py +0 -0
  136. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/error.py +0 -0
  137. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/feedback.py +0 -0
  138. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/graph.py +0 -0
  139. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/history.py +0 -0
  140. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/interfaces.py +0 -0
  141. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/qdrant/__init__.py +0 -0
  142. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/qdrant/client.py +0 -0
  143. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/qdrant/crud.py +0 -0
  144. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/qdrant/graph_store.py +0 -0
  145. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/qdrant/operational_store.py +0 -0
  146. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/qdrant/vector_store.py +0 -0
  147. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/relational.py +0 -0
  148. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/repo_state.py +0 -0
  149. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/rule.py +0 -0
  150. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/store/vector.py +0 -0
  151. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/__init__.py +0 -0
  152. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/agents.py +0 -0
  153. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/auth.py +0 -0
  154. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/graph.py +0 -0
  155. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/ingest.py +0 -0
  156. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/memory.py +0 -0
  157. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/registry.py +0 -0
  158. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/repo_scanner.py +0 -0
  159. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/seeds/__init__.py +0 -0
  160. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/seeds/default_agents.py +0 -0
  161. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/session.py +0 -0
  162. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/skills.py +0 -0
  163. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/tools/workflow.py +0 -0
  164. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/transport/__init__.py +0 -0
  165. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/transport/base.py +0 -0
  166. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/transport/stdio.py +0 -0
  167. {minder_cli-0.6.3 → minder_cli-0.6.4}/src/minder/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: minder-cli
3
- Version: 0.6.3
3
+ Version: 0.6.4
4
4
  Summary: Minder CLI is the command-line interface for the Minder self-hosted MCP platform.
5
5
  Project-URL: Homepage, https://github.com/hiimtrung/minder
6
6
  Project-URL: Repository, https://github.com/hiimtrung/minder
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "minder-cli"
7
- version = "0.6.3"
7
+ version = "0.6.4"
8
8
  description = "Minder CLI is the command-line interface for the Minder self-hosted MCP platform."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.14"
@@ -11,6 +11,7 @@ class ServerConfig(BaseModel):
11
11
  host: str = "0.0.0.0"
12
12
  port: int = 8800
13
13
  log_level: str = "info"
14
+ http_timeout_keep_alive: int = 10 # uvicorn keep-alive timeout (seconds)
14
15
 
15
16
 
16
17
  class DashboardConfig(BaseModel):
@@ -48,6 +49,8 @@ class LLMConfig(BaseModel):
48
49
  temperature: float = 0.1
49
50
  openai_api_key: Optional[str] = None
50
51
  openai_model: str = "gpt-4o-mini"
52
+ timeout_seconds: float = 120.0 # wall-clock budget per LLM call
53
+ max_concurrent: int = 1 # max simultaneous LLM inferences
51
54
 
52
55
 
53
56
  class VectorStoreConfig(BaseModel):
@@ -0,0 +1,169 @@
1
+ """Concurrency utilities for CPU-bound graph inference.
2
+
3
+ LLM inference (llama.cpp) is CPU-bound and can run for 5–30 s. Running it
4
+ directly on the asyncio event loop starves every other in-flight request.
5
+
6
+ This module provides:
7
+ - An asyncio.Semaphore that caps simultaneous LLM inferences.
8
+ - ``run_in_thread`` — wraps a blocking callable in asyncio.to_thread with an
9
+ optional timeout so the event loop stays responsive.
10
+ - ``stream_sync_generator`` — converts a blocking sync generator (e.g.
11
+ LLM token stream) into an async generator via a thread + queue, allowing
12
+ real token-by-token streaming without blocking the event loop.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import logging
19
+ from collections.abc import AsyncGenerator, Generator
20
+ from concurrent.futures import ThreadPoolExecutor
21
+ from typing import Any, Callable, TypeVar
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ _semaphore: asyncio.Semaphore | None = None
26
+ _max_concurrent: int = 1
27
+ _timeout_seconds: float = 120.0
28
+
29
+ # Dedicated thread pool for LLM / embedding inference.
30
+ # Using a bounded pool prevents runaway thread creation under load.
31
+ _INFERENCE_POOL = ThreadPoolExecutor(max_workers=4, thread_name_prefix="minder-inference")
32
+
33
+ T = TypeVar("T")
34
+
35
+ _SENTINEL = object()
36
+
37
+
38
def configure(*, max_concurrent: int = 1, timeout_seconds: float = 120.0) -> None:
    """Set the module-wide inference concurrency cap and timeout budget.

    Intended to be called once at startup. Each call replaces the shared
    semaphore with a fresh one sized to the (clamped) concurrency cap.

    Args:
        max_concurrent: Requested number of simultaneous LLM inferences.
            Values below 1 are raised to 1.
        timeout_seconds: Requested default timeout for LLM calls. Values
            below 10 seconds are raised to 10.0.
    """
    global _max_concurrent, _timeout_seconds, _semaphore

    # Clamp both knobs to their floors before publishing them.
    concurrency_cap = max_concurrent if max_concurrent > 1 else 1
    timeout_budget = timeout_seconds if timeout_seconds > 10.0 else 10.0

    _max_concurrent = concurrency_cap
    _timeout_seconds = timeout_budget
    _semaphore = asyncio.Semaphore(concurrency_cap)
44
+
45
+
46
def _get_semaphore() -> asyncio.Semaphore:
    """Return the shared LLM semaphore, creating it on first use.

    When ``configure`` has not run yet, the semaphore is built lazily from
    the current module-level ``_max_concurrent`` value and cached.
    """
    global _semaphore
    current = _semaphore
    if current is not None:
        return current
    _semaphore = asyncio.Semaphore(_max_concurrent)
    return _semaphore
51
+
52
+
53
async def run_in_thread(
    fn: Callable[..., T],
    /,
    *args: Any,
    timeout: float | None = None,
    use_llm_semaphore: bool = False,
) -> T:
    """Run a blocking callable in the inference thread pool.

    Args:
        fn: Blocking callable.
        *args: Positional arguments forwarded to fn.
        timeout: Maximum seconds to wait. A falsy value (``None`` or ``0``)
            means "not provided": the configured global timeout is used when
            ``use_llm_semaphore`` is True, otherwise there is no timeout.
        use_llm_semaphore: Acquire the global LLM concurrency semaphore before
            running. Use this for actual LLM inference calls so we never run
            more than ``max_concurrent`` inferences simultaneously.

    Returns:
        Whatever ``fn(*args)`` returns.

    Raises:
        asyncio.TimeoutError: The call did not finish within the timeout.
            A thread cannot be interrupted, so the worker keeps running in
            the background after this is raised.
        Exception: Anything raised by ``fn`` is propagated unchanged.
    """
    effective_timeout = timeout or (_timeout_seconds if use_llm_semaphore else None)
    loop = asyncio.get_running_loop()

    if not use_llm_semaphore:
        pending = loop.run_in_executor(_INFERENCE_POOL, fn, *args)
        if effective_timeout:
            return await asyncio.wait_for(pending, timeout=effective_timeout)
        return await pending

    sem = _get_semaphore()
    await sem.acquire()
    try:
        pending = loop.run_in_executor(_INFERENCE_POOL, fn, *args)
    except BaseException:
        # Submission failed, so no worker will ever release the slot.
        sem.release()
        raise

    def _release_slot(done: asyncio.Future[Any]) -> None:
        # Runs only once the worker thread has really finished. Releasing
        # here (instead of when the awaiting side gives up) keeps the
        # "at most max_concurrent inferences" guarantee after a timeout or
        # a cancelled caller.
        sem.release()
        if not done.cancelled():
            done.exception()  # mark retrieved; the awaiting side may be gone

    pending.add_done_callback(_release_slot)

    try:
        # shield() stops wait_for / caller cancellation from cancelling the
        # executor future itself, which would fire the done-callback (and
        # free the slot) while the thread is still busy.
        if effective_timeout:
            return await asyncio.wait_for(
                asyncio.shield(pending), timeout=effective_timeout
            )
        return await asyncio.shield(pending)
    except asyncio.TimeoutError:
        logger.warning("LLM inference timed out after %.0f s", effective_timeout)
        raise
91
+
92
+
93
async def stream_sync_generator(
    gen_fn: Callable[..., Generator[Any, None, None]],
    /,
    *args: Any,
    timeout: float | None = None,
    use_llm_semaphore: bool = True,
) -> AsyncGenerator[Any, None]:
    """Adapt a blocking sync generator into an async generator.

    The generator runs inside the inference thread pool so the asyncio event
    loop is never blocked. Items are forwarded through a bounded
    asyncio.Queue so consumers receive them as they are produced.

    Args:
        gen_fn: Callable returning the blocking generator to drain.
        *args: Positional arguments forwarded to ``gen_fn``.
        timeout: Time budget in seconds. A falsy value (``None`` or ``0``)
            means "not provided": the configured global timeout is used when
            ``use_llm_semaphore`` is True, otherwise there is no timeout.
        use_llm_semaphore: Hold the global LLM concurrency semaphore for as
            long as the producer thread is running.

    Yields:
        Each item produced by the generator, in order.

    Raises:
        Exception: An exception raised inside the generator is re-raised in
            the consumer. On timeout the stream ends without raising, after
            a warning is logged.

    Usage::

        async for event in stream_sync_generator(llm_node.stream, state):
            yield event
    """
    import threading  # local import: only this adapter needs a cross-thread flag

    loop = asyncio.get_running_loop()
    queue: asyncio.Queue[Any] = asyncio.Queue(maxsize=64)
    effective_timeout = timeout or (_timeout_seconds if use_llm_semaphore else None)
    stop_requested = threading.Event()

    def _put(item: Any) -> None:
        # Blocks the producer thread until the loop has accepted the item.
        asyncio.run_coroutine_threadsafe(queue.put(item), loop).result()

    def _producer() -> None:
        try:
            for item in gen_fn(*args):
                # Once the consumer has gone away, stop instead of refilling
                # the queue and blocking this pool thread forever.
                if stop_requested.is_set():
                    break
                _put(item)
        except Exception as exc:
            _put(exc)
        finally:
            _put(_SENTINEL)

    sem = _get_semaphore() if use_llm_semaphore else None
    if sem is not None:
        await sem.acquire()
    try:
        worker = loop.run_in_executor(_INFERENCE_POOL, _producer)
    except BaseException:
        # Submission failed, so no worker will ever release the slot.
        if sem is not None:
            sem.release()
        raise

    def _on_worker_done(done: asyncio.Future[Any]) -> None:
        # Release the slot only when the thread has really finished, so a
        # timed-out or abandoned stream cannot overlap the next inference.
        if sem is not None:
            sem.release()
        if not done.cancelled() and done.exception() is not None:
            logger.warning("LLM stream producer failed", exc_info=done.exception())

    worker.add_done_callback(_on_worker_done)

    deadline = loop.time() + effective_timeout if effective_timeout else None
    try:
        while True:
            # NOTE(review): the 0.1 s floor means a stream that keeps
            # delivering items is not cut off at the deadline; only a stall
            # ends it. Kept as in the original - confirm this is intended.
            remaining = (
                max(0.1, deadline - loop.time()) if deadline is not None else None
            )
            try:
                item = await asyncio.wait_for(queue.get(), timeout=remaining)
            except asyncio.TimeoutError:
                logger.warning(
                    "LLM stream timed out after %.0f s", effective_timeout
                )
                # End the stream quietly. The worker is deliberately neither
                # cancelled nor awaited: cancelling the wrapper future cannot
                # stop a running thread, and awaiting it could hang.
                return
            if item is _SENTINEL:
                return
            if isinstance(item, Exception):
                raise item
            yield item
    finally:
        # Tell the producer to stop, then drain so a put() that is already
        # blocked on a full queue can complete and the thread can exit.
        stop_requested.set()
        while True:
            try:
                queue.get_nowait()
            except asyncio.QueueEmpty:
                break
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import asyncio
3
4
  from dataclasses import dataclass, field
4
5
  import inspect
5
6
  import uuid
@@ -7,9 +8,11 @@ from typing import Any
7
8
 
8
9
  from minder.config import MinderConfig
9
10
  from minder.graph.checkpoint import MinderCheckpointSaver
11
+ from minder.graph.concurrency import run_in_thread
10
12
  from minder.graph.edges import determine_next_edge
11
13
  from minder.graph.nodes import (
12
14
  ClarificationNode,
15
+ ContextEnricherNode,
13
16
  EvaluatorNode,
14
17
  GuardNode,
15
18
  LLMNode,
@@ -41,6 +44,7 @@ class GraphNodes:
41
44
  evaluator: EvaluatorNode
42
45
  reranker: RerankerNode | None = field(default=None)
43
46
  reflection: ReflectionNode | None = field(default=None)
47
+ context_enricher: ContextEnricherNode | None = field(default=None)
44
48
 
45
49
 
46
50
  class InternalGraphExecutor:
@@ -52,22 +56,32 @@ class InternalGraphExecutor:
52
56
  state.metadata.setdefault("attempt_failures", [])
53
57
  state.metadata["orchestration_runtime"] = "internal"
54
58
  state = await self._nodes.workflow_planner.run(state)
55
- state = self._nodes.planning.run(state)
56
- state = self._nodes.clarification.run(state)
59
+ # Fast sync nodes — run in thread to yield control to the event loop
60
+ state = await run_in_thread(self._nodes.planning.run, state)
61
+ state = await run_in_thread(self._nodes.clarification.run, state)
57
62
  if state.metadata.get("needs_clarification"):
58
63
  return state
59
64
  state = await self._nodes.retriever.run(state)
60
65
  if self._nodes.reranker is not None:
61
66
  state = await self._nodes.reranker.run(state)
67
+ if self._nodes.context_enricher is not None:
68
+ state = await self._nodes.context_enricher.run(state)
62
69
 
63
70
  attempt = 0
64
71
  while True:
65
72
  attempt += 1
66
73
  state.retry_count = attempt - 1
67
- state = self._nodes.reasoning.run(state)
68
- state = self._nodes.llm.run(state)
69
- state = self._nodes.guard.run(state)
70
- state = self._nodes.verification.run(state)
74
+ # reasoning builds the prompt (CPU-bound string work)
75
+ state = await run_in_thread(self._nodes.reasoning.run, state)
76
+ # LLM inference is the main bottleneck — run in dedicated thread
77
+ # with semaphore + timeout so other requests keep moving
78
+ state = await run_in_thread(
79
+ self._nodes.llm.run,
80
+ state,
81
+ use_llm_semaphore=True,
82
+ )
83
+ state = await run_in_thread(self._nodes.guard.run, state)
84
+ state = await run_in_thread(self._nodes.verification.run, state)
71
85
  edge = determine_next_edge(state)
72
86
  state.transition_log.append(
73
87
  {
@@ -101,7 +115,7 @@ class InternalGraphExecutor:
101
115
  )
102
116
  state.metadata["retry_reason"] = retry_reason
103
117
 
104
- state = self._nodes.evaluator.run(state)
118
+ state = await run_in_thread(self._nodes.evaluator.run, state)
105
119
  state.metadata["edge"] = determine_next_edge(state)
106
120
 
107
121
  if self._nodes.reflection is not None:
@@ -228,6 +242,12 @@ class LangGraphExecutorAdapter:
228
242
  "reranker", self._wrap_state_handler(self._nodes.reranker.run)
229
243
  )
230
244
 
245
+ if self._nodes.context_enricher is not None:
246
+ workflow.add_node(
247
+ "context_enricher",
248
+ self._wrap_state_handler(self._nodes.context_enricher.run),
249
+ )
250
+
231
251
  workflow.add_node(
232
252
  "reasoning", self._wrap_state_handler(self._node_reasoning_wrapper)
233
253
  )
@@ -295,11 +315,18 @@ class LangGraphExecutorAdapter:
295
315
  else:
296
316
  retrieval_end_node = "retriever"
297
317
 
318
+ has_enricher = self._nodes.context_enricher is not None
298
319
  if self._nodes.reranker is not None:
299
320
  workflow.add_edge(retrieval_end_node, "reranker")
300
- workflow.add_edge("reranker", "reasoning")
321
+ post_retrieval_node = "reranker"
301
322
  else:
302
- workflow.add_edge(retrieval_end_node, "reasoning")
323
+ post_retrieval_node = retrieval_end_node
324
+
325
+ if has_enricher:
326
+ workflow.add_edge(post_retrieval_node, "context_enricher")
327
+ workflow.add_edge("context_enricher", "reasoning")
328
+ else:
329
+ workflow.add_edge(post_retrieval_node, "reasoning")
303
330
 
304
331
  workflow.add_edge("reasoning", "llm")
305
332
  workflow.add_edge("llm", "guard")
@@ -444,11 +471,16 @@ class LangGraphExecutorAdapter:
444
471
 
445
472
  @staticmethod
446
473
  def _wrap_state_handler(handler): # noqa: ANN001
474
+ is_async = inspect.iscoroutinefunction(handler)
475
+
447
476
  async def wrapped(state): # noqa: ANN001
448
477
  graph_state = GraphState.model_validate(state)
449
- result = handler(graph_state)
450
- if inspect.isawaitable(result):
451
- result = await result
478
+ if is_async:
479
+ result = await handler(graph_state)
480
+ else:
481
+ # Run blocking sync handlers in a thread pool to avoid
482
+ # stalling the event loop during CPU-bound LLM inference.
483
+ result = await asyncio.to_thread(handler, graph_state)
452
484
  if isinstance(result, GraphState):
453
485
  return dict(result)
454
486
  return result
@@ -6,6 +6,8 @@ from time import perf_counter
6
6
 
7
7
  from minder.config import MinderConfig
8
8
  from minder.embedding.local import LocalEmbeddingProvider
9
+ from minder.graph import concurrency as _concurrency
10
+ from minder.graph.concurrency import run_in_thread, stream_sync_generator
9
11
  from minder.graph.edges import determine_next_edge
10
12
  from minder.graph.executor import (
11
13
  GraphNodes,
@@ -14,6 +16,7 @@ from minder.graph.executor import (
14
16
  )
15
17
  from minder.graph.nodes import (
16
18
  ClarificationNode,
19
+ ContextEnricherNode,
17
20
  EvaluatorNode,
18
21
  GuardNode,
19
22
  LLMNode,
@@ -45,6 +48,7 @@ class MinderGraph:
45
48
  clarification: ClarificationNode | None = None,
46
49
  retriever: RetrieverNode | None = None,
47
50
  reranker: RerankerNode | None = None,
51
+ context_enricher: ContextEnricherNode | None = None,
48
52
  reasoning: ReasoningNode | None = None,
49
53
  llm: LLMNode | None = None,
50
54
  guard: GuardNode | None = None,
@@ -75,6 +79,7 @@ class MinderGraph:
75
79
  score_threshold=config.retrieval.similarity_threshold,
76
80
  )
77
81
  self._reranker = reranker # None by default; pass RerankerNode(...) to activate
82
+ self._context_enricher = context_enricher or ContextEnricherNode(store)
78
83
  self._reasoning = reasoning or ReasoningNode()
79
84
  self._llm = llm or LLMNode(
80
85
  primary=create_llm(config.llm),
@@ -94,12 +99,18 @@ class MinderGraph:
94
99
  self._error_store = error_store or store
95
100
  self._graph_tools = graph_tools
96
101
  self._cached_executor: InternalGraphExecutor | LangGraphExecutorAdapter | None = None
102
+ # Apply LLM concurrency and timeout settings from config
103
+ _concurrency.configure(
104
+ max_concurrent=config.llm.max_concurrent,
105
+ timeout_seconds=config.llm.timeout_seconds,
106
+ )
97
107
  self._nodes = GraphNodes(
98
108
  workflow_planner=self._workflow_planner,
99
109
  planning=self._planning,
100
110
  clarification=self._clarification,
101
111
  retriever=self._retriever,
102
112
  reranker=self._reranker,
113
+ context_enricher=self._context_enricher,
103
114
  reasoning=self._reasoning,
104
115
  llm=self._llm,
105
116
  guard=self._guard,
@@ -144,19 +155,32 @@ class MinderGraph:
144
155
  state = await self._nodes.retriever.run(state)
145
156
  if self._nodes.reranker is not None:
146
157
  state = await self._nodes.reranker.run(state)
158
+ if self._nodes.context_enricher is not None:
159
+ state = await self._nodes.context_enricher.run(state)
147
160
 
148
161
  attempt = 0
149
162
  while True:
150
163
  attempt += 1
151
164
  state.retry_count = attempt - 1
152
- state = self._nodes.reasoning.run(state)
165
+ state = await run_in_thread(self._nodes.reasoning.run, state)
153
166
  yield {"type": "attempt", "attempt": attempt}
154
- for event in self._nodes.llm.stream(state):
167
+ # Stream LLM tokens without blocking the event loop.
168
+ # stream_sync_generator runs the sync generator in the inference
169
+ # thread pool and forwards items through an asyncio.Queue.
170
+ async for event in stream_sync_generator(
171
+ self._nodes.llm.stream,
172
+ state,
173
+ use_llm_semaphore=True,
174
+ ):
155
175
  if str(event.get("type")) == "result":
176
+ # Capture the final LLM output written back to state
177
+ result_data = dict(event.get("result", {}) or {})
178
+ if result_data:
179
+ state.llm_output = result_data
156
180
  continue
157
181
  yield {**event, "attempt": attempt}
158
- state = self._nodes.guard.run(state)
159
- state = self._nodes.verification.run(state)
182
+ state = await run_in_thread(self._nodes.guard.run, state)
183
+ state = await run_in_thread(self._nodes.verification.run, state)
160
184
  edge = determine_next_edge(state)
161
185
  state.transition_log.append(
162
186
  {
@@ -196,7 +220,7 @@ class MinderGraph:
196
220
  "edge": edge,
197
221
  }
198
222
 
199
- state = self._nodes.evaluator.run(state)
223
+ state = await run_in_thread(self._nodes.evaluator.run, state)
200
224
  state.metadata["edge"] = determine_next_edge(state)
201
225
  await self._persist_history(state)
202
226
  await self._persist_error_if_needed(state)
@@ -1,4 +1,5 @@
1
1
  from .clarification import ClarificationNode
2
+ from .context_enricher import ContextEnricherNode
2
3
  from .evaluator import EvaluatorNode
3
4
  from .guard import GuardNode
4
5
  from .llm import LLMNode
@@ -17,6 +18,7 @@ from .workflow_planner import WorkflowPlannerNode
17
18
 
18
19
  __all__ = [
19
20
  "ClarificationNode",
21
+ "ContextEnricherNode",
20
22
  "DockerSandboxRunner",
21
23
  "EvaluatorNode",
22
24
  "GuardNode",
@@ -0,0 +1,186 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any
5
+
6
+ from minder.graph.state import GraphState
7
+ from minder.store.interfaces import IOperationalStore
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
# Nouns that identify a data type the user is asking about.
# Each set mixes English and Vietnamese terms and is checked against the
# lowercased query; a hit on a set makes ContextEnricherNode.run fetch the
# corresponding kind of item (skills, memories, or errors).
_SKILL_NOUNS = frozenset({
    "skill", "skills", "kỹ năng", "snippet", "snippets", "function",
    "method", "pattern", "utility", "helper", "code pattern",
})
_MEMORY_NOUNS = frozenset({
    "memory", "memories", "note", "notes", "tài liệu", "kiến thức",
    "ghi chú", "lưu ý", "reminder", "fact", "facts",
})
_ERROR_NOUNS = frozenset({
    "error", "errors", "bug", "bugs", "exception", "lỗi", "issue",
    "issues", "problem", "problems", "crash",
})

# Verbs / phrases that signal the user wants analysis / enumeration.
# NOTE(review): this set is not referenced by any code visible in this
# module -- confirm whether it is still used elsewhere or can be removed.
_ANALYSIS_VERBS = frozenset({
    "analyze", "analysis", "phân tích", "tóm tắt", "summarize", "summary",
    "list", "liệt kê", "show", "give me", "tôi có", "what", "how many",
    "xem", "tất cả", "all", "overview", "review", "explain",
    "mô tả", "kể", "nêu", "describe", "tell me", "breakdown",
})

# Tech tags that can appear as query words.
# Kept as a list (not a set) because _extract_tag_hints iterates it and
# returns the hints in this declaration order.
_KNOWN_TAGS = [
    "backend", "frontend", "api", "database", "auth", "authentication",
    "authorization", "testing", "deployment", "ci", "cd", "docker",
    "kubernetes", "k8s", "python", "javascript", "typescript", "react",
    "fastapi", "django", "flask", "sqlalchemy", "redis", "postgresql",
    "sqlite", "mongodb", "async", "microservice", "security", "logging",
    "monitoring", "refactor", "pattern", "utility", "helper", "caching",
]

# Upper bound on the number of items emitted per item type; skills,
# memories and errors are each capped separately, not as a combined total.
_MAX_ENRICHED_ITEMS = 30
# Maximum number of characters of an item's content that _format_items keeps.
_MAX_CONTENT_CHARS = 1200
45
+
46
+
47
+ def _query_lower(state: GraphState) -> str:
48
+ return str(state.query or "").lower()
49
+
50
+
51
+ def _hits(query: str, keywords: frozenset[str]) -> bool:
52
+ return any(kw in query for kw in keywords)
53
+
54
+
55
def _extract_tag_hints(query: str) -> list[str]:
    """Return the known tech tags that appear as whole words in *query*.

    Substring matching reports tags that are merely embedded in other words
    ("ci" in "specific", "api" in "rapid", "auth" in "author").  A spurious
    hint is worse than a missed one: hints narrow the item pool to
    tag-matching entries, whereas no hint simply falls back to the full
    pool.  The query is therefore split into alphanumeric words and a tag
    only counts when it equals one of them; a single trailing "s" is
    tolerated so simple plurals ("apis", "microservices") still match.

    Args:
        query: Text to scan; callers pass it already lowercased.

    Returns:
        Matching tags in ``_KNOWN_TAGS`` declaration order.
    """
    # Treat every non-alphanumeric character as a separator ("ci/cd", "k8s,").
    words = set("".join(ch if ch.isalnum() else " " for ch in query).split())
    # Add singular forms of simple plurals; the comprehension is fully built
    # before the update, so the set is not mutated while being iterated.
    words.update({word[:-1] for word in words if len(word) > 1 and word.endswith("s")})
    return [tag for tag in _KNOWN_TAGS if tag in words]
57
+
58
+
59
class ContextEnricherNode:
    """Fetch structured store data (skills, memories, errors) when the query
    requests analysis or enumeration of those items.

    The vector retriever only searches ingested code documents.  Skills and
    memories live in a separate table and are never seen by the LLM unless
    explicitly fetched here.  This node detects the intent and populates
    ``state.metadata["enriched_context"]`` before the reasoning node builds
    the LLM prompt.
    """

    def __init__(self, store: IOperationalStore) -> None:
        """Keep a reference to the operational store used for all lookups."""
        self._store = store

    async def run(self, state: GraphState) -> GraphState:
        """Attach matching skills / memories / errors to ``state.metadata``.

        The state is returned unchanged when the query names none of the
        known data-type nouns, or when every lookup comes back empty.

        Args:
            state: Graph state carrying ``query``, ``user_id`` and ``metadata``.

        Returns:
            The same state object, possibly with
            ``metadata["enriched_context"]`` set to a list of item dicts.
        """
        query = _query_lower(state)

        wants_skills = _hits(query, _SKILL_NOUNS)
        wants_memories = _hits(query, _MEMORY_NOUNS)
        wants_errors = _hits(query, _ERROR_NOUNS)

        # An explicit data-type noun is required — analysis verbs alone are not
        # enough, to avoid false positives on general questions (e.g. "what is X?").
        if not (wants_skills or wants_memories or wants_errors):
            return state

        tag_hints = _extract_tag_hints(query)
        enriched: list[dict[str, Any]] = []

        if wants_skills:
            enriched += await self._fetch_skills(state, tag_hints)

        if wants_memories:
            enriched += await self._fetch_memories(state, tag_hints)

        if wants_errors:
            enriched += await self._fetch_errors()

        if enriched:
            state.metadata["enriched_context"] = enriched
            logger.debug(
                "ContextEnricher: %d items fetched for query %r",
                len(enriched),
                # Same normalisation as _query_lower, so logging can never
                # fail on a query that is not a plain string.
                str(state.query or "")[:80],
            )

        return state

    async def _fetch_by_kind(
        self,
        state: GraphState,
        tag_hints: list[str],
        *,
        is_memory: bool,
        item_type: str,
        label: str,
    ) -> list[dict[str, Any]]:
        """List the user's skills or memories and format them for the prompt.

        Lookups are best-effort: a store failure is logged at debug level
        (with traceback) and treated as "no items".
        """
        try:
            items = await self._store.list_skills_by_kind(
                is_memory=is_memory,
                owner_id=state.user_id,
            )
        except Exception as exc:
            logger.debug(
                "ContextEnricher.%s failed: %s", label, exc, exc_info=True
            )
            return []
        return _format_items(items or [], tag_hints, item_type=item_type)

    async def _fetch_skills(
        self, state: GraphState, tag_hints: list[str]
    ) -> list[dict[str, Any]]:
        """Return the user's skills (non-memory entries) as context items."""
        return await self._fetch_by_kind(
            state,
            tag_hints,
            is_memory=False,
            item_type="skill",
            label="list_skills",
        )

    async def _fetch_memories(
        self, state: GraphState, tag_hints: list[str]
    ) -> list[dict[str, Any]]:
        """Return the user's memories as context items."""
        return await self._fetch_by_kind(
            state,
            tag_hints,
            is_memory=True,
            item_type="memory",
            label="list_memories",
        )

    async def _fetch_errors(self) -> list[dict[str, Any]]:
        """Return stored errors as context items (best-effort).

        Error messages are truncated to ``_MAX_CONTENT_CHARS`` so that a long
        message or stack trace cannot dominate the prompt, matching how skill
        and memory content is limited.
        """
        # NOTE(review): unlike skills and memories this lookup is not scoped
        # by owner -- confirm that listing every stored error is intended.
        try:
            errors = await self._store.list_errors()
        except Exception as exc:
            logger.debug(
                "ContextEnricher.list_errors failed: %s", exc, exc_info=True
            )
            return []
        return [
            {
                "type": "error",
                "title": str(getattr(err, "error_code", "") or ""),
                "content": str(getattr(err, "error_message", "") or "")[
                    :_MAX_CONTENT_CHARS
                ],
                "tags": [],
                "quality_score": 0.0,
                "language": "",
            }
            for err in (errors or [])[:_MAX_ENRICHED_ITEMS]
        ]
150
+
151
+
152
+ def _relevance(item: Any, tag_hints: list[str]) -> float:
153
+ tags = [t.lower() for t in (getattr(item, "tags", None) or [])]
154
+ tag_score = sum(1.5 for hint in tag_hints if hint in tags)
155
+ return tag_score + float(getattr(item, "quality_score", 0) or 0)
156
+
157
+
158
def _format_items(
    items: list[Any], tag_hints: list[str], *, item_type: str
) -> list[dict[str, Any]]:
    """Rank store items and convert the best ones into plain context dicts.

    Items are ordered by ``_relevance`` (highest first).  When tag hints are
    given and at least one item carries a hinted tag, only those items are
    kept; otherwise the whole ranked list is used.  At most
    ``_MAX_ENRICHED_ITEMS`` entries are returned and each entry's content is
    cut to ``_MAX_CONTENT_CHARS`` characters.
    """
    ranked = list(items)
    ranked.sort(key=lambda entry: _relevance(entry, tag_hints), reverse=True)

    def _has_hinted_tag(entry: Any) -> bool:
        lowered = [tag.lower() for tag in (getattr(entry, "tags", None) or [])]
        return any(hint in lowered for hint in tag_hints)

    # When tag hints are given, prefer tag-matching items; fall back to all.
    pool = ranked
    if tag_hints:
        tagged = [entry for entry in ranked if _has_hinted_tag(entry)]
        if tagged:
            pool = tagged

    formatted: list[dict[str, Any]] = []
    for entry in pool[:_MAX_ENRICHED_ITEMS]:
        body = str(getattr(entry, "content", "") or "")
        formatted.append(
            {
                "type": item_type,
                "title": str(getattr(entry, "title", "") or ""),
                "content": body[:_MAX_CONTENT_CHARS],
                "tags": list(getattr(entry, "tags", None) or []),
                "quality_score": float(getattr(entry, "quality_score", 0) or 0),
                "language": str(getattr(entry, "language", "") or ""),
            }
        )
    return formatted