minder-cli 0.4.9__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. {minder_cli-0.4.9 → minder_cli-0.5.0}/PKG-INFO +2 -3
  2. {minder_cli-0.4.9 → minder_cli-0.5.0}/pyproject.toml +2 -3
  3. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/config.py +8 -10
  4. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/continuity.py +1 -1
  5. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/embedding/local.py +53 -33
  6. minder_cli-0.5.0/src/minder/graph/executor.py +247 -0
  7. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/graph.py +2 -2
  8. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/llm/__init__.py +2 -2
  9. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/llm/factory.py +7 -10
  10. minder_cli-0.5.0/src/minder/llm/llama_cpp_llm.py +257 -0
  11. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/context.py +2 -2
  12. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/memories.py +2 -2
  13. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/prompts/formatter.py +1 -1
  14. minder_cli-0.5.0/src/minder/runtime.py +66 -0
  15. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/server.py +11 -22
  16. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/memory.py +2 -2
  17. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/query.py +2 -2
  18. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/skills.py +2 -2
  19. minder_cli-0.4.9/src/minder/graph/executor.py +0 -138
  20. minder_cli-0.4.9/src/minder/llm/litert.py +0 -331
  21. minder_cli-0.4.9/src/minder/runtime.py +0 -15
  22. {minder_cli-0.4.9 → minder_cli-0.5.0}/.gitignore +0 -0
  23. {minder_cli-0.4.9 → minder_cli-0.5.0}/LICENSE +0 -0
  24. {minder_cli-0.4.9 → minder_cli-0.5.0}/README-pypi.md +0 -0
  25. {minder_cli-0.4.9 → minder_cli-0.5.0}/README.md +0 -0
  26. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/__init__.py +0 -0
  27. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/api/routers/prompts.py +0 -0
  28. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/application/__init__.py +0 -0
  29. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/application/admin/__init__.py +0 -0
  30. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/application/admin/dto.py +0 -0
  31. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/application/admin/jobs.py +0 -0
  32. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/application/admin/use_cases.py +0 -0
  33. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/__init__.py +0 -0
  34. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/context.py +0 -0
  35. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/middleware.py +0 -0
  36. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/principal.py +0 -0
  37. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/rate_limiter.py +0 -0
  38. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/rbac.py +0 -0
  39. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/service.py +0 -0
  40. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/bootstrap/__init__.py +0 -0
  41. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/bootstrap/providers.py +0 -0
  42. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/bootstrap/transport.py +0 -0
  43. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/cache/__init__.py +0 -0
  44. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/cache/providers.py +0 -0
  45. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/chunking/__init__.py +0 -0
  46. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/chunking/code_splitter.py +0 -0
  47. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/chunking/splitter.py +0 -0
  48. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/cli.py +0 -0
  49. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/context_compactor.py +0 -0
  50. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/dev.py +0 -0
  51. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/embedding/__init__.py +0 -0
  52. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/embedding/base.py +0 -0
  53. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/embedding/openai.py +0 -0
  54. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/__init__.py +0 -0
  55. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/edges.py +0 -0
  56. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/__init__.py +0 -0
  57. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/clarification.py +0 -0
  58. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/evaluator.py +0 -0
  59. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/guard.py +0 -0
  60. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/llm.py +0 -0
  61. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/planning.py +0 -0
  62. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/reasoning.py +0 -0
  63. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/reflection.py +0 -0
  64. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/reranker.py +0 -0
  65. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/retriever.py +0 -0
  66. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/verification.py +0 -0
  67. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/workflow_planner.py +0 -0
  68. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/runtime.py +0 -0
  69. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/state.py +0 -0
  70. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/learning/__init__.py +0 -0
  71. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/learning/error_learner.py +0 -0
  72. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/learning/pattern_extractor.py +0 -0
  73. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/learning/quality_optimizer.py +0 -0
  74. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/learning/skill_synthesizer.py +0 -0
  75. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/llm/base.py +0 -0
  76. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/llm/openai.py +0 -0
  77. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/__init__.py +0 -0
  78. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/base.py +0 -0
  79. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/client.py +0 -0
  80. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/document.py +0 -0
  81. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/error.py +0 -0
  82. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/graph.py +0 -0
  83. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/history.py +0 -0
  84. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/job.py +0 -0
  85. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/prompt.py +0 -0
  86. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/repository.py +0 -0
  87. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/rule.py +0 -0
  88. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/session.py +0 -0
  89. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/skill.py +0 -0
  90. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/user.py +0 -0
  91. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/workflow.py +0 -0
  92. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/observability/__init__.py +0 -0
  93. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/observability/audit.py +0 -0
  94. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/observability/logging.py +0 -0
  95. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/observability/metrics.py +0 -0
  96. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/observability/tracing.py +0 -0
  97. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/__init__.py +0 -0
  98. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/__init__.py +0 -0
  99. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/commands/agent.py +0 -0
  100. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/commands/auth.py +0 -0
  101. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/commands/ide.py +0 -0
  102. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/commands/mcp.py +0 -0
  103. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/commands/sync.py +0 -0
  104. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/commands/update.py +0 -0
  105. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/main.py +0 -0
  106. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/utils/common.py +0 -0
  107. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/utils/config.py +0 -0
  108. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/utils/git.py +0 -0
  109. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/utils/version.py +0 -0
  110. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/__init__.py +0 -0
  111. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/__init__.py +0 -0
  112. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/api.py +0 -0
  113. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/dashboard.py +0 -0
  114. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/jobs.py +0 -0
  115. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/prompts.py +0 -0
  116. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/routes.py +0 -0
  117. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/runtime.py +0 -0
  118. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/search.py +0 -0
  119. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/skills.py +0 -0
  120. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/prompts/__init__.py +0 -0
  121. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/resources/__init__.py +0 -0
  122. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/retrieval/__init__.py +0 -0
  123. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/retrieval/hybrid.py +0 -0
  124. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/retrieval/mmr.py +0 -0
  125. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/retrieval/multi_hop.py +0 -0
  126. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/__init__.py +0 -0
  127. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/document.py +0 -0
  128. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/error.py +0 -0
  129. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/feedback.py +0 -0
  130. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/graph.py +0 -0
  131. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/history.py +0 -0
  132. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/interfaces.py +0 -0
  133. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/milvus/__init__.py +0 -0
  134. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/milvus/client.py +0 -0
  135. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/milvus/collections.py +0 -0
  136. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/milvus/vector_store.py +0 -0
  137. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/mongodb/__init__.py +0 -0
  138. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/mongodb/client.py +0 -0
  139. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/mongodb/graph_store.py +0 -0
  140. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/mongodb/indexes.py +0 -0
  141. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/mongodb/operational_store.py +0 -0
  142. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/relational.py +0 -0
  143. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/repo_state.py +0 -0
  144. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/rule.py +0 -0
  145. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/vector.py +0 -0
  146. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/__init__.py +0 -0
  147. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/auth.py +0 -0
  148. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/graph.py +0 -0
  149. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/ingest.py +0 -0
  150. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/registry.py +0 -0
  151. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/repo_scanner.py +0 -0
  152. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/search.py +0 -0
  153. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/session.py +0 -0
  154. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/workflow.py +0 -0
  155. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/transport/__init__.py +0 -0
  156. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/transport/base.py +0 -0
  157. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/transport/sse.py +0 -0
  158. {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/transport/stdio.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: minder-cli
3
- Version: 0.4.9
3
+ Version: 0.5.0
4
4
  Summary: Minder CLI is the command-line interface for the Minder self-hosted MCP platform.
5
5
  Project-URL: Homepage, https://github.com/hiimtrung/minder
6
6
  Project-URL: Repository, https://github.com/hiimtrung/minder
@@ -19,10 +19,9 @@ Requires-Dist: fastapi>=0.136.0
19
19
  Requires-Dist: httpx>=0.28.0
20
20
  Provides-Extra: server
21
21
  Requires-Dist: aiosqlite>=0.21.0; extra == 'server'
22
- Requires-Dist: fastembed>=0.5.1; extra == 'server'
23
22
  Requires-Dist: langgraph>=1.1.8; extra == 'server'
24
23
  Requires-Dist: litellm>=1.83.1; extra == 'server'
25
- Requires-Dist: litert-lm-api-nightly>=0.10; extra == 'server'
24
+ Requires-Dist: llama-cpp-python>=0.3.7; extra == 'server'
26
25
  Requires-Dist: mcp>=1.26.0; extra == 'server'
27
26
  Requires-Dist: motor>=3.7.0; extra == 'server'
28
27
  Requires-Dist: passlib[bcrypt]>=1.7.4; extra == 'server'
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "minder-cli"
7
- version = "0.4.9"
7
+ version = "0.5.0"
8
8
  description = "Minder CLI is the command-line interface for the Minder self-hosted MCP platform."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.14"
@@ -26,10 +26,9 @@ dependencies = [
26
26
  [project.optional-dependencies]
27
27
  server = [
28
28
  "aiosqlite>=0.21.0",
29
- "fastembed>=0.5.1",
30
29
  "langgraph>=1.1.8",
31
30
  "litellm>=1.83.1",
32
- "litert-lm-api-nightly>=0.10",
31
+ "llama-cpp-python>=0.3.7",
33
32
  "mcp>=1.26.0",
34
33
  "motor>=3.7.0",
35
34
  "passlib[bcrypt]>=1.7.4",
@@ -31,21 +31,19 @@ class AuthConfig(BaseModel):
31
31
 
32
32
 
33
33
  class EmbeddingConfig(BaseModel):
34
- provider: str = "fastembed"
35
- runtime: str = "auto" # "auto" | "fastembed" | "mock"
36
- fastembed_model: str = "mixedbread-ai/mxbai-embed-large-v1"
37
- fastembed_cache_dir: str = "~/.minder/cache/fastembed"
38
- dimensions: int = 1024
34
+ provider: str = "llama_cpp"
35
+ runtime: str = "auto" # "auto" | "llama_cpp" | "mock"
36
+ llama_cpp_model_repo: str = "ggml-org/embeddinggemma-300M-GGUF"
37
+ llama_cpp_model_file: str = "*.gguf"
38
+ dimensions: int = 768
39
39
  openai_api_key: Optional[str] = None
40
40
  openai_model: str = "text-embedding-3-small"
41
41
 
42
42
 
43
43
  class LLMConfig(BaseModel):
44
- provider: str = "litert" # "litert" | "openai"
45
- # LiteRT-LM fields
46
- litert_model_path: str = "~/.minder/models/gemma-4-E2B-it.litertlm"
47
- litert_backend: str = "auto" # "auto" (GPU on Mac, CPU elsewhere) | "cpu" | "gpu"
48
- litert_cache_dir: str = "~/.minder/cache/litert"
44
+ provider: str = "llama_cpp" # "llama_cpp" | "openai"
45
+ llama_cpp_model_repo: str = "ggml-org/gemma-4-E2B-it-GGUF"
46
+ llama_cpp_model_file: str = "*.gguf"
49
47
  context_length: int = 16384
50
48
  temperature: float = 0.1
51
49
  openai_api_key: Optional[str] = None
@@ -325,7 +325,7 @@ class ContinuitySynthesizer:
325
325
  ).items()
326
326
  },
327
327
  }, {
328
- "provider": "litert_lm",
328
+ "provider": self._config.llm.provider,
329
329
  "model": self._config.llm.provider,
330
330
  "runtime": self._llm.runtime,
331
331
  }
@@ -1,5 +1,5 @@
1
1
  """
2
- Local Embedding provider — delegates to FastEmbed using ONNX runtime.
2
+ Local Embedding provider — delegates to llama-cpp-python using GGUF models.
3
3
 
4
4
  Falls back to a deterministic hash-based stub if initialization fails.
5
5
  """
@@ -11,9 +11,10 @@ import hashlib
11
11
  import logging
12
12
  import math
13
13
  from collections import OrderedDict
14
- from pathlib import Path
15
14
  from typing import Any
16
15
 
16
+ from minder.runtime import llama_cpp_usable
17
+
17
18
  logger = logging.getLogger(__name__)
18
19
 
19
20
 
@@ -26,42 +27,53 @@ MAX_TEXT_LENGTH = 8000 # Safety truncation to avoid over-context (~2000 tokens)
26
27
  class LocalEmbeddingProvider:
27
28
  def __init__(
28
29
  self,
29
- fastembed_model: str = "mixedbread-ai/mxbai-embed-large-v1",
30
- fastembed_cache_dir: str = "~/.minder/cache/fastembed",
31
- dimensions: int = 1024,
30
+ llama_cpp_model_repo: str = "ggml-org/embeddinggemma-300M-GGUF",
31
+ llama_cpp_model_file: str = "*Q4_K_M.gguf",
32
+ dimensions: int = 768,
32
33
  runtime: str = "auto",
33
34
  ) -> None:
34
- self._model_name = fastembed_model
35
- self._cache_dir = str(Path(fastembed_cache_dir).expanduser())
35
+ self._model_repo = llama_cpp_model_repo
36
+ self._model_file = llama_cpp_model_file
36
37
  self._dimensions = dimensions
37
38
  self._runtime = runtime
38
39
  self._model: Any | None = None
39
- self._init_model()
40
+ self._initialized = False
41
+
42
+ def _ensure_initialized(self) -> None:
43
+ if not self._initialized:
44
+ self._init_model()
45
+ self._initialized = True
40
46
 
41
47
  def _init_model(self) -> None:
42
48
  if self._runtime == "mock":
43
49
  return
44
50
 
45
- cache_key = f"{self._model_name}:{self._cache_dir}"
51
+ cache_key = f"{self._model_repo}:{self._model_file}"
46
52
  if cache_key in _MODEL_CACHE:
47
53
  self._model = _MODEL_CACHE[cache_key]
48
54
  return
49
55
 
56
+ if not llama_cpp_usable():
57
+ logger.warning(
58
+ "CPU does not support AVX2; llama.cpp unavailable. Using mock embedding."
59
+ )
60
+ return
61
+
50
62
  try:
51
- from fastembed import TextEmbedding # type: ignore[import-not-found]
52
-
53
- # Optimize for speed and resource usage:
54
- # - threads=4 limits CPU usage while maintaining good throughput
55
- # - lazy_load=False ensures first request is fast
56
- self._model = TextEmbedding(
57
- model_name=self._model_name,
58
- cache_dir=self._cache_dir,
59
- threads=4,
63
+ from llama_cpp import Llama
64
+
65
+ logger.info("Initializing Llama.cpp embedding engine for %s", self._model_repo)
66
+ self._model = Llama.from_pretrained(
67
+ repo_id=self._model_repo,
68
+ filename=self._model_file,
69
+ embedding=True,
70
+ verbose=False,
60
71
  )
61
72
  _MODEL_CACHE[cache_key] = self._model
62
73
  except Exception as e:
63
74
  logger.warning(
64
- f"Failed to initialize FastEmbed model {self._model_name}: {e}. Using mock."
75
+ "Failed to initialize Llama.cpp model %s: %s. Using mock.",
76
+ self._model_repo, e,
65
77
  )
66
78
  self._model = None
67
79
 
@@ -69,9 +81,10 @@ class LocalEmbeddingProvider:
69
81
  def runtime(self) -> str:
70
82
  if self._runtime != "auto":
71
83
  return self._runtime
72
- return "fastembed" if self._model is not None else "mock"
84
+ return "llama_cpp" if self._model is not None else "mock"
73
85
 
74
86
  def embed(self, text: str) -> list[float]:
87
+ self._ensure_initialized()
75
88
  if not text:
76
89
  return [0.0] * self._dimensions
77
90
 
@@ -85,16 +98,14 @@ class LocalEmbeddingProvider:
85
98
 
86
99
  # 3. Perform embedding
87
100
  embedding: list[float]
88
- if self.runtime == "fastembed" and self._model is not None:
101
+ if self.runtime == "llama_cpp" and self._model is not None:
89
102
  try:
90
- # FastEmbed returns a generator of numpy arrays
91
- embeddings = list(self._model.embed([safe_text]))
92
- if embeddings:
93
- embedding = embeddings[0].tolist()[: self._dimensions]
94
- else:
95
- embedding = self._hash_embed(safe_text)
103
+ # llama_cpp returns a dict with 'data'
104
+ result = self._model.create_embedding(safe_text)
105
+ vector = result["data"][0]["embedding"]
106
+ embedding = vector[: self._dimensions]
96
107
  except Exception as e:
97
- logger.warning(f"FastEmbed failed during inference: {e}")
108
+ logger.warning(f"Llama.cpp failed during embedding inference: {e}")
98
109
  embedding = self._hash_embed(safe_text)
99
110
  else:
100
111
  embedding = self._hash_embed(safe_text)
@@ -107,6 +118,7 @@ class LocalEmbeddingProvider:
107
118
  return embedding
108
119
 
109
120
  def embed_many(self, texts: list[str]) -> list[list[float]]:
121
+ self._ensure_initialized()
110
122
  if not texts:
111
123
  return []
112
124
 
@@ -130,17 +142,19 @@ class LocalEmbeddingProvider:
130
142
  return results
131
143
 
132
144
  # 2. Batch embed the missing ones
133
- if self.runtime == "fastembed" and self._model is not None:
145
+ if self.runtime == "llama_cpp" and self._model is not None:
134
146
  try:
135
- embeddings = list(self._model.embed(to_embed_texts))
147
+ # pass list of strings directly
148
+ res = self._model.create_embedding(to_embed_texts)
149
+ embeddings = [data["embedding"] for data in res["data"]]
136
150
  for i, emb in enumerate(embeddings):
137
151
  idx = to_embed_indices[i]
138
- vector = emb.tolist()[: self._dimensions]
152
+ vector = emb[: self._dimensions]
139
153
  results[idx] = vector
140
154
  # Update cache
141
155
  _EMBEDDING_CACHE[to_embed_texts[i]] = vector
142
156
  except Exception as e:
143
- logger.warning(f"FastEmbed batch failed: {e}")
157
+ logger.warning(f"Llama.cpp batch embedding failed: {e}")
144
158
  for i, idx in enumerate(to_embed_indices):
145
159
  vector = self._hash_embed(to_embed_texts[i])
146
160
  results[idx] = vector
@@ -178,7 +192,13 @@ class LocalEmbeddingProvider:
178
192
  def clear_caches() -> None:
179
193
  """Clear global model and embedding caches to reclaim memory."""
180
194
  global _MODEL_CACHE, _EMBEDDING_CACHE
195
+ for model in _MODEL_CACHE.values():
196
+ try:
197
+ if hasattr(model, "close"):
198
+ model.close()
199
+ except Exception:
200
+ pass
181
201
  _MODEL_CACHE.clear()
182
202
  _EMBEDDING_CACHE.clear()
183
203
  gc.collect()
184
- logger.debug("Cleared FastEmbed global caches.")
204
+ logger.debug("Cleared Llama.cpp embedding global caches.")
@@ -0,0 +1,247 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+
5
+ from minder.graph.edges import determine_next_edge
6
+ from minder.graph.nodes import (
7
+ ClarificationNode,
8
+ EvaluatorNode,
9
+ GuardNode,
10
+ LLMNode,
11
+ PlanningNode,
12
+ ReasoningNode,
13
+ RerankerNode,
14
+ ReflectionNode,
15
+ RetrieverNode,
16
+ VerificationNode,
17
+ WorkflowPlannerNode,
18
+ )
19
+ from minder.graph.runtime import graph_runtime_name, load_langgraph_state_graph
20
+ from minder.graph.state import GraphState
21
+
22
+
23
+ @dataclass
24
+ class GraphNodes:
25
+ workflow_planner: WorkflowPlannerNode
26
+ planning: PlanningNode
27
+ clarification: ClarificationNode
28
+ retriever: RetrieverNode
29
+ reasoning: ReasoningNode
30
+ llm: LLMNode
31
+ guard: GuardNode
32
+ verification: VerificationNode
33
+ evaluator: EvaluatorNode
34
+ reranker: RerankerNode | None = field(default=None)
35
+ reflection: ReflectionNode | None = field(default=None)
36
+
37
+
38
+ class InternalGraphExecutor:
39
+ def __init__(self, nodes: GraphNodes) -> None:
40
+ self._nodes = nodes
41
+
42
+ async def run(self, state: GraphState) -> GraphState:
43
+ max_attempts = int(state.metadata.get("max_attempts", 1))
44
+ state.metadata.setdefault("attempt_failures", [])
45
+ state.metadata["orchestration_runtime"] = "internal"
46
+ state = await self._nodes.workflow_planner.run(state)
47
+ state = self._nodes.planning.run(state)
48
+ state = self._nodes.clarification.run(state)
49
+ if state.metadata.get("needs_clarification"):
50
+ return state
51
+ state = await self._nodes.retriever.run(state)
52
+ if self._nodes.reranker is not None:
53
+ state = await self._nodes.reranker.run(state)
54
+
55
+ attempt = 0
56
+ while True:
57
+ attempt += 1
58
+ state.retry_count = attempt - 1
59
+ state = self._nodes.reasoning.run(state)
60
+ state = self._nodes.llm.run(state)
61
+ state = self._nodes.guard.run(state)
62
+ state = self._nodes.verification.run(state)
63
+ edge = determine_next_edge(state)
64
+ state.transition_log.append(
65
+ {
66
+ "attempt": attempt,
67
+ "edge": edge,
68
+ "provider": state.llm_output.get("provider"),
69
+ "fallback_used": state.metadata.get("fallback_used", False),
70
+ }
71
+ )
72
+ if (
73
+ edge not in {"verification_failed", "guard_failed"}
74
+ or attempt >= max_attempts
75
+ ):
76
+ break
77
+ retry_reason = (
78
+ "; ".join(
79
+ str(reason)
80
+ for reason in state.guard_result.get("reasons", [])
81
+ if reason
82
+ )
83
+ if edge == "guard_failed"
84
+ else state.verification_result.get("stderr", "verification failed")
85
+ )
86
+ state.metadata["attempt_failures"].append(
87
+ {
88
+ "attempt": attempt,
89
+ "reason": retry_reason,
90
+ "provider": state.llm_output.get("provider"),
91
+ "edge": edge,
92
+ }
93
+ )
94
+ state.metadata["retry_reason"] = retry_reason
95
+
96
+ state = self._nodes.evaluator.run(state)
97
+ state.metadata["edge"] = determine_next_edge(state)
98
+
99
+ if self._nodes.reflection is not None:
100
+ state = await self._nodes.reflection.run(state)
101
+
102
+ return state
103
+
104
+
105
+ class LangGraphExecutorAdapter:
106
+ def __init__(self, nodes: GraphNodes) -> None:
107
+ self._nodes = nodes
108
+ self._internal = InternalGraphExecutor(nodes)
109
+ self._compiled_graph = None
110
+
111
+ async def run(self, state: GraphState) -> GraphState:
112
+ if graph_runtime_name() != "langgraph":
113
+ state = await self._internal.run(state)
114
+ state.metadata["orchestration_runtime"] = "internal"
115
+ return state
116
+
117
+ compiled = self._compiled_graph or self._build_compiled_graph()
118
+ self._compiled_graph = compiled
119
+
120
+ # StateGraph invocation
121
+ state.metadata.setdefault("attempt_failures", [])
122
+ result = await compiled.ainvoke(state)
123
+
124
+ if isinstance(result, GraphState):
125
+ result.metadata["orchestration_runtime"] = "langgraph"
126
+ result.metadata["edge"] = determine_next_edge(result)
127
+ return result
128
+ validated = GraphState.model_validate(result)
129
+ validated.metadata["orchestration_runtime"] = "langgraph"
130
+ validated.metadata["edge"] = determine_next_edge(validated)
131
+ return validated
132
+
133
+ def _build_compiled_graph(self):
134
+ state_graph_cls = load_langgraph_state_graph()
135
+ if state_graph_cls is None:
136
+ raise RuntimeError(
137
+ "LangGraph runtime requested but StateGraph is unavailable"
138
+ )
139
+
140
+ workflow = state_graph_cls(GraphState)
141
+
142
+ # 1. Add nodes
143
+ workflow.add_node("workflow_planner", self._nodes.workflow_planner.run)
144
+ workflow.add_node("planning", self._nodes.planning.run)
145
+ workflow.add_node("clarification", self._nodes.clarification.run)
146
+ workflow.add_node("retriever", self._nodes.retriever.run)
147
+
148
+ if self._nodes.reranker is not None:
149
+ workflow.add_node("reranker", self._nodes.reranker.run)
150
+
151
+ workflow.add_node("reasoning", self._node_reasoning_wrapper)
152
+ workflow.add_node("llm", self._nodes.llm.run)
153
+ workflow.add_node("guard", self._nodes.guard.run)
154
+ workflow.add_node("verification", self._nodes.verification.run)
155
+ workflow.add_node("evaluator", self._nodes.evaluator.run)
156
+
157
+ if self._nodes.reflection is not None:
158
+ workflow.add_node("reflection", self._nodes.reflection.run)
159
+
160
+ # 2. Add Edges
161
+ workflow.set_entry_point("workflow_planner")
162
+ workflow.add_edge("workflow_planner", "planning")
163
+ workflow.add_edge("planning", "clarification")
164
+
165
+ def clarification_router(state: GraphState) -> str:
166
+ if state.metadata.get("needs_clarification"):
167
+ return "END"
168
+ return "retriever"
169
+
170
+ workflow.add_conditional_edges("clarification", clarification_router, {"END": "__end__", "retriever": "retriever"})
171
+
172
+ if self._nodes.reranker is not None:
173
+ workflow.add_edge("retriever", "reranker")
174
+ workflow.add_edge("reranker", "reasoning")
175
+ else:
176
+ workflow.add_edge("retriever", "reasoning")
177
+
178
+ workflow.add_edge("reasoning", "llm")
179
+ workflow.add_edge("llm", "guard")
180
+ workflow.add_edge("guard", "verification")
181
+
182
+ def record_transition(state: GraphState) -> GraphState:
183
+ attempt = state.retry_count + 1
184
+ edge = determine_next_edge(state)
185
+
186
+ new_log = list(state.transition_log)
187
+ new_log.append(
188
+ {
189
+ "attempt": attempt,
190
+ "edge": edge,
191
+ "provider": state.llm_output.get("provider"),
192
+ "fallback_used": state.metadata.get("fallback_used", False),
193
+ }
194
+ )
195
+ state.transition_log = new_log
196
+
197
+ max_attempts = int(state.metadata.get("max_attempts", 1))
198
+ if edge in {"verification_failed", "guard_failed"} and attempt < max_attempts:
199
+ retry_reason = (
200
+ "; ".join(
201
+ str(reason)
202
+ for reason in state.guard_result.get("reasons", [])
203
+ if reason
204
+ )
205
+ if edge == "guard_failed"
206
+ else state.verification_result.get("stderr", "verification failed")
207
+ )
208
+ if "attempt_failures" not in state.metadata:
209
+ state.metadata["attempt_failures"] = []
210
+
211
+ state.metadata["attempt_failures"].append(
212
+ {
213
+ "attempt": attempt,
214
+ "reason": retry_reason,
215
+ "provider": state.llm_output.get("provider"),
216
+ "edge": edge,
217
+ }
218
+ )
219
+ state.metadata["retry_reason"] = retry_reason
220
+ return state
221
+
222
+ workflow.add_node("record_transition", record_transition)
223
+ workflow.add_edge("verification", "record_transition")
224
+
225
+ def check_attempt_loop(state: GraphState) -> str:
226
+ max_attempts = int(state.metadata.get("max_attempts", 1))
227
+ attempt = state.retry_count + 1
228
+ edge = determine_next_edge(state)
229
+ if edge not in {"verification_failed", "guard_failed"} or attempt >= max_attempts:
230
+ return "evaluator"
231
+ return "reasoning"
232
+
233
+ workflow.add_conditional_edges("record_transition", check_attempt_loop, {"reasoning": "reasoning", "evaluator": "evaluator"})
234
+
235
+ if self._nodes.reflection is not None:
236
+ workflow.add_edge("evaluator", "reflection")
237
+ workflow.add_edge("reflection", "__end__")
238
+ else:
239
+ workflow.add_edge("evaluator", "__end__")
240
+
241
+ return workflow.compile()
242
+
243
+ def _node_reasoning_wrapper(self, state: GraphState) -> GraphState:
244
+ if "attempt_failures" not in state.metadata:
245
+ state.metadata["attempt_failures"] = []
246
+ state.retry_count = len(state.metadata["attempt_failures"])
247
+ return self._nodes.reasoning.run(state)
@@ -61,8 +61,8 @@ class MinderGraph:
61
61
  self._clarification = clarification or ClarificationNode()
62
62
  vector_store = VectorStore(store, store)
63
63
  embedder = LocalEmbeddingProvider(
64
- fastembed_model=config.embedding.fastembed_model,
65
- fastembed_cache_dir=config.embedding.fastembed_cache_dir,
64
+ llama_cpp_model_repo=config.embedding.llama_cpp_model_repo,
65
+ llama_cpp_model_file=config.embedding.llama_cpp_model_file,
66
66
  dimensions=config.embedding.dimensions,
67
67
  runtime="auto",
68
68
  )
@@ -1,11 +1,11 @@
1
1
  from .base import LLMClient
2
2
  from .factory import create_llm
3
- from .litert import LiteRTModelLLM
3
+ from .llama_cpp_llm import LlamaCppLLM
4
4
  from .openai import OpenAIFallbackLLM
5
5
 
6
6
  __all__ = [
7
7
  "LLMClient",
8
- "LiteRTModelLLM",
8
+ "LlamaCppLLM",
9
9
  "OpenAIFallbackLLM",
10
10
  "create_llm",
11
11
  ]
@@ -2,8 +2,7 @@
2
2
  LLM provider factory — selects the correct provider based on config.
3
3
 
4
4
  Supported providers:
5
- - ``litert``: LiteRT-LM (on-device, recommended for local LLM)
6
-
5
+ - ``llama_cpp``: llama-cpp-python GGUF inference (on-device, recommended for local LLM)
7
6
  - ``openai``: OpenAI-compatible cloud API
8
7
  """
9
8
 
@@ -14,18 +13,16 @@ from minder.config import LLMConfig
14
13
 
15
14
  def create_llm(config: LLMConfig): # type: ignore[no-untyped-def]
16
15
  """Create an LLM client from the given configuration."""
17
- if config.provider == "litert":
18
- from minder.llm.litert import LiteRTModelLLM
16
+ if config.provider == "llama_cpp":
17
+ from minder.llm.llama_cpp_llm import LlamaCppLLM
19
18
 
20
- return LiteRTModelLLM(
21
- model_path=config.litert_model_path,
22
- backend=config.litert_backend,
23
- cache_dir=config.litert_cache_dir,
19
+ return LlamaCppLLM(
20
+ model_repo=config.llama_cpp_model_repo,
21
+ model_file=config.llama_cpp_model_file,
24
22
  context_length=config.context_length,
23
+ temperature=config.temperature,
25
24
  )
26
25
 
27
-
28
-
29
26
  if config.provider == "openai":
30
27
  from minder.llm.openai import OpenAIFallbackLLM
31
28