minder-cli 0.4.9__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {minder_cli-0.4.9 → minder_cli-0.5.0}/PKG-INFO +2 -3
- {minder_cli-0.4.9 → minder_cli-0.5.0}/pyproject.toml +2 -3
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/config.py +8 -10
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/continuity.py +1 -1
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/embedding/local.py +53 -33
- minder_cli-0.5.0/src/minder/graph/executor.py +247 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/graph.py +2 -2
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/llm/__init__.py +2 -2
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/llm/factory.py +7 -10
- minder_cli-0.5.0/src/minder/llm/llama_cpp_llm.py +257 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/context.py +2 -2
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/memories.py +2 -2
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/prompts/formatter.py +1 -1
- minder_cli-0.5.0/src/minder/runtime.py +66 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/server.py +11 -22
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/memory.py +2 -2
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/query.py +2 -2
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/skills.py +2 -2
- minder_cli-0.4.9/src/minder/graph/executor.py +0 -138
- minder_cli-0.4.9/src/minder/llm/litert.py +0 -331
- minder_cli-0.4.9/src/minder/runtime.py +0 -15
- {minder_cli-0.4.9 → minder_cli-0.5.0}/.gitignore +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/LICENSE +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/README-pypi.md +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/README.md +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/api/routers/prompts.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/application/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/application/admin/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/application/admin/dto.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/application/admin/jobs.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/application/admin/use_cases.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/context.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/middleware.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/principal.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/rate_limiter.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/rbac.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/auth/service.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/bootstrap/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/bootstrap/providers.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/bootstrap/transport.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/cache/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/cache/providers.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/chunking/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/chunking/code_splitter.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/chunking/splitter.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/cli.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/context_compactor.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/dev.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/embedding/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/embedding/base.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/embedding/openai.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/edges.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/clarification.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/evaluator.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/guard.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/llm.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/planning.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/reasoning.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/reflection.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/reranker.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/retriever.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/verification.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/nodes/workflow_planner.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/runtime.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/graph/state.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/learning/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/learning/error_learner.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/learning/pattern_extractor.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/learning/quality_optimizer.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/learning/skill_synthesizer.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/llm/base.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/llm/openai.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/base.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/client.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/document.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/error.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/graph.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/history.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/job.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/prompt.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/repository.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/rule.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/session.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/skill.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/user.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/models/workflow.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/observability/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/observability/audit.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/observability/logging.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/observability/metrics.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/observability/tracing.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/commands/agent.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/commands/auth.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/commands/ide.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/commands/mcp.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/commands/sync.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/commands/update.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/main.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/utils/common.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/utils/config.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/utils/git.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/cli/utils/version.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/api.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/dashboard.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/jobs.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/prompts.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/routes.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/runtime.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/search.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/presentation/http/admin/skills.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/prompts/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/resources/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/retrieval/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/retrieval/hybrid.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/retrieval/mmr.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/retrieval/multi_hop.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/document.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/error.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/feedback.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/graph.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/history.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/interfaces.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/milvus/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/milvus/client.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/milvus/collections.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/milvus/vector_store.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/mongodb/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/mongodb/client.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/mongodb/graph_store.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/mongodb/indexes.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/mongodb/operational_store.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/relational.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/repo_state.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/rule.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/store/vector.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/auth.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/graph.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/ingest.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/registry.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/repo_scanner.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/search.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/session.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/tools/workflow.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/transport/__init__.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/transport/base.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/transport/sse.py +0 -0
- {minder_cli-0.4.9 → minder_cli-0.5.0}/src/minder/transport/stdio.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: minder-cli
|
|
3
|
-
Version: 0.4.9
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Minder CLI is the command-line interface for the Minder self-hosted MCP platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hiimtrung/minder
|
|
6
6
|
Project-URL: Repository, https://github.com/hiimtrung/minder
|
|
@@ -19,10 +19,9 @@ Requires-Dist: fastapi>=0.136.0
|
|
|
19
19
|
Requires-Dist: httpx>=0.28.0
|
|
20
20
|
Provides-Extra: server
|
|
21
21
|
Requires-Dist: aiosqlite>=0.21.0; extra == 'server'
|
|
22
|
-
Requires-Dist: fastembed>=0.5.1; extra == 'server'
|
|
23
22
|
Requires-Dist: langgraph>=1.1.8; extra == 'server'
|
|
24
23
|
Requires-Dist: litellm>=1.83.1; extra == 'server'
|
|
25
|
-
Requires-Dist:
|
|
24
|
+
Requires-Dist: llama-cpp-python>=0.3.7; extra == 'server'
|
|
26
25
|
Requires-Dist: mcp>=1.26.0; extra == 'server'
|
|
27
26
|
Requires-Dist: motor>=3.7.0; extra == 'server'
|
|
28
27
|
Requires-Dist: passlib[bcrypt]>=1.7.4; extra == 'server'
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "minder-cli"
|
|
7
|
-
version = "0.4.9"
|
|
7
|
+
version = "0.5.0"
|
|
8
8
|
description = "Minder CLI is the command-line interface for the Minder self-hosted MCP platform."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.14"
|
|
@@ -26,10 +26,9 @@ dependencies = [
|
|
|
26
26
|
[project.optional-dependencies]
|
|
27
27
|
server = [
|
|
28
28
|
"aiosqlite>=0.21.0",
|
|
29
|
-
"fastembed>=0.5.1",
|
|
30
29
|
"langgraph>=1.1.8",
|
|
31
30
|
"litellm>=1.83.1",
|
|
32
|
-
"
|
|
31
|
+
"llama-cpp-python>=0.3.7",
|
|
33
32
|
"mcp>=1.26.0",
|
|
34
33
|
"motor>=3.7.0",
|
|
35
34
|
"passlib[bcrypt]>=1.7.4",
|
|
@@ -31,21 +31,19 @@ class AuthConfig(BaseModel):
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class EmbeddingConfig(BaseModel):
|
|
34
|
-
provider: str = "
|
|
35
|
-
runtime: str = "auto" # "auto" | "
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
dimensions: int =
|
|
34
|
+
provider: str = "llama_cpp"
|
|
35
|
+
runtime: str = "auto" # "auto" | "llama_cpp" | "mock"
|
|
36
|
+
llama_cpp_model_repo: str = "ggml-org/embeddinggemma-300M-GGUF"
|
|
37
|
+
llama_cpp_model_file: str = "*.gguf"
|
|
38
|
+
dimensions: int = 768
|
|
39
39
|
openai_api_key: Optional[str] = None
|
|
40
40
|
openai_model: str = "text-embedding-3-small"
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
class LLMConfig(BaseModel):
|
|
44
|
-
provider: str = "
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
litert_backend: str = "auto" # "auto" (GPU on Mac, CPU elsewhere) | "cpu" | "gpu"
|
|
48
|
-
litert_cache_dir: str = "~/.minder/cache/litert"
|
|
44
|
+
provider: str = "llama_cpp" # "llama_cpp" | "openai"
|
|
45
|
+
llama_cpp_model_repo: str = "ggml-org/gemma-4-E2B-it-GGUF"
|
|
46
|
+
llama_cpp_model_file: str = "*.gguf"
|
|
49
47
|
context_length: int = 16384
|
|
50
48
|
temperature: float = 0.1
|
|
51
49
|
openai_api_key: Optional[str] = None
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Local Embedding provider — delegates to
|
|
2
|
+
Local Embedding provider — delegates to llama-cpp-python using GGUF models.
|
|
3
3
|
|
|
4
4
|
Falls back to a deterministic hash-based stub if initialization fails.
|
|
5
5
|
"""
|
|
@@ -11,9 +11,10 @@ import hashlib
|
|
|
11
11
|
import logging
|
|
12
12
|
import math
|
|
13
13
|
from collections import OrderedDict
|
|
14
|
-
from pathlib import Path
|
|
15
14
|
from typing import Any
|
|
16
15
|
|
|
16
|
+
from minder.runtime import llama_cpp_usable
|
|
17
|
+
|
|
17
18
|
logger = logging.getLogger(__name__)
|
|
18
19
|
|
|
19
20
|
|
|
@@ -26,42 +27,53 @@ MAX_TEXT_LENGTH = 8000 # Safety truncation to avoid over-context (~2000 tokens)
|
|
|
26
27
|
class LocalEmbeddingProvider:
|
|
27
28
|
def __init__(
|
|
28
29
|
self,
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
dimensions: int =
|
|
30
|
+
llama_cpp_model_repo: str = "ggml-org/embeddinggemma-300M-GGUF",
|
|
31
|
+
llama_cpp_model_file: str = "*Q4_K_M.gguf",
|
|
32
|
+
dimensions: int = 768,
|
|
32
33
|
runtime: str = "auto",
|
|
33
34
|
) -> None:
|
|
34
|
-
self.
|
|
35
|
-
self.
|
|
35
|
+
self._model_repo = llama_cpp_model_repo
|
|
36
|
+
self._model_file = llama_cpp_model_file
|
|
36
37
|
self._dimensions = dimensions
|
|
37
38
|
self._runtime = runtime
|
|
38
39
|
self._model: Any | None = None
|
|
39
|
-
self.
|
|
40
|
+
self._initialized = False
|
|
41
|
+
|
|
42
|
+
def _ensure_initialized(self) -> None:
|
|
43
|
+
if not self._initialized:
|
|
44
|
+
self._init_model()
|
|
45
|
+
self._initialized = True
|
|
40
46
|
|
|
41
47
|
def _init_model(self) -> None:
|
|
42
48
|
if self._runtime == "mock":
|
|
43
49
|
return
|
|
44
50
|
|
|
45
|
-
cache_key = f"{self.
|
|
51
|
+
cache_key = f"{self._model_repo}:{self._model_file}"
|
|
46
52
|
if cache_key in _MODEL_CACHE:
|
|
47
53
|
self._model = _MODEL_CACHE[cache_key]
|
|
48
54
|
return
|
|
49
55
|
|
|
56
|
+
if not llama_cpp_usable():
|
|
57
|
+
logger.warning(
|
|
58
|
+
"CPU does not support AVX2; llama.cpp unavailable. Using mock embedding."
|
|
59
|
+
)
|
|
60
|
+
return
|
|
61
|
+
|
|
50
62
|
try:
|
|
51
|
-
from
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
threads=4,
|
|
63
|
+
from llama_cpp import Llama
|
|
64
|
+
|
|
65
|
+
logger.info("Initializing Llama.cpp embedding engine for %s", self._model_repo)
|
|
66
|
+
self._model = Llama.from_pretrained(
|
|
67
|
+
repo_id=self._model_repo,
|
|
68
|
+
filename=self._model_file,
|
|
69
|
+
embedding=True,
|
|
70
|
+
verbose=False,
|
|
60
71
|
)
|
|
61
72
|
_MODEL_CACHE[cache_key] = self._model
|
|
62
73
|
except Exception as e:
|
|
63
74
|
logger.warning(
|
|
64
|
-
|
|
75
|
+
"Failed to initialize Llama.cpp model %s: %s. Using mock.",
|
|
76
|
+
self._model_repo, e,
|
|
65
77
|
)
|
|
66
78
|
self._model = None
|
|
67
79
|
|
|
@@ -69,9 +81,10 @@ class LocalEmbeddingProvider:
|
|
|
69
81
|
def runtime(self) -> str:
|
|
70
82
|
if self._runtime != "auto":
|
|
71
83
|
return self._runtime
|
|
72
|
-
return "
|
|
84
|
+
return "llama_cpp" if self._model is not None else "mock"
|
|
73
85
|
|
|
74
86
|
def embed(self, text: str) -> list[float]:
|
|
87
|
+
self._ensure_initialized()
|
|
75
88
|
if not text:
|
|
76
89
|
return [0.0] * self._dimensions
|
|
77
90
|
|
|
@@ -85,16 +98,14 @@ class LocalEmbeddingProvider:
|
|
|
85
98
|
|
|
86
99
|
# 3. Perform embedding
|
|
87
100
|
embedding: list[float]
|
|
88
|
-
if self.runtime == "
|
|
101
|
+
if self.runtime == "llama_cpp" and self._model is not None:
|
|
89
102
|
try:
|
|
90
|
-
#
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
else:
|
|
95
|
-
embedding = self._hash_embed(safe_text)
|
|
103
|
+
# llama_cpp returns a dict with 'data'
|
|
104
|
+
result = self._model.create_embedding(safe_text)
|
|
105
|
+
vector = result["data"][0]["embedding"]
|
|
106
|
+
embedding = vector[: self._dimensions]
|
|
96
107
|
except Exception as e:
|
|
97
|
-
logger.warning(f"
|
|
108
|
+
logger.warning(f"Llama.cpp failed during embedding inference: {e}")
|
|
98
109
|
embedding = self._hash_embed(safe_text)
|
|
99
110
|
else:
|
|
100
111
|
embedding = self._hash_embed(safe_text)
|
|
@@ -107,6 +118,7 @@ class LocalEmbeddingProvider:
|
|
|
107
118
|
return embedding
|
|
108
119
|
|
|
109
120
|
def embed_many(self, texts: list[str]) -> list[list[float]]:
|
|
121
|
+
self._ensure_initialized()
|
|
110
122
|
if not texts:
|
|
111
123
|
return []
|
|
112
124
|
|
|
@@ -130,17 +142,19 @@ class LocalEmbeddingProvider:
|
|
|
130
142
|
return results
|
|
131
143
|
|
|
132
144
|
# 2. Batch embed the missing ones
|
|
133
|
-
if self.runtime == "
|
|
145
|
+
if self.runtime == "llama_cpp" and self._model is not None:
|
|
134
146
|
try:
|
|
135
|
-
|
|
147
|
+
# pass list of strings directly
|
|
148
|
+
res = self._model.create_embedding(to_embed_texts)
|
|
149
|
+
embeddings = [data["embedding"] for data in res["data"]]
|
|
136
150
|
for i, emb in enumerate(embeddings):
|
|
137
151
|
idx = to_embed_indices[i]
|
|
138
|
-
vector = emb
|
|
152
|
+
vector = emb[: self._dimensions]
|
|
139
153
|
results[idx] = vector
|
|
140
154
|
# Update cache
|
|
141
155
|
_EMBEDDING_CACHE[to_embed_texts[i]] = vector
|
|
142
156
|
except Exception as e:
|
|
143
|
-
logger.warning(f"
|
|
157
|
+
logger.warning(f"Llama.cpp batch embedding failed: {e}")
|
|
144
158
|
for i, idx in enumerate(to_embed_indices):
|
|
145
159
|
vector = self._hash_embed(to_embed_texts[i])
|
|
146
160
|
results[idx] = vector
|
|
@@ -178,7 +192,13 @@ class LocalEmbeddingProvider:
|
|
|
178
192
|
def clear_caches() -> None:
|
|
179
193
|
"""Clear global model and embedding caches to reclaim memory."""
|
|
180
194
|
global _MODEL_CACHE, _EMBEDDING_CACHE
|
|
195
|
+
for model in _MODEL_CACHE.values():
|
|
196
|
+
try:
|
|
197
|
+
if hasattr(model, "close"):
|
|
198
|
+
model.close()
|
|
199
|
+
except Exception:
|
|
200
|
+
pass
|
|
181
201
|
_MODEL_CACHE.clear()
|
|
182
202
|
_EMBEDDING_CACHE.clear()
|
|
183
203
|
gc.collect()
|
|
184
|
-
logger.debug("Cleared
|
|
204
|
+
logger.debug("Cleared Llama.cpp embedding global caches.")
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
|
|
5
|
+
from minder.graph.edges import determine_next_edge
|
|
6
|
+
from minder.graph.nodes import (
|
|
7
|
+
ClarificationNode,
|
|
8
|
+
EvaluatorNode,
|
|
9
|
+
GuardNode,
|
|
10
|
+
LLMNode,
|
|
11
|
+
PlanningNode,
|
|
12
|
+
ReasoningNode,
|
|
13
|
+
RerankerNode,
|
|
14
|
+
ReflectionNode,
|
|
15
|
+
RetrieverNode,
|
|
16
|
+
VerificationNode,
|
|
17
|
+
WorkflowPlannerNode,
|
|
18
|
+
)
|
|
19
|
+
from minder.graph.runtime import graph_runtime_name, load_langgraph_state_graph
|
|
20
|
+
from minder.graph.state import GraphState
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
|
|
24
|
+
class GraphNodes:
|
|
25
|
+
workflow_planner: WorkflowPlannerNode
|
|
26
|
+
planning: PlanningNode
|
|
27
|
+
clarification: ClarificationNode
|
|
28
|
+
retriever: RetrieverNode
|
|
29
|
+
reasoning: ReasoningNode
|
|
30
|
+
llm: LLMNode
|
|
31
|
+
guard: GuardNode
|
|
32
|
+
verification: VerificationNode
|
|
33
|
+
evaluator: EvaluatorNode
|
|
34
|
+
reranker: RerankerNode | None = field(default=None)
|
|
35
|
+
reflection: ReflectionNode | None = field(default=None)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class InternalGraphExecutor:
|
|
39
|
+
def __init__(self, nodes: GraphNodes) -> None:
|
|
40
|
+
self._nodes = nodes
|
|
41
|
+
|
|
42
|
+
async def run(self, state: GraphState) -> GraphState:
|
|
43
|
+
max_attempts = int(state.metadata.get("max_attempts", 1))
|
|
44
|
+
state.metadata.setdefault("attempt_failures", [])
|
|
45
|
+
state.metadata["orchestration_runtime"] = "internal"
|
|
46
|
+
state = await self._nodes.workflow_planner.run(state)
|
|
47
|
+
state = self._nodes.planning.run(state)
|
|
48
|
+
state = self._nodes.clarification.run(state)
|
|
49
|
+
if state.metadata.get("needs_clarification"):
|
|
50
|
+
return state
|
|
51
|
+
state = await self._nodes.retriever.run(state)
|
|
52
|
+
if self._nodes.reranker is not None:
|
|
53
|
+
state = await self._nodes.reranker.run(state)
|
|
54
|
+
|
|
55
|
+
attempt = 0
|
|
56
|
+
while True:
|
|
57
|
+
attempt += 1
|
|
58
|
+
state.retry_count = attempt - 1
|
|
59
|
+
state = self._nodes.reasoning.run(state)
|
|
60
|
+
state = self._nodes.llm.run(state)
|
|
61
|
+
state = self._nodes.guard.run(state)
|
|
62
|
+
state = self._nodes.verification.run(state)
|
|
63
|
+
edge = determine_next_edge(state)
|
|
64
|
+
state.transition_log.append(
|
|
65
|
+
{
|
|
66
|
+
"attempt": attempt,
|
|
67
|
+
"edge": edge,
|
|
68
|
+
"provider": state.llm_output.get("provider"),
|
|
69
|
+
"fallback_used": state.metadata.get("fallback_used", False),
|
|
70
|
+
}
|
|
71
|
+
)
|
|
72
|
+
if (
|
|
73
|
+
edge not in {"verification_failed", "guard_failed"}
|
|
74
|
+
or attempt >= max_attempts
|
|
75
|
+
):
|
|
76
|
+
break
|
|
77
|
+
retry_reason = (
|
|
78
|
+
"; ".join(
|
|
79
|
+
str(reason)
|
|
80
|
+
for reason in state.guard_result.get("reasons", [])
|
|
81
|
+
if reason
|
|
82
|
+
)
|
|
83
|
+
if edge == "guard_failed"
|
|
84
|
+
else state.verification_result.get("stderr", "verification failed")
|
|
85
|
+
)
|
|
86
|
+
state.metadata["attempt_failures"].append(
|
|
87
|
+
{
|
|
88
|
+
"attempt": attempt,
|
|
89
|
+
"reason": retry_reason,
|
|
90
|
+
"provider": state.llm_output.get("provider"),
|
|
91
|
+
"edge": edge,
|
|
92
|
+
}
|
|
93
|
+
)
|
|
94
|
+
state.metadata["retry_reason"] = retry_reason
|
|
95
|
+
|
|
96
|
+
state = self._nodes.evaluator.run(state)
|
|
97
|
+
state.metadata["edge"] = determine_next_edge(state)
|
|
98
|
+
|
|
99
|
+
if self._nodes.reflection is not None:
|
|
100
|
+
state = await self._nodes.reflection.run(state)
|
|
101
|
+
|
|
102
|
+
return state
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class LangGraphExecutorAdapter:
|
|
106
|
+
def __init__(self, nodes: GraphNodes) -> None:
|
|
107
|
+
self._nodes = nodes
|
|
108
|
+
self._internal = InternalGraphExecutor(nodes)
|
|
109
|
+
self._compiled_graph = None
|
|
110
|
+
|
|
111
|
+
async def run(self, state: GraphState) -> GraphState:
|
|
112
|
+
if graph_runtime_name() != "langgraph":
|
|
113
|
+
state = await self._internal.run(state)
|
|
114
|
+
state.metadata["orchestration_runtime"] = "internal"
|
|
115
|
+
return state
|
|
116
|
+
|
|
117
|
+
compiled = self._compiled_graph or self._build_compiled_graph()
|
|
118
|
+
self._compiled_graph = compiled
|
|
119
|
+
|
|
120
|
+
# StateGraph invocation
|
|
121
|
+
state.metadata.setdefault("attempt_failures", [])
|
|
122
|
+
result = await compiled.ainvoke(state)
|
|
123
|
+
|
|
124
|
+
if isinstance(result, GraphState):
|
|
125
|
+
result.metadata["orchestration_runtime"] = "langgraph"
|
|
126
|
+
result.metadata["edge"] = determine_next_edge(result)
|
|
127
|
+
return result
|
|
128
|
+
validated = GraphState.model_validate(result)
|
|
129
|
+
validated.metadata["orchestration_runtime"] = "langgraph"
|
|
130
|
+
validated.metadata["edge"] = determine_next_edge(validated)
|
|
131
|
+
return validated
|
|
132
|
+
|
|
133
|
+
def _build_compiled_graph(self):
|
|
134
|
+
state_graph_cls = load_langgraph_state_graph()
|
|
135
|
+
if state_graph_cls is None:
|
|
136
|
+
raise RuntimeError(
|
|
137
|
+
"LangGraph runtime requested but StateGraph is unavailable"
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
workflow = state_graph_cls(GraphState)
|
|
141
|
+
|
|
142
|
+
# 1. Add nodes
|
|
143
|
+
workflow.add_node("workflow_planner", self._nodes.workflow_planner.run)
|
|
144
|
+
workflow.add_node("planning", self._nodes.planning.run)
|
|
145
|
+
workflow.add_node("clarification", self._nodes.clarification.run)
|
|
146
|
+
workflow.add_node("retriever", self._nodes.retriever.run)
|
|
147
|
+
|
|
148
|
+
if self._nodes.reranker is not None:
|
|
149
|
+
workflow.add_node("reranker", self._nodes.reranker.run)
|
|
150
|
+
|
|
151
|
+
workflow.add_node("reasoning", self._node_reasoning_wrapper)
|
|
152
|
+
workflow.add_node("llm", self._nodes.llm.run)
|
|
153
|
+
workflow.add_node("guard", self._nodes.guard.run)
|
|
154
|
+
workflow.add_node("verification", self._nodes.verification.run)
|
|
155
|
+
workflow.add_node("evaluator", self._nodes.evaluator.run)
|
|
156
|
+
|
|
157
|
+
if self._nodes.reflection is not None:
|
|
158
|
+
workflow.add_node("reflection", self._nodes.reflection.run)
|
|
159
|
+
|
|
160
|
+
# 2. Add Edges
|
|
161
|
+
workflow.set_entry_point("workflow_planner")
|
|
162
|
+
workflow.add_edge("workflow_planner", "planning")
|
|
163
|
+
workflow.add_edge("planning", "clarification")
|
|
164
|
+
|
|
165
|
+
def clarification_router(state: GraphState) -> str:
|
|
166
|
+
if state.metadata.get("needs_clarification"):
|
|
167
|
+
return "END"
|
|
168
|
+
return "retriever"
|
|
169
|
+
|
|
170
|
+
workflow.add_conditional_edges("clarification", clarification_router, {"END": "__end__", "retriever": "retriever"})
|
|
171
|
+
|
|
172
|
+
if self._nodes.reranker is not None:
|
|
173
|
+
workflow.add_edge("retriever", "reranker")
|
|
174
|
+
workflow.add_edge("reranker", "reasoning")
|
|
175
|
+
else:
|
|
176
|
+
workflow.add_edge("retriever", "reasoning")
|
|
177
|
+
|
|
178
|
+
workflow.add_edge("reasoning", "llm")
|
|
179
|
+
workflow.add_edge("llm", "guard")
|
|
180
|
+
workflow.add_edge("guard", "verification")
|
|
181
|
+
|
|
182
|
+
def record_transition(state: GraphState) -> GraphState:
|
|
183
|
+
attempt = state.retry_count + 1
|
|
184
|
+
edge = determine_next_edge(state)
|
|
185
|
+
|
|
186
|
+
new_log = list(state.transition_log)
|
|
187
|
+
new_log.append(
|
|
188
|
+
{
|
|
189
|
+
"attempt": attempt,
|
|
190
|
+
"edge": edge,
|
|
191
|
+
"provider": state.llm_output.get("provider"),
|
|
192
|
+
"fallback_used": state.metadata.get("fallback_used", False),
|
|
193
|
+
}
|
|
194
|
+
)
|
|
195
|
+
state.transition_log = new_log
|
|
196
|
+
|
|
197
|
+
max_attempts = int(state.metadata.get("max_attempts", 1))
|
|
198
|
+
if edge in {"verification_failed", "guard_failed"} and attempt < max_attempts:
|
|
199
|
+
retry_reason = (
|
|
200
|
+
"; ".join(
|
|
201
|
+
str(reason)
|
|
202
|
+
for reason in state.guard_result.get("reasons", [])
|
|
203
|
+
if reason
|
|
204
|
+
)
|
|
205
|
+
if edge == "guard_failed"
|
|
206
|
+
else state.verification_result.get("stderr", "verification failed")
|
|
207
|
+
)
|
|
208
|
+
if "attempt_failures" not in state.metadata:
|
|
209
|
+
state.metadata["attempt_failures"] = []
|
|
210
|
+
|
|
211
|
+
state.metadata["attempt_failures"].append(
|
|
212
|
+
{
|
|
213
|
+
"attempt": attempt,
|
|
214
|
+
"reason": retry_reason,
|
|
215
|
+
"provider": state.llm_output.get("provider"),
|
|
216
|
+
"edge": edge,
|
|
217
|
+
}
|
|
218
|
+
)
|
|
219
|
+
state.metadata["retry_reason"] = retry_reason
|
|
220
|
+
return state
|
|
221
|
+
|
|
222
|
+
workflow.add_node("record_transition", record_transition)
|
|
223
|
+
workflow.add_edge("verification", "record_transition")
|
|
224
|
+
|
|
225
|
+
def check_attempt_loop(state: GraphState) -> str:
|
|
226
|
+
max_attempts = int(state.metadata.get("max_attempts", 1))
|
|
227
|
+
attempt = state.retry_count + 1
|
|
228
|
+
edge = determine_next_edge(state)
|
|
229
|
+
if edge not in {"verification_failed", "guard_failed"} or attempt >= max_attempts:
|
|
230
|
+
return "evaluator"
|
|
231
|
+
return "reasoning"
|
|
232
|
+
|
|
233
|
+
workflow.add_conditional_edges("record_transition", check_attempt_loop, {"reasoning": "reasoning", "evaluator": "evaluator"})
|
|
234
|
+
|
|
235
|
+
if self._nodes.reflection is not None:
|
|
236
|
+
workflow.add_edge("evaluator", "reflection")
|
|
237
|
+
workflow.add_edge("reflection", "__end__")
|
|
238
|
+
else:
|
|
239
|
+
workflow.add_edge("evaluator", "__end__")
|
|
240
|
+
|
|
241
|
+
return workflow.compile()
|
|
242
|
+
|
|
243
|
+
def _node_reasoning_wrapper(self, state: GraphState) -> GraphState:
|
|
244
|
+
if "attempt_failures" not in state.metadata:
|
|
245
|
+
state.metadata["attempt_failures"] = []
|
|
246
|
+
state.retry_count = len(state.metadata["attempt_failures"])
|
|
247
|
+
return self._nodes.reasoning.run(state)
|
|
@@ -61,8 +61,8 @@ class MinderGraph:
|
|
|
61
61
|
self._clarification = clarification or ClarificationNode()
|
|
62
62
|
vector_store = VectorStore(store, store)
|
|
63
63
|
embedder = LocalEmbeddingProvider(
|
|
64
|
-
|
|
65
|
-
|
|
64
|
+
llama_cpp_model_repo=config.embedding.llama_cpp_model_repo,
|
|
65
|
+
llama_cpp_model_file=config.embedding.llama_cpp_model_file,
|
|
66
66
|
dimensions=config.embedding.dimensions,
|
|
67
67
|
runtime="auto",
|
|
68
68
|
)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from .base import LLMClient
|
|
2
2
|
from .factory import create_llm
|
|
3
|
-
from .
|
|
3
|
+
from .llama_cpp_llm import LlamaCppLLM
|
|
4
4
|
from .openai import OpenAIFallbackLLM
|
|
5
5
|
|
|
6
6
|
__all__ = [
|
|
7
7
|
"LLMClient",
|
|
8
|
-
"
|
|
8
|
+
"LlamaCppLLM",
|
|
9
9
|
"OpenAIFallbackLLM",
|
|
10
10
|
"create_llm",
|
|
11
11
|
]
|
|
@@ -2,8 +2,7 @@
|
|
|
2
2
|
LLM provider factory — selects the correct provider based on config.
|
|
3
3
|
|
|
4
4
|
Supported providers:
|
|
5
|
-
- ``
|
|
6
|
-
|
|
5
|
+
- ``llama_cpp``: llama-cpp-python GGUF inference (on-device, recommended for local LLM)
|
|
7
6
|
- ``openai``: OpenAI-compatible cloud API
|
|
8
7
|
"""
|
|
9
8
|
|
|
@@ -14,18 +13,16 @@ from minder.config import LLMConfig
|
|
|
14
13
|
|
|
15
14
|
def create_llm(config: LLMConfig): # type: ignore[no-untyped-def]
|
|
16
15
|
"""Create an LLM client from the given configuration."""
|
|
17
|
-
if config.provider == "
|
|
18
|
-
from minder.llm.
|
|
16
|
+
if config.provider == "llama_cpp":
|
|
17
|
+
from minder.llm.llama_cpp_llm import LlamaCppLLM
|
|
19
18
|
|
|
20
|
-
return
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
cache_dir=config.litert_cache_dir,
|
|
19
|
+
return LlamaCppLLM(
|
|
20
|
+
model_repo=config.llama_cpp_model_repo,
|
|
21
|
+
model_file=config.llama_cpp_model_file,
|
|
24
22
|
context_length=config.context_length,
|
|
23
|
+
temperature=config.temperature,
|
|
25
24
|
)
|
|
26
25
|
|
|
27
|
-
|
|
28
|
-
|
|
29
26
|
if config.provider == "openai":
|
|
30
27
|
from minder.llm.openai import OpenAIFallbackLLM
|
|
31
28
|
|