minder-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. minder/__init__.py +12 -0
  2. minder/api/routers/prompts.py +177 -0
  3. minder/application/__init__.py +1 -0
  4. minder/application/admin/__init__.py +11 -0
  5. minder/application/admin/dto.py +453 -0
  6. minder/application/admin/jobs.py +327 -0
  7. minder/application/admin/use_cases.py +1895 -0
  8. minder/auth/__init__.py +12 -0
  9. minder/auth/context.py +26 -0
  10. minder/auth/middleware.py +70 -0
  11. minder/auth/principal.py +59 -0
  12. minder/auth/rate_limiter.py +89 -0
  13. minder/auth/rbac.py +60 -0
  14. minder/auth/service.py +541 -0
  15. minder/bootstrap/__init__.py +9 -0
  16. minder/bootstrap/providers.py +109 -0
  17. minder/bootstrap/transport.py +807 -0
  18. minder/cache/__init__.py +10 -0
  19. minder/cache/providers.py +140 -0
  20. minder/chunking/__init__.py +4 -0
  21. minder/chunking/code_splitter.py +184 -0
  22. minder/chunking/splitter.py +136 -0
  23. minder/cli.py +1542 -0
  24. minder/config.py +179 -0
  25. minder/continuity.py +363 -0
  26. minder/dev.py +160 -0
  27. minder/embedding/__init__.py +9 -0
  28. minder/embedding/base.py +7 -0
  29. minder/embedding/local.py +65 -0
  30. minder/embedding/openai.py +7 -0
  31. minder/graph/__init__.py +11 -0
  32. minder/graph/edges.py +13 -0
  33. minder/graph/executor.py +127 -0
  34. minder/graph/graph.py +263 -0
  35. minder/graph/nodes/__init__.py +27 -0
  36. minder/graph/nodes/evaluator.py +21 -0
  37. minder/graph/nodes/guard.py +64 -0
  38. minder/graph/nodes/llm.py +59 -0
  39. minder/graph/nodes/planning.py +30 -0
  40. minder/graph/nodes/reasoning.py +87 -0
  41. minder/graph/nodes/reranker.py +141 -0
  42. minder/graph/nodes/retriever.py +86 -0
  43. minder/graph/nodes/verification.py +230 -0
  44. minder/graph/nodes/workflow_planner.py +250 -0
  45. minder/graph/runtime.py +15 -0
  46. minder/graph/state.py +26 -0
  47. minder/llm/__init__.py +5 -0
  48. minder/llm/base.py +14 -0
  49. minder/llm/local.py +381 -0
  50. minder/llm/openai.py +89 -0
  51. minder/models/__init__.py +109 -0
  52. minder/models/base.py +10 -0
  53. minder/models/client.py +137 -0
  54. minder/models/document.py +34 -0
  55. minder/models/error.py +32 -0
  56. minder/models/graph.py +114 -0
  57. minder/models/history.py +32 -0
  58. minder/models/job.py +62 -0
  59. minder/models/prompt.py +41 -0
  60. minder/models/repository.py +62 -0
  61. minder/models/rule.py +68 -0
  62. minder/models/session.py +51 -0
  63. minder/models/skill.py +52 -0
  64. minder/models/user.py +41 -0
  65. minder/models/workflow.py +35 -0
  66. minder/observability/__init__.py +57 -0
  67. minder/observability/audit.py +243 -0
  68. minder/observability/logging.py +253 -0
  69. minder/observability/metrics.py +448 -0
  70. minder/observability/tracing.py +215 -0
  71. minder/presentation/__init__.py +1 -0
  72. minder/presentation/http/__init__.py +1 -0
  73. minder/presentation/http/admin/__init__.py +3 -0
  74. minder/presentation/http/admin/api.py +1309 -0
  75. minder/presentation/http/admin/context.py +94 -0
  76. minder/presentation/http/admin/dashboard.py +111 -0
  77. minder/presentation/http/admin/jobs.py +208 -0
  78. minder/presentation/http/admin/memories.py +185 -0
  79. minder/presentation/http/admin/prompts.py +219 -0
  80. minder/presentation/http/admin/routes.py +127 -0
  81. minder/presentation/http/admin/runtime.py +650 -0
  82. minder/presentation/http/admin/search.py +368 -0
  83. minder/presentation/http/admin/skills.py +230 -0
  84. minder/prompts/__init__.py +646 -0
  85. minder/prompts/formatter.py +142 -0
  86. minder/resources/__init__.py +318 -0
  87. minder/retrieval/__init__.py +5 -0
  88. minder/retrieval/hybrid.py +178 -0
  89. minder/retrieval/mmr.py +116 -0
  90. minder/retrieval/multi_hop.py +115 -0
  91. minder/runtime.py +15 -0
  92. minder/server.py +145 -0
  93. minder/store/__init__.py +64 -0
  94. minder/store/document.py +115 -0
  95. minder/store/error.py +82 -0
  96. minder/store/feedback.py +114 -0
  97. minder/store/graph.py +588 -0
  98. minder/store/history.py +57 -0
  99. minder/store/interfaces.py +512 -0
  100. minder/store/milvus/__init__.py +11 -0
  101. minder/store/milvus/client.py +26 -0
  102. minder/store/milvus/collections.py +15 -0
  103. minder/store/milvus/vector_store.py +232 -0
  104. minder/store/mongodb/__init__.py +11 -0
  105. minder/store/mongodb/client.py +49 -0
  106. minder/store/mongodb/indexes.py +90 -0
  107. minder/store/mongodb/operational_store.py +993 -0
  108. minder/store/relational.py +1087 -0
  109. minder/store/repo_state.py +58 -0
  110. minder/store/rule.py +93 -0
  111. minder/store/vector.py +79 -0
  112. minder/tools/__init__.py +47 -0
  113. minder/tools/auth.py +94 -0
  114. minder/tools/graph.py +839 -0
  115. minder/tools/ingest.py +353 -0
  116. minder/tools/memory.py +381 -0
  117. minder/tools/query.py +307 -0
  118. minder/tools/registry.py +269 -0
  119. minder/tools/repo_scanner.py +1266 -0
  120. minder/tools/search.py +15 -0
  121. minder/tools/session.py +316 -0
  122. minder/tools/skills.py +899 -0
  123. minder/tools/workflow.py +215 -0
  124. minder/transport/__init__.py +4 -0
  125. minder/transport/base.py +286 -0
  126. minder/transport/sse.py +252 -0
  127. minder/transport/stdio.py +29 -0
  128. minder_cli-0.2.0.dist-info/METADATA +318 -0
  129. minder_cli-0.2.0.dist-info/RECORD +132 -0
  130. minder_cli-0.2.0.dist-info/WHEEL +4 -0
  131. minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
  132. minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,116 @@
1
+ """
2
+ Maximal Marginal Relevance (MMR) diversity re-ranking.
3
+
4
+ MMR balances relevance to the query against redundancy among already-selected
5
+ results. lambda_mult controls the trade-off:
6
+ lambda_mult = 1.0 → pure relevance ranking (no diversity)
7
+ lambda_mult = 0.0 → maximum diversity (no relevance)
8
+ lambda_mult = 0.5 → balanced default
9
+
10
+ Reference: Carbonell & Goldstein (1998) "The Use of MMR, Diversity-Based
11
+ Reranking for Reordering Documents and Producing Summaries"
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import math
17
+ from typing import Any
18
+
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Internal helpers
22
+ # ---------------------------------------------------------------------------
23
+
24
+
25
+ def _cosine(a: list[float], b: list[float]) -> float:
26
+ if not a or not b or len(a) != len(b):
27
+ return 0.0
28
+ dot = sum(x * y for x, y in zip(a, b, strict=False))
29
+ norm_a = math.sqrt(sum(x * x for x in a))
30
+ norm_b = math.sqrt(sum(x * x for x in b))
31
+ if norm_a == 0.0 or norm_b == 0.0:
32
+ return 0.0
33
+ return dot / (norm_a * norm_b)
34
+
35
+
36
+ def _relevance(doc: dict[str, Any], query_embedding: list[float], embedding_key: str, score_key: str) -> float:
37
+ emb = doc.get(embedding_key)
38
+ if isinstance(emb, list) and emb:
39
+ return _cosine(query_embedding, emb)
40
+ return float(doc.get(score_key, 0.0))
41
+
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # Public API
45
+ # ---------------------------------------------------------------------------
46
+
47
+
48
def mmr_rerank(
    query_embedding: list[float],
    candidates: list[dict[str, Any]],
    *,
    top_k: int = 5,
    lambda_mult: float = 0.5,
    embedding_key: str = "embedding",
    score_key: str = "score",
) -> list[dict[str, Any]]:
    """
    Re-rank *candidates* using Maximal Marginal Relevance.

    Args:
        query_embedding: embedding vector of the query.
        candidates: list of document dicts, each optionally containing an
            ``embedding_key`` field (list[float]) and/or a ``score_key``
            field (float). Documents without embeddings fall back to their
            existing score for relevance estimation and contribute no
            redundancy penalty.
        top_k: maximum number of results to return; values below 1 are
            raised to 1.
        lambda_mult: trade-off coefficient, clamped into [0, 1].
        embedding_key: key in each doc dict that holds the document embedding.
        score_key: key used as a relevance proxy when no embedding is present.

    Returns:
        A sub-list of *candidates* (the same dict objects, not copies) of
        length ≤ max(1, top_k), in the order MMR selected them. The first
        pick is always the candidate with the highest raw relevance.
    """
    if not candidates:
        return []

    top_k = max(1, top_k)
    lambda_mult = max(0.0, min(1.0, lambda_mult))
    redundancy_weight = 1.0 - lambda_mult

    # Pre-compute relevance scores to avoid repeated cosine calls.
    relevances = [
        _relevance(doc, query_embedding, embedding_key, score_key)
        for doc in candidates
    ]

    # Usable embedding (non-empty list) per candidate, or None when absent.
    embeddings: list[list[float] | None] = []
    for doc in candidates:
        emb = doc.get(embedding_key)
        embeddings.append(emb if isinstance(emb, list) and emb else None)

    # max_sims[i] is the highest cosine similarity between candidate i and
    # any document selected so far. It is updated incrementally after each
    # selection instead of being recomputed against the whole selected set
    # on every pass. Candidates without an embedding keep 0.0.
    max_sims = [0.0] * len(candidates)

    selected_indices: list[int] = []
    remaining_indices = list(range(len(candidates)))

    while remaining_indices and len(selected_indices) < top_k:
        best_score = -float("inf")
        best_pos = 0

        for pos, idx in enumerate(remaining_indices):
            if selected_indices:
                mmr_score = (
                    lambda_mult * relevances[idx]
                    - redundancy_weight * max_sims[idx]
                )
            else:
                # Nothing selected yet: rank purely by relevance.
                mmr_score = relevances[idx]

            # Strict ">" keeps the earliest candidate on ties.
            if mmr_score > best_score:
                best_score = mmr_score
                best_pos = pos

        chosen = remaining_indices.pop(best_pos)
        selected_indices.append(chosen)
        if len(selected_indices) >= top_k:
            break  # no further pass will read max_sims

        chosen_emb = candidates[chosen].get(embedding_key) or []
        first_selection = len(selected_indices) == 1
        for idx in remaining_indices:
            emb = embeddings[idx]
            if emb is None:
                continue
            sim = _cosine(emb, chosen_emb)
            # The first similarity replaces the 0.0 placeholder outright so a
            # negative similarity is not masked by it.
            if first_selection or sim > max_sims[idx]:
                max_sims[idx] = sim

    return [candidates[i] for i in selected_indices]
@@ -0,0 +1,115 @@
1
+ """
2
+ Multi-hop retrieval.
3
+
4
+ Iteratively refines the search query using content from the previous hop's
5
+ top result, then merges and de-duplicates results across all hops.
6
+
7
+ Hop 1 → retrieve on original query
8
+ Hop 2 → expand query with key terms extracted from hop-1 top result → retrieve
9
+ ...
10
+ Final → merge all hops, sort by score, return top-K.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from collections import Counter
16
+ from typing import Any, Protocol, runtime_checkable
17
+
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # Retriever protocol
21
+ # ---------------------------------------------------------------------------
22
+
23
+
24
@runtime_checkable
class RetrieveFn(Protocol):
    """Structural type of the retrieval callback.

    Matches any async callable that takes the query string positionally plus
    a keyword-only ``limit`` and resolves to a list of document dicts.
    """

    async def __call__(self, query: str, *, limit: int) -> list[dict[str, Any]]:
        ...
34
+
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # MultiHopRetriever
38
+ # ---------------------------------------------------------------------------
39
+
40
+
41
class MultiHopRetriever:
    """
    Iterative retrieval that uses the top result from each hop to expand the
    query for the next hop.

    Args:
        retrieve_fn: async callable matching :class:`RetrieveFn`.
        max_hops: total number of retrieval hops (default 2); values below 1
            are raised to 1.
    """

    def __init__(self, retrieve_fn: RetrieveFn, *, max_hops: int = 2) -> None:
        self._retrieve_fn = retrieve_fn
        self._max_hops = max(1, max_hops)

    async def retrieve(
        self,
        query: str,
        *,
        limit: int = 5,
    ) -> list[dict[str, Any]]:
        """
        Run multi-hop retrieval.

        Args:
            query: the original search query.
            limit: per-hop retrieval limit and the final result cap.

        Returns:
            Deduplicated, score-sorted list of documents across all hops,
            truncated to *limit*. Each returned dict is a shallow copy of the
            retrieved document and gains a ``"hop"`` field (unless it already
            has one) indicating which hop first found it.
        """
        seen_keys: set[str] = set()
        all_results: list[dict[str, Any]] = []
        # Hold every raw hop batch until we return. Documents with no
        # identifier are keyed by id(), which is only unique among objects
        # that are alive at the same time. Without this, a later hop's dict
        # could reuse a freed address and be mistaken for a duplicate.
        raw_batches: list[list[dict[str, Any]]] = []
        current_query = query

        for hop in range(self._max_hops):
            hop_results = await self._retrieve_fn(current_query, limit=limit)
            raw_batches.append(hop_results)
            new_this_hop: list[dict[str, Any]] = []
            for doc in hop_results:
                key = self._doc_key(doc)
                if key not in seen_keys:
                    seen_keys.add(key)
                    enriched = dict(doc)
                    enriched.setdefault("hop", hop + 1)
                    all_results.append(enriched)
                    new_this_hop.append(enriched)

            # Expand query for next hop using key terms from top new result
            if hop < self._max_hops - 1 and new_this_hop:
                top_content = str(new_this_hop[0].get("content", ""))
                expansion = self._expand_query(top_content, base_query=query)
                if expansion:
                    # Always expand from the original query so terms do not
                    # accumulate hop over hop.
                    current_query = f"{query} {expansion}"

        # Sort by descending score; list.sort is stable, so equal scores keep
        # their insertion order.
        all_results.sort(key=lambda d: float(d.get("score", 0.0)), reverse=True)
        return all_results[:limit]

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    @staticmethod
    def _doc_key(doc: dict[str, Any]) -> str:
        """Return the de-duplication key for *doc*.

        Uses ``path`` first, then ``id``. A field that is present but ``None``
        is skipped, so documents with ``"path": None`` are told apart by their
        ``id`` instead of all sharing the key ``"None"``. A document with
        neither identifier is keyed by its object identity and therefore
        never matches another document.
        """
        for field in ("path", "id"):
            value = doc.get(field)
            if value is not None:
                return str(value)
        return str(id(doc))

    @staticmethod
    def _expand_query(content: str, *, base_query: str, max_terms: int = 5) -> str:
        """Extract high-frequency content terms not already in the base query.

        Only purely alphabetic, whitespace-delimited tokens longer than three
        characters are considered; comparison is case-insensitive. Returns up
        to *max_terms* terms joined by spaces, most frequent first, or an
        empty string when nothing qualifies.
        """
        base_tokens = set(base_query.lower().split())
        tokens = [
            tok
            for tok in content.lower().split()
            if len(tok) > 3 and tok not in base_tokens and tok.isalpha()
        ]
        freq: Counter[str] = Counter(tokens)
        top_terms = [term for term, _ in freq.most_common(max_terms)]
        return " ".join(top_terms)
minder/runtime.py ADDED
@@ -0,0 +1,15 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib.util
4
+ from typing import Any
5
+
6
+
7
def module_available(module_name: str) -> bool:
    """Return True when *module_name* can be located by the import system.

    ``importlib.util.find_spec`` raises instead of returning ``None`` in two
    cases: ``ModuleNotFoundError`` for a dotted name whose parent package is
    missing, and ``ValueError`` for a module that has no usable spec. Both
    mean the module cannot be used, so they are reported as ``False`` rather
    than propagated to callers that only want a yes/no answer.

    Note that probing a dotted name imports its parent package.
    """
    try:
        return importlib.util.find_spec(module_name) is not None
    except (ImportError, ValueError):
        return False
9
+
10
+
11
def load_attr(module_name: str, attr_name: str) -> Any | None:
    """Import *module_name* if it is available and return its *attr_name*.

    Returns ``None`` when the module cannot be found or when it has no such
    attribute.
    """
    if module_available(module_name):
        # A non-empty fromlist makes __import__ return the named (sub)module
        # itself rather than the top-level package.
        loaded = __import__(module_name, fromlist=[attr_name])
        return getattr(loaded, attr_name, None)
    return None
minder/server.py ADDED
@@ -0,0 +1,145 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ from minder.bootstrap.providers import (
8
+ build_cache,
9
+ build_graph_store,
10
+ build_store,
11
+ build_vector_store,
12
+ )
13
+ from minder.bootstrap.transport import build_transport
14
+ from minder.config import Settings
15
+ from minder.embedding.local import LocalEmbeddingProvider
16
+ from minder.graph.runtime import graph_runtime_name
17
+ from minder.llm.local import LocalModelLLM
18
+ from minder.llm.openai import OpenAIFallbackLLM
19
+ from minder.presentation.http.admin.routes import build_http_app, build_http_routes
20
+
21
+ __all__ = [
22
+ "build_cache",
23
+ "build_graph_store",
24
+ "build_http_app",
25
+ "build_http_routes",
26
+ "build_store",
27
+ "build_transport",
28
+ "build_vector_store",
29
+ "main",
30
+ "runtime_summary",
31
+ ]
32
+
33
+
34
def runtime_summary(config: Settings) -> dict[str, object]:
    """Build a flat dict describing the runtime selected by *config*.

    Instantiates the local LLM, the local embedding provider and the OpenAI
    fallback with ``runtime="auto"`` and reports the ``runtime`` attribute of
    each, alongside the transport, host/port, orchestration runtime and
    user-expanded model paths.
    """
    llm_cfg = config.llm
    embedding_cfg = config.embedding

    local_llm = LocalModelLLM(
        llm_cfg.model_path,
        runtime="auto",
        context_length=llm_cfg.context_length,
    )
    local_embedder = LocalEmbeddingProvider(
        embedding_cfg.model_path,
        dimensions=embedding_cfg.dimensions,
        runtime="auto",
    )
    openai_fallback = OpenAIFallbackLLM(
        llm_cfg.openai_api_key, llm_cfg.openai_model, runtime="auto"
    )

    server_cfg = config.server
    requested_runtime = config.workflow.orchestration_runtime

    summary: dict[str, object] = {}
    summary["transport"] = server_cfg.transport
    summary["host"] = server_cfg.host
    summary["port"] = server_cfg.port
    summary["orchestration_runtime_requested"] = requested_runtime
    summary["orchestration_runtime_effective"] = graph_runtime_name(requested_runtime)
    summary["llm_model_path"] = str(Path(llm_cfg.model_path).expanduser())
    summary["llm_runtime_effective"] = local_llm.runtime
    summary["llm_context_length"] = llm_cfg.context_length
    summary["embedding_model_path"] = str(Path(embedding_cfg.model_path).expanduser())
    summary["embedding_runtime_effective"] = local_embedder.runtime
    summary["openai_fallback_configured"] = openai_fallback.available()
    summary["openai_fallback_runtime_effective"] = openai_fallback.runtime
    return summary
64
+
65
+
66
async def _async_run() -> None:
    """Start the Minder server and serve until the transport returns.

    Sequence: load settings, configure logging and tracing, initialise the
    relational, graph and vector stores, build the cache and transport, sync
    the prompt registry, then run either the stdio or the SSE transport.
    The store, graph store and cache are released once serving ends.
    Progress messages go to stderr.
    """

    def _announce(*parts: object) -> None:
        # Every status line is written to stderr and flushed immediately.
        print(*parts, file=sys.stderr, flush=True)

    _announce("MINDER SERVER STARTING")
    config = Settings()

    # Initialise structured JSON logging and tracing before anything else
    from minder.observability import configure_json_logging, configure_tracing

    configure_json_logging(level=config.server.log_level)
    configure_tracing(
        service_name=config.server.name,
        service_version=config.server.version,
    )

    store = build_store(config)
    await store.init_db()

    graph_store = build_graph_store(config)
    if graph_store is not None and hasattr(graph_store, "init_db"):
        await graph_store.init_db()

    vector_store = build_vector_store(config, store)
    if hasattr(vector_store, "setup"):
        await vector_store.setup()

    cache = build_cache(config)
    admin = await store.get_user_by_username("admin")
    _announce(f"MINDER ADMIN EXISTS: {admin is not None}")

    transport = build_transport(
        config=config,
        store=store,
        vector_store=vector_store,
        graph_store=graph_store,
        cache=cache,
    )

    from minder.prompts import PromptRegistry

    await PromptRegistry.sync(transport.app, store)

    _announce(
        f"Minder store={config.relational_store.provider} cache={config.cache.provider} "
        f"transport={transport.transport_name} host={config.server.host}:{config.server.port}"
    )
    _announce("Minder runtime summary:", runtime_summary(config))

    try:
        if transport.transport_name == "stdio":
            await transport.app.run_stdio_async()
        else:
            _announce(f"Starting SSE on {config.server.host}:{config.server.port}")
            # Prefer the transport's own runner when it provides one.
            serve = (
                transport.run
                if hasattr(transport, "run")
                else transport.app.run_sse_async
            )
            await serve()
    finally:
        await store.dispose()
        if graph_store is not None and hasattr(graph_store, "dispose"):
            await graph_store.dispose()
        await cache.close()
134
+
135
+
136
def _run() -> None:
    """Run the async server entry point to completion via ``asyncio.run``."""
    server_coroutine = _async_run()
    asyncio.run(server_coroutine)
138
+
139
+
140
def main() -> None:
    """Synchronous entry point; delegates to :func:`_run`."""
    return _run()


# Allow direct execution of this module as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,64 @@
1
+ """
2
+ Store package — data access layer.
3
+
4
+ Exports both concrete implementations and domain interfaces.
5
+ Application code should depend on interfaces from `minder.store.interfaces`.
6
+ """
7
+
8
+ from .document import DocumentStore
9
+ from .error import ErrorStore
10
+ from .feedback import FeedbackStore
11
+ from .graph import KnowledgeGraphStore
12
+ from .history import HistoryStore
13
+ from .interfaces import (
14
+ ICacheProvider,
15
+ IClientRepository,
16
+ IDocumentRepository,
17
+ IErrorRepository,
18
+ IFeedbackRepository,
19
+ IGraphRepository,
20
+ IHistoryRepository,
21
+ IOperationalStore,
22
+ IRepositoryRepo,
23
+ IRuleRepository,
24
+ ISessionRepository,
25
+ ISkillRepository,
26
+ IUserRepository,
27
+ IVectorStore,
28
+ IWorkflowRepository,
29
+ IWorkflowStateRepository,
30
+ )
31
+ from .relational import RelationalStore
32
+ from .repo_state import RepoStateStore
33
+ from .rule import RuleStore
34
+ from .vector import VectorStore
35
+
36
+ __all__ = [
37
+ # Domain interfaces
38
+ "ICacheProvider",
39
+ "IClientRepository",
40
+ "IDocumentRepository",
41
+ "IErrorRepository",
42
+ "IFeedbackRepository",
43
+ "IGraphRepository",
44
+ "IHistoryRepository",
45
+ "IOperationalStore",
46
+ "IRepositoryRepo",
47
+ "IRuleRepository",
48
+ "ISessionRepository",
49
+ "ISkillRepository",
50
+ "IUserRepository",
51
+ "IVectorStore",
52
+ "IWorkflowRepository",
53
+ "IWorkflowStateRepository",
54
+ # Concrete implementations
55
+ "DocumentStore",
56
+ "ErrorStore",
57
+ "FeedbackStore",
58
+ "HistoryStore",
59
+ "KnowledgeGraphStore",
60
+ "RelationalStore",
61
+ "RepoStateStore",
62
+ "RuleStore",
63
+ "VectorStore",
64
+ ]
@@ -0,0 +1,115 @@
1
+ from __future__ import annotations
2
+
3
+ import uuid
4
+ from typing import Any
5
+
6
+ from sqlalchemy import delete, select, update
7
+
8
+ from minder.models.document import Document
9
+ from minder.store.relational import RelationalStore
10
+
11
+
12
class DocumentStore:
    """Data-access helper for ``Document`` rows, backed by a ``RelationalStore``.

    Every method opens its own session through ``RelationalStore._session()``.
    """

    def __init__(self, store: RelationalStore) -> None:
        self._store = store

    async def create_document(
        self,
        title: str,
        content: str,
        doc_type: str,
        source_path: str,
        project: str,
        *,
        chunks: dict[str, Any] | None = None,
        embedding: list[float] | None = None,
    ) -> Document:
        """Insert a new document with a fresh UUID and return the refreshed row.

        A missing or empty *chunks* mapping is stored as ``{}``.
        """
        async with self._store._session() as session:
            fields: dict[str, Any] = {
                "id": uuid.uuid4(),
                "title": title,
                "content": content,
                "doc_type": doc_type,
                "source_path": source_path,
                "chunks": chunks if chunks else {},
                "embedding": embedding,
                "project": project,
            }
            record = Document(**fields)
            session.add(record)
            await session.flush()
            await session.refresh(record)
            return record

    async def get_document_by_path(
        self, source_path: str, *, project: str | None = None
    ) -> Document | None:
        """Return the document stored at *source_path*, or ``None``.

        When *project* is given the lookup is additionally restricted to it.
        """
        async with self._store._session() as session:
            criteria = [Document.source_path == source_path]
            if project is not None:
                criteria.append(Document.project == project)
            rows = await session.execute(select(Document).where(*criteria))
            return rows.scalar_one_or_none()

    async def get_documents_by_ids(self, doc_ids: list[uuid.UUID]) -> list[Document]:
        """Return the documents whose id is in *doc_ids* (no query when empty)."""
        if not doc_ids:
            return []
        async with self._store._session() as session:
            query = select(Document).where(Document.id.in_(doc_ids))
            rows = await session.execute(query)
            return list(rows.scalars().all())

    async def list_documents(self, project: str | None = None) -> list[Document]:
        """Return all documents, optionally restricted to one *project*."""
        async with self._store._session() as session:
            query = select(Document)
            if project is not None:
                query = query.where(Document.project == project)
            rows = await session.execute(query)
            return list(rows.scalars().all())

    async def upsert_document(
        self,
        *,
        title: str,
        content: str,
        doc_type: str,
        source_path: str,
        project: str,
        chunks: dict[str, Any] | None = None,
        embedding: list[float] | None = None,
    ) -> Document:
        """Create the document, or update it if one exists for the path/project.

        The existence check and the write run in separate sessions.
        """
        current = await self.get_document_by_path(source_path, project=project)
        if current is None:
            return await self.create_document(
                title=title,
                content=content,
                doc_type=doc_type,
                source_path=source_path,
                project=project,
                chunks=chunks,
                embedding=embedding,
            )

        changes: dict[str, Any] = {
            "title": title,
            "content": content,
            "doc_type": doc_type,
            "chunks": chunks if chunks else {},
            "embedding": embedding,
            "project": project,
        }
        async with self._store._session() as session:
            matches_current = Document.id == current.id
            await session.execute(
                update(Document).where(matches_current).values(**changes)
            )
            # Re-read the row so the caller gets the post-update state.
            rows = await session.execute(select(Document).where(Document.id == current.id))
            return rows.scalar_one()

    async def delete_documents_not_in_paths(
        self, *, project: str, keep_paths: set[str]
    ) -> None:
        """Delete the documents of *project* whose path is not in *keep_paths*.

        An empty *keep_paths* removes every document of the project.
        """
        async with self._store._session() as session:
            removal = delete(Document).where(Document.project == project)
            if keep_paths:
                removal = removal.where(Document.source_path.not_in(keep_paths))
            await session.execute(removal)
minder/store/error.py ADDED
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ import uuid
5
+ from collections import Counter
6
+ from typing import Any
7
+ from typing import cast
8
+
9
+ from sqlalchemy import select
10
+
11
+ from minder.models.error import Error
12
+ from minder.store.relational import RelationalStore
13
+
14
+
15
class ErrorStore:
    """Data-access helper for ``Error`` rows with a bag-of-words search.

    Every database method opens its own session through
    ``RelationalStore._session()``.
    """

    def __init__(self, store: RelationalStore) -> None:
        self._store = store

    async def create_error(
        self,
        error_code: str,
        error_message: str,
        stack_trace: str | None = None,
        context: dict[str, Any] | None = None,
        resolution: str | None = None,
        embedding: list[float] | None = None,
        resolved: bool = False,
    ) -> Error:
        """Insert a new error record with a fresh UUID and return the refreshed row.

        A missing or empty *context* is stored as ``{}``.
        """
        async with self._store._session() as sess:
            error = Error(
                id=uuid.uuid4(),
                error_code=error_code,
                error_message=error_message,
                stack_trace=stack_trace,
                context=context or {},
                resolution=resolution,
                embedding=embedding,
                resolved=resolved,
            )
            sess.add(error)
            await sess.flush()
            await sess.refresh(error)
            return error

    async def list_errors(self) -> list[Error]:
        """Return every stored error record."""
        async with self._store._session() as sess:
            result = await sess.execute(select(Error))
            return list(result.scalars().all())

    async def search_errors(self, query: str, limit: int = 5) -> list[dict[str, Any]]:
        """Rank stored errors against *query* by token-count cosine similarity.

        Each error is represented by its code, message and context rendered
        as text. All rows are loaded and scored in memory.

        Args:
            query: free-text search string.
            limit: maximum number of hits to return. Zero or a negative value
                yields an empty list (a plain slice would otherwise treat a
                negative limit as "all but the last N").

        Returns:
            Up to *limit* dicts with ``id``, ``error_code``, ``error_message``,
            ``resolution`` and ``score`` (rounded to 4 decimals), highest
            score first. Equal scores keep the order the rows were listed in.
        """
        if limit <= 0:
            # Nothing can be returned, so skip the table read entirely.
            return []

        rows = await self.list_errors()
        query_vector = self._text_vector(query)
        ranked = []
        for row in rows:
            text = f"{row.error_code} {row.error_message} {row.context}"
            score = self._cosine_similarity(query_vector, self._text_vector(text))
            ranked.append(
                {
                    "id": row.id,
                    "error_code": row.error_code,
                    "error_message": row.error_message,
                    "resolution": row.resolution,
                    "score": round(score, 4),
                }
            )
        ranked.sort(key=lambda item: cast(float, item["score"]), reverse=True)
        return ranked[:limit]

    @staticmethod
    def _text_vector(text: str) -> Counter[str]:
        """Count lower-cased whitespace tokens longer than two characters."""
        return Counter(token for token in text.lower().split() if len(token) > 2)

    @staticmethod
    def _cosine_similarity(left: Counter[str], right: Counter[str]) -> float:
        """Return the cosine similarity of two token-count vectors.

        Returns ``0.0`` when either vector is empty or has zero magnitude.
        """
        if not left or not right:
            return 0.0
        # Only tokens present on both sides contribute to the dot product.
        numerator = sum(left[key] * right[key] for key in left.keys() & right.keys())
        left_norm = math.sqrt(sum(value * value for value in left.values()))
        right_norm = math.sqrt(sum(value * value for value in right.values()))
        if left_norm == 0 or right_norm == 0:
            return 0.0
        return numerator / (left_norm * right_norm)