minder-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. minder/__init__.py +12 -0
  2. minder/api/routers/prompts.py +177 -0
  3. minder/application/__init__.py +1 -0
  4. minder/application/admin/__init__.py +11 -0
  5. minder/application/admin/dto.py +453 -0
  6. minder/application/admin/jobs.py +327 -0
  7. minder/application/admin/use_cases.py +1895 -0
  8. minder/auth/__init__.py +12 -0
  9. minder/auth/context.py +26 -0
  10. minder/auth/middleware.py +70 -0
  11. minder/auth/principal.py +59 -0
  12. minder/auth/rate_limiter.py +89 -0
  13. minder/auth/rbac.py +60 -0
  14. minder/auth/service.py +541 -0
  15. minder/bootstrap/__init__.py +9 -0
  16. minder/bootstrap/providers.py +109 -0
  17. minder/bootstrap/transport.py +807 -0
  18. minder/cache/__init__.py +10 -0
  19. minder/cache/providers.py +140 -0
  20. minder/chunking/__init__.py +4 -0
  21. minder/chunking/code_splitter.py +184 -0
  22. minder/chunking/splitter.py +136 -0
  23. minder/cli.py +1542 -0
  24. minder/config.py +179 -0
  25. minder/continuity.py +363 -0
  26. minder/dev.py +160 -0
  27. minder/embedding/__init__.py +9 -0
  28. minder/embedding/base.py +7 -0
  29. minder/embedding/local.py +65 -0
  30. minder/embedding/openai.py +7 -0
  31. minder/graph/__init__.py +11 -0
  32. minder/graph/edges.py +13 -0
  33. minder/graph/executor.py +127 -0
  34. minder/graph/graph.py +263 -0
  35. minder/graph/nodes/__init__.py +27 -0
  36. minder/graph/nodes/evaluator.py +21 -0
  37. minder/graph/nodes/guard.py +64 -0
  38. minder/graph/nodes/llm.py +59 -0
  39. minder/graph/nodes/planning.py +30 -0
  40. minder/graph/nodes/reasoning.py +87 -0
  41. minder/graph/nodes/reranker.py +141 -0
  42. minder/graph/nodes/retriever.py +86 -0
  43. minder/graph/nodes/verification.py +230 -0
  44. minder/graph/nodes/workflow_planner.py +250 -0
  45. minder/graph/runtime.py +15 -0
  46. minder/graph/state.py +26 -0
  47. minder/llm/__init__.py +5 -0
  48. minder/llm/base.py +14 -0
  49. minder/llm/local.py +381 -0
  50. minder/llm/openai.py +89 -0
  51. minder/models/__init__.py +109 -0
  52. minder/models/base.py +10 -0
  53. minder/models/client.py +137 -0
  54. minder/models/document.py +34 -0
  55. minder/models/error.py +32 -0
  56. minder/models/graph.py +114 -0
  57. minder/models/history.py +32 -0
  58. minder/models/job.py +62 -0
  59. minder/models/prompt.py +41 -0
  60. minder/models/repository.py +62 -0
  61. minder/models/rule.py +68 -0
  62. minder/models/session.py +51 -0
  63. minder/models/skill.py +52 -0
  64. minder/models/user.py +41 -0
  65. minder/models/workflow.py +35 -0
  66. minder/observability/__init__.py +57 -0
  67. minder/observability/audit.py +243 -0
  68. minder/observability/logging.py +253 -0
  69. minder/observability/metrics.py +448 -0
  70. minder/observability/tracing.py +215 -0
  71. minder/presentation/__init__.py +1 -0
  72. minder/presentation/http/__init__.py +1 -0
  73. minder/presentation/http/admin/__init__.py +3 -0
  74. minder/presentation/http/admin/api.py +1309 -0
  75. minder/presentation/http/admin/context.py +94 -0
  76. minder/presentation/http/admin/dashboard.py +111 -0
  77. minder/presentation/http/admin/jobs.py +208 -0
  78. minder/presentation/http/admin/memories.py +185 -0
  79. minder/presentation/http/admin/prompts.py +219 -0
  80. minder/presentation/http/admin/routes.py +127 -0
  81. minder/presentation/http/admin/runtime.py +650 -0
  82. minder/presentation/http/admin/search.py +368 -0
  83. minder/presentation/http/admin/skills.py +230 -0
  84. minder/prompts/__init__.py +646 -0
  85. minder/prompts/formatter.py +142 -0
  86. minder/resources/__init__.py +318 -0
  87. minder/retrieval/__init__.py +5 -0
  88. minder/retrieval/hybrid.py +178 -0
  89. minder/retrieval/mmr.py +116 -0
  90. minder/retrieval/multi_hop.py +115 -0
  91. minder/runtime.py +15 -0
  92. minder/server.py +145 -0
  93. minder/store/__init__.py +64 -0
  94. minder/store/document.py +115 -0
  95. minder/store/error.py +82 -0
  96. minder/store/feedback.py +114 -0
  97. minder/store/graph.py +588 -0
  98. minder/store/history.py +57 -0
  99. minder/store/interfaces.py +512 -0
  100. minder/store/milvus/__init__.py +11 -0
  101. minder/store/milvus/client.py +26 -0
  102. minder/store/milvus/collections.py +15 -0
  103. minder/store/milvus/vector_store.py +232 -0
  104. minder/store/mongodb/__init__.py +11 -0
  105. minder/store/mongodb/client.py +49 -0
  106. minder/store/mongodb/indexes.py +90 -0
  107. minder/store/mongodb/operational_store.py +993 -0
  108. minder/store/relational.py +1087 -0
  109. minder/store/repo_state.py +58 -0
  110. minder/store/rule.py +93 -0
  111. minder/store/vector.py +79 -0
  112. minder/tools/__init__.py +47 -0
  113. minder/tools/auth.py +94 -0
  114. minder/tools/graph.py +839 -0
  115. minder/tools/ingest.py +353 -0
  116. minder/tools/memory.py +381 -0
  117. minder/tools/query.py +307 -0
  118. minder/tools/registry.py +269 -0
  119. minder/tools/repo_scanner.py +1266 -0
  120. minder/tools/search.py +15 -0
  121. minder/tools/session.py +316 -0
  122. minder/tools/skills.py +899 -0
  123. minder/tools/workflow.py +215 -0
  124. minder/transport/__init__.py +4 -0
  125. minder/transport/base.py +286 -0
  126. minder/transport/sse.py +252 -0
  127. minder/transport/stdio.py +29 -0
  128. minder_cli-0.2.0.dist-info/METADATA +318 -0
  129. minder_cli-0.2.0.dist-info/RECORD +132 -0
  130. minder_cli-0.2.0.dist-info/WHEEL +4 -0
  131. minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
  132. minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
minder/tools/ingest.py ADDED
@@ -0,0 +1,353 @@
1
+ from __future__ import annotations
2
+
3
+ import shutil
4
+ import subprocess
5
+ import tempfile
6
+ from pathlib import Path
7
+ from typing import Any
8
+ from urllib.parse import urlparse
9
+
10
+ import httpx
11
+
12
+ from minder.chunking.splitter import TextSplitter
13
+ from minder.embedding.base import EmbeddingProvider
14
+ from minder.store.interfaces import IDocumentRepository
15
+
16
# File suffixes that directory ingestion picks up; files with any other
# suffix are skipped.
SUPPORTED_SUFFIXES = {
    ".json",
    ".md",
    ".py",
    ".toml",
    ".txt",
    ".yaml",
    ".yml",
}

# Upper bound on the raw bytes of a URL response body that are used (4 MiB).
_MAX_URL_BYTES = 4 * 1024**2
20
+
21
+
22
class IngestTools:
    """Ingest files, directories, URLs and git repositories.

    Content is embedded with the configured embedding provider and written to
    the document store.  When a vector store is supplied and exposes the
    relevant method (``upsert_document`` / ``delete_documents``), it is kept in
    step with the document store.
    """

    def __init__(
        self,
        document_store: IDocumentRepository,
        embedding_provider: EmbeddingProvider,
        vector_store: Any | None = None,
    ) -> None:
        """Store the collaborators; no I/O happens here."""
        self._document_store = document_store
        self._embedding_provider = embedding_provider
        self._vector_store = vector_store

    def _vector_supports(self, method_name: str) -> bool:
        """Return True when a vector store is configured and has *method_name*.

        Uses an explicit ``is not None`` test so that a store object which
        happens to be falsy (e.g. defines ``__len__``) is not treated as absent.
        """
        return self._vector_store is not None and hasattr(self._vector_store, method_name)

    async def minder_ingest_file(self, path: str, *, project: str | None = None) -> dict[str, object]:
        """Embed and upsert a single UTF-8 text file.

        The file is skipped (no read, no embedding, no upsert) when the stored
        document for the same path and project already matches the file's
        title, doc type, project, size and mtime, and, if a vector store is
        in use, was already marked as vector-indexed.

        Args:
            path: Path of the file to ingest.
            project: Project label; defaults to the parent directory name.

        Returns:
            A dict with ``document_id``, ``path``, ``project`` and ``doc_type``.

        Raises:
            OSError: If the file cannot be stat'ed or read.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        file_path = Path(path)
        source_path = str(file_path)
        doc_type = self._doc_type_for_suffix(file_path.suffix)
        target_project = project or file_path.parent.name
        file_stat = file_path.stat()
        existing = await self._document_store.get_document_by_path(
            source_path,
            project=target_project,
        )
        vector_enabled = self._vector_supports("upsert_document")

        if existing is not None and self._is_current_file_document(
            existing,
            title=file_path.name,
            doc_type=doc_type,
            project=target_project,
            file_size=file_stat.st_size,
            mtime_ns=file_stat.st_mtime_ns,
            vector_enabled=vector_enabled,
        ):
            document_id = existing.id
        else:
            content = file_path.read_text(encoding="utf-8")
            embedding = self._embedding_provider.embed(content)
            fields = {
                "title": file_path.name,
                "content": content,
                "doc_type": doc_type,
                "source_path": source_path,
                "project": target_project,
            }
            # With no vector store there is nothing left to index, so the
            # document counts as indexed straight away.
            chunks = {
                "size": len(content),
                "file_size": file_stat.st_size,
                "mtime_ns": file_stat.st_mtime_ns,
                "vector_indexed": not vector_enabled,
            }
            document = await self._document_store.upsert_document(
                **fields,
                chunks=chunks,
                embedding=embedding,
            )

            if vector_enabled and embedding:
                await self._vector_store.upsert_document(
                    doc_id=document.id,
                    embedding=embedding,
                    payload=dict(fields),
                )
                # Only record the flag once the vector upsert has succeeded,
                # so a failed indexing attempt is retried on the next ingest.
                chunks["vector_indexed"] = True
                document = await self._document_store.upsert_document(
                    **fields,
                    chunks=chunks,
                    embedding=embedding,
                )
            document_id = document.id

        return {
            "document_id": document_id,
            "path": source_path,
            "project": target_project,
            "doc_type": doc_type,
        }

    async def minder_ingest_directory(
        self,
        path: str,
        *,
        project: str | None = None,
    ) -> dict[str, object]:
        """Recursively ingest supported files under *path* and prune stale docs.

        Files are skipped when their suffix is not in ``SUPPORTED_SUFFIXES`` or
        when any path component *below the root* starts with ``.`` (except
        ``.minder``).  After ingestion, documents of the project whose source
        path was not ingested in this run are deleted from the document store
        and, when supported, from the vector store.

        Args:
            path: Directory to walk.
            project: Project label; defaults to the directory name.

        Returns:
            A dict with ``project``, ``ingested_count`` and sorted ``paths``.

        Raises:
            NotADirectoryError: If *path* is not an existing directory.
        """
        root = Path(path)
        if not root.is_dir():
            # Without this guard an invalid path ingests nothing and the prune
            # step below then removes every document of the project.
            raise NotADirectoryError(f"Cannot ingest {path!r}: not a directory")
        # ``Path(".").name`` is empty; fall back to the resolved directory name
        # so the per-file project and the prune target agree.
        target_project = project or root.name or root.resolve().name
        ingested_paths: set[str] = set()

        for file_path in root.rglob("*"):
            if file_path.suffix not in SUPPORTED_SUFFIXES:
                continue
            # Judge hidden components relative to the root: the root itself may
            # legitimately live under a dot-directory or be given as "../x".
            relative_parts = file_path.relative_to(root).parts
            if any(part.startswith(".") and part != ".minder" for part in relative_parts):
                continue
            if not file_path.is_file():
                continue
            await self.minder_ingest_file(str(file_path), project=target_project)
            ingested_paths.add(str(file_path))

        # Collect the ids that are about to be removed *before* deleting them
        # from the document store, so the vector store can be pruned to match.
        stale_ids: list[Any] = []
        if self._vector_supports("delete_documents"):
            existing = await self._document_store.list_documents(project=target_project)
            stale_ids = [doc.id for doc in existing if doc.source_path not in ingested_paths]

        await self._document_store.delete_documents_not_in_paths(
            project=target_project,
            keep_paths=ingested_paths,
        )

        if stale_ids:
            await self._vector_store.delete_documents(stale_ids)

        return {
            "project": target_project,
            "ingested_count": len(ingested_paths),
            "paths": sorted(ingested_paths),
        }

    # ------------------------------------------------------------------
    # URL ingestion
    # ------------------------------------------------------------------

    async def minder_ingest_url(
        self,
        url: str,
        *,
        project: str | None = None,
        chunk_size: int = 512,
        overlap: int = 64,
    ) -> dict[str, object]:
        """Fetch *url* via HTTP, chunk the text, embed, and upsert each chunk.

        At most ``_MAX_URL_BYTES`` of the response body are downloaded; the
        body is streamed and the transfer stops once the cap is reached.  The
        bytes are decoded as UTF-8 with replacement of invalid sequences.

        Content-type handling:
        - ``text/html``: tags are stripped with :meth:`_strip_html`.
        - anything else: the decoded text is used as-is.

        Every chunk is stored with ``doc_type`` ``"markdown"``.

        Args:
            url: Address to fetch; redirects are followed.
            project: Project label; defaults to the URL host with dots
                replaced by underscores, or ``"url_ingest"``.
            chunk_size: Forwarded to ``TextSplitter``.
            overlap: Forwarded to ``TextSplitter``.

        Returns:
            A dict with ``url``, ``project``, ``chunk_count`` and ``doc_ids``
            (list of upserted document IDs as strings).

        Raises:
            httpx.HTTPStatusError: If the response status is 4xx/5xx.
        """
        parsed = urlparse(url)
        target_project = project or (parsed.netloc.replace(".", "_") or "url_ingest")

        body = bytearray()
        async with httpx.AsyncClient(follow_redirects=True, timeout=30) as client:
            async with client.stream("GET", url) as response:
                response.raise_for_status()
                content_type = response.headers.get("content-type", "").lower()
                # Stream so an oversized response is never fully buffered.
                async for data in response.aiter_bytes():
                    body.extend(data)
                    if len(body) >= _MAX_URL_BYTES:
                        break

        text = bytes(body[:_MAX_URL_BYTES]).decode("utf-8", errors="replace")
        if "text/html" in content_type:
            text = self._strip_html(text)
        doc_type = "markdown"

        splitter = TextSplitter(chunk_size=chunk_size, overlap=overlap)
        chunks = splitter.split(text)

        title_prefix = parsed.path.rstrip("/").rsplit("/", 1)[-1] or parsed.netloc
        vector_enabled = self._vector_supports("upsert_document")

        # NOTE(review): every chunk is upserted with the same ``source_path``
        # (the URL). If the document store keys upserts on (source_path,
        # project), later chunks would replace earlier ones — confirm against
        # the store implementation.
        doc_ids: list[str] = []
        for i, chunk in enumerate(chunks):
            embedding = self._embedding_provider.embed(chunk.content)
            title = f"{title_prefix}_chunk{i}"
            document = await self._document_store.upsert_document(
                title=title,
                content=chunk.content,
                doc_type=doc_type,
                source_path=url,
                project=target_project,
                chunks={"chunk_index": i, "start_char": chunk.start_char, "end_char": chunk.end_char},
                embedding=embedding,
            )
            if vector_enabled and embedding:
                await self._vector_store.upsert_document(
                    doc_id=document.id,
                    embedding=embedding,
                    payload={
                        "title": title,
                        "content": chunk.content,
                        "doc_type": doc_type,
                        "source_path": url,
                        "project": target_project,
                    },
                )
            doc_ids.append(str(document.id))

        return {
            "url": url,
            "project": target_project,
            "chunk_count": len(chunks),
            "doc_ids": doc_ids,
        }

    # ------------------------------------------------------------------
    # Git ingestion
    # ------------------------------------------------------------------

    async def minder_ingest_git(
        self,
        repo_url: str,
        *,
        project: str | None = None,
        branch: str | None = None,
    ) -> dict[str, object]:
        """Shallow-clone *repo_url*, ingest its contents, then clean up.

        The clone is written to a temp directory that is always removed on exit
        (success or failure).  Ingestion is delegated to
        :meth:`minder_ingest_directory`.  The blocking ``git clone`` runs in
        the event loop's default executor so other tasks keep running.

        Args:
            repo_url: Git URL to clone.
            project: Project label forwarded to the document store.  Defaults
                to the last path component of the URL without a ``.git``
                suffix, or ``"git_ingest"``.
            branch: Optional branch / tag to clone (``--branch``).  When
                ``None`` the remote's default branch is used.

        Returns:
            The directory-ingest summary (``project``, ``ingested_count``,
            ``paths``) plus ``repo_url``.

        Raises:
            RuntimeError: If ``git clone`` exits with a non-zero status.
            subprocess.TimeoutExpired: If the clone takes longer than 120 s.
        """
        import asyncio
        import functools

        # Derive a sensible project name from the URL path.
        repo_name = urlparse(repo_url).path.rstrip("/").rsplit("/", 1)[-1]
        if repo_name.endswith(".git"):
            repo_name = repo_name[:-4]
        target_project = project or repo_name or "git_ingest"

        tmp_dir = tempfile.mkdtemp(prefix="minder_git_")
        try:
            cmd = ["git", "clone", "--depth=1", "--single-branch"]
            if branch:
                cmd += ["--branch", branch]
            # "--" ends option parsing so a URL starting with "-" cannot be
            # interpreted as a git option.
            cmd += ["--", repo_url, tmp_dir]

            run_clone = functools.partial(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                timeout=120,
            )
            result = await asyncio.get_running_loop().run_in_executor(None, run_clone)
            if result.returncode != 0:
                raise RuntimeError(
                    f"git clone failed (exit {result.returncode}): {result.stderr.strip()}"
                )

            ingest_result = await self.minder_ingest_directory(
                tmp_dir,
                project=target_project,
            )
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)

        return {
            "repo_url": repo_url,
            **ingest_result,
        }

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _strip_html(html: str) -> str:
        """Very lightweight HTML → plain-text converter (stdlib only).

        Removes ``<script>``/``<style>`` blocks, turns ``<br>`` and the closing
        tags of common block elements into newlines, strips all remaining
        tags, decodes character references once, and collapses whitespace.
        """
        import re
        from html import unescape

        # Drop script / style blocks entirely.
        text = re.sub(r"<(script|style)[^>]*>.*?</\1>", " ", html, flags=re.DOTALL | re.IGNORECASE)
        # Line breaks and the end of block-level elements become newlines.
        text = re.sub(r"<br\s*/?>|</(?:p|div|li|h[1-6]|br)>", "\n", text, flags=re.IGNORECASE)
        # Strip remaining tags.
        text = re.sub(r"<[^>]+>", " ", text)
        # Decode entities in a single pass so "&amp;lt;" yields "&lt;", not "<".
        text = unescape(text)
        # Collapse whitespace; trim spaces around newlines first so that
        # "blank" lines made only of spaces count towards the newline run.
        text = re.sub(r"[ \t]+", " ", text)
        text = re.sub(r" ?\n ?", "\n", text)
        text = re.sub(r"\n{3,}", "\n\n", text)
        return text.strip()

    @staticmethod
    def _doc_type_for_suffix(suffix: str) -> str:
        """Map a file suffix to ``"code"``, ``"config"`` or ``"markdown"``."""
        if suffix == ".py":
            return "code"
        if suffix in {".json", ".toml", ".yml", ".yaml"}:
            return "config"
        return "markdown"

    @staticmethod
    def _is_current_file_document(
        document: Any,
        *,
        title: str,
        doc_type: str,
        project: str,
        file_size: int,
        mtime_ns: int,
        vector_enabled: bool,
    ) -> bool:
        """Return True when *document* already reflects the file on disk.

        The stored title, doc type, project, ``chunks["file_size"]`` and
        ``chunks["mtime_ns"]`` must all match.  When *vector_enabled* is set,
        ``chunks["vector_indexed"]`` must additionally be exactly ``True``.
        A document whose ``chunks`` attribute is not a dict never matches.
        """
        chunks = getattr(document, "chunks", {})
        if not isinstance(chunks, dict):
            return False
        metadata_matches = (
            getattr(document, "title", None) == title
            and getattr(document, "doc_type", None) == doc_type
            and getattr(document, "project", None) == project
            and chunks.get("file_size") == file_size
            and chunks.get("mtime_ns") == mtime_ns
        )
        if not metadata_matches:
            return False
        return not vector_enabled or chunks.get("vector_indexed") is True