minder-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minder/__init__.py +12 -0
- minder/api/routers/prompts.py +177 -0
- minder/application/__init__.py +1 -0
- minder/application/admin/__init__.py +11 -0
- minder/application/admin/dto.py +453 -0
- minder/application/admin/jobs.py +327 -0
- minder/application/admin/use_cases.py +1895 -0
- minder/auth/__init__.py +12 -0
- minder/auth/context.py +26 -0
- minder/auth/middleware.py +70 -0
- minder/auth/principal.py +59 -0
- minder/auth/rate_limiter.py +89 -0
- minder/auth/rbac.py +60 -0
- minder/auth/service.py +541 -0
- minder/bootstrap/__init__.py +9 -0
- minder/bootstrap/providers.py +109 -0
- minder/bootstrap/transport.py +807 -0
- minder/cache/__init__.py +10 -0
- minder/cache/providers.py +140 -0
- minder/chunking/__init__.py +4 -0
- minder/chunking/code_splitter.py +184 -0
- minder/chunking/splitter.py +136 -0
- minder/cli.py +1542 -0
- minder/config.py +179 -0
- minder/continuity.py +363 -0
- minder/dev.py +160 -0
- minder/embedding/__init__.py +9 -0
- minder/embedding/base.py +7 -0
- minder/embedding/local.py +65 -0
- minder/embedding/openai.py +7 -0
- minder/graph/__init__.py +11 -0
- minder/graph/edges.py +13 -0
- minder/graph/executor.py +127 -0
- minder/graph/graph.py +263 -0
- minder/graph/nodes/__init__.py +27 -0
- minder/graph/nodes/evaluator.py +21 -0
- minder/graph/nodes/guard.py +64 -0
- minder/graph/nodes/llm.py +59 -0
- minder/graph/nodes/planning.py +30 -0
- minder/graph/nodes/reasoning.py +87 -0
- minder/graph/nodes/reranker.py +141 -0
- minder/graph/nodes/retriever.py +86 -0
- minder/graph/nodes/verification.py +230 -0
- minder/graph/nodes/workflow_planner.py +250 -0
- minder/graph/runtime.py +15 -0
- minder/graph/state.py +26 -0
- minder/llm/__init__.py +5 -0
- minder/llm/base.py +14 -0
- minder/llm/local.py +381 -0
- minder/llm/openai.py +89 -0
- minder/models/__init__.py +109 -0
- minder/models/base.py +10 -0
- minder/models/client.py +137 -0
- minder/models/document.py +34 -0
- minder/models/error.py +32 -0
- minder/models/graph.py +114 -0
- minder/models/history.py +32 -0
- minder/models/job.py +62 -0
- minder/models/prompt.py +41 -0
- minder/models/repository.py +62 -0
- minder/models/rule.py +68 -0
- minder/models/session.py +51 -0
- minder/models/skill.py +52 -0
- minder/models/user.py +41 -0
- minder/models/workflow.py +35 -0
- minder/observability/__init__.py +57 -0
- minder/observability/audit.py +243 -0
- minder/observability/logging.py +253 -0
- minder/observability/metrics.py +448 -0
- minder/observability/tracing.py +215 -0
- minder/presentation/__init__.py +1 -0
- minder/presentation/http/__init__.py +1 -0
- minder/presentation/http/admin/__init__.py +3 -0
- minder/presentation/http/admin/api.py +1309 -0
- minder/presentation/http/admin/context.py +94 -0
- minder/presentation/http/admin/dashboard.py +111 -0
- minder/presentation/http/admin/jobs.py +208 -0
- minder/presentation/http/admin/memories.py +185 -0
- minder/presentation/http/admin/prompts.py +219 -0
- minder/presentation/http/admin/routes.py +127 -0
- minder/presentation/http/admin/runtime.py +650 -0
- minder/presentation/http/admin/search.py +368 -0
- minder/presentation/http/admin/skills.py +230 -0
- minder/prompts/__init__.py +646 -0
- minder/prompts/formatter.py +142 -0
- minder/resources/__init__.py +318 -0
- minder/retrieval/__init__.py +5 -0
- minder/retrieval/hybrid.py +178 -0
- minder/retrieval/mmr.py +116 -0
- minder/retrieval/multi_hop.py +115 -0
- minder/runtime.py +15 -0
- minder/server.py +145 -0
- minder/store/__init__.py +64 -0
- minder/store/document.py +115 -0
- minder/store/error.py +82 -0
- minder/store/feedback.py +114 -0
- minder/store/graph.py +588 -0
- minder/store/history.py +57 -0
- minder/store/interfaces.py +512 -0
- minder/store/milvus/__init__.py +11 -0
- minder/store/milvus/client.py +26 -0
- minder/store/milvus/collections.py +15 -0
- minder/store/milvus/vector_store.py +232 -0
- minder/store/mongodb/__init__.py +11 -0
- minder/store/mongodb/client.py +49 -0
- minder/store/mongodb/indexes.py +90 -0
- minder/store/mongodb/operational_store.py +993 -0
- minder/store/relational.py +1087 -0
- minder/store/repo_state.py +58 -0
- minder/store/rule.py +93 -0
- minder/store/vector.py +79 -0
- minder/tools/__init__.py +47 -0
- minder/tools/auth.py +94 -0
- minder/tools/graph.py +839 -0
- minder/tools/ingest.py +353 -0
- minder/tools/memory.py +381 -0
- minder/tools/query.py +307 -0
- minder/tools/registry.py +269 -0
- minder/tools/repo_scanner.py +1266 -0
- minder/tools/search.py +15 -0
- minder/tools/session.py +316 -0
- minder/tools/skills.py +899 -0
- minder/tools/workflow.py +215 -0
- minder/transport/__init__.py +4 -0
- minder/transport/base.py +286 -0
- minder/transport/sse.py +252 -0
- minder/transport/stdio.py +29 -0
- minder_cli-0.2.0.dist-info/METADATA +318 -0
- minder_cli-0.2.0.dist-info/RECORD +132 -0
- minder_cli-0.2.0.dist-info/WHEEL +4 -0
- minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
- minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
minder/tools/ingest.py
ADDED
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
import subprocess
|
|
5
|
+
import tempfile
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
from urllib.parse import urlparse
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from minder.chunking.splitter import TextSplitter
|
|
13
|
+
from minder.embedding.base import EmbeddingProvider
|
|
14
|
+
from minder.store.interfaces import IDocumentRepository
|
|
15
|
+
|
|
16
|
+
# File suffixes that directory ingestion accepts; files with any other suffix are skipped.
SUPPORTED_SUFFIXES = {".py", ".md", ".txt", ".json", ".toml", ".yml", ".yaml"}
|
|
17
|
+
|
|
18
|
+
# Maximum raw bytes to read from a URL response (4 MB).
|
|
19
|
+
_MAX_URL_BYTES = 4 * 1024 * 1024
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class IngestTools:
    """Ingest files, directories, URLs, and git repositories into the stores.

    Every ``minder_ingest_*`` coroutine embeds text with the configured
    embedding provider and upserts it into the document store.  When a vector
    store exposing ``upsert_document`` is supplied, the same embedding is
    mirrored there as well.
    """

    def __init__(
        self,
        document_store: IDocumentRepository,
        embedding_provider: EmbeddingProvider,
        vector_store: Any | None = None,
    ) -> None:
        """Store the collaborators used by the ingestion coroutines.

        Args:
            document_store: Repository that persists documents.
            embedding_provider: Object whose ``embed(text)`` yields an embedding.
            vector_store: Optional vector index.  It is only used for the
                operations it actually exposes (``upsert_document`` and
                ``delete_documents`` are probed with ``hasattr``).
        """
        self._document_store = document_store
        self._embedding_provider = embedding_provider
        self._vector_store = vector_store

    async def minder_ingest_file(self, path: str, *, project: str | None = None) -> dict[str, object]:
        """Embed and upsert a single file, skipping files that are unchanged.

        A file counts as unchanged when the stored document for the same path
        and project matches on title, doc type, project, byte size and
        ``mtime_ns`` (and, if a vector store is active, was already indexed
        there).

        Args:
            path: Path of the file to ingest; read as UTF-8.
            project: Project label.  Defaults to the parent directory name.

        Returns:
            A dict with ``document_id``, ``path``, ``project`` and ``doc_type``.
        """
        file_path = Path(path)
        source_path = str(file_path)
        doc_type = self._doc_type_for_suffix(file_path.suffix)
        target_project = project or file_path.parent.name
        file_stat = file_path.stat()
        existing = await self._document_store.get_document_by_path(
            source_path,
            project=target_project,
        )
        vector_enabled = bool(self._vector_store and hasattr(self._vector_store, "upsert_document"))

        if existing is not None and self._is_current_file_document(
            existing,
            title=file_path.name,
            doc_type=doc_type,
            project=target_project,
            file_size=file_stat.st_size,
            mtime_ns=file_stat.st_mtime_ns,
            vector_enabled=vector_enabled,
        ):
            return {
                "document_id": existing.id,
                "path": source_path,
                "project": target_project,
                "doc_type": doc_type,
            }

        content = file_path.read_text(encoding="utf-8")
        embedding = self._embedding_provider.embed(content)
        # Bookkeeping stored with the document; it drives the "unchanged" check
        # on the next run.  Without a vector store there is nothing left to
        # index, so the document is marked as indexed immediately.
        chunks = {
            "size": len(content),
            "file_size": file_stat.st_size,
            "mtime_ns": file_stat.st_mtime_ns,
            "vector_indexed": not vector_enabled,
        }
        document = await self._document_store.upsert_document(
            title=file_path.name,
            content=content,
            doc_type=doc_type,
            source_path=source_path,
            project=target_project,
            chunks=chunks,
            embedding=embedding,
        )

        # ``vector_enabled`` already implies a usable vector store.
        if vector_enabled and embedding:
            await self._vector_store.upsert_document(
                doc_id=document.id,
                embedding=embedding,
                payload={
                    "title": file_path.name,
                    "content": content,
                    "doc_type": doc_type,
                    "source_path": source_path,
                    "project": target_project,
                },
            )
            # Second upsert records that vector indexing succeeded, so a
            # failure above leaves the document flagged for a retry.
            chunks["vector_indexed"] = True
            document = await self._document_store.upsert_document(
                title=file_path.name,
                content=content,
                doc_type=doc_type,
                source_path=source_path,
                project=target_project,
                chunks=chunks,
                embedding=embedding,
            )

        return {
            "document_id": document.id,
            "path": source_path,
            "project": target_project,
            "doc_type": doc_type,
        }

    async def minder_ingest_directory(
        self,
        path: str,
        *,
        project: str | None = None,
    ) -> dict[str, object]:
        """Ingest every supported file under *path* and prune stale documents.

        Files are skipped when any path component *below* ``path`` starts with
        a dot (``.minder`` excepted) or when the suffix is not in
        ``SUPPORTED_SUFFIXES``.  After ingestion, documents of the project
        whose source path was not seen in this run are deleted from the
        document store, and from the vector store when it exposes
        ``delete_documents``.

        Args:
            path: Directory to walk recursively.
            project: Project label.  Defaults to the directory name.

        Returns:
            A dict with ``project``, ``ingested_count`` and sorted ``paths``.
        """
        root = Path(path)
        target_project = project or root.name
        ingested_paths: set[str] = set()
        ingested_count = 0

        for file_path in root.rglob("*"):
            if not file_path.is_file():
                continue
            # Only components below the root decide whether a file is hidden.
            # Looking at the full path would skip *every* file when the root
            # itself sits under a dot-directory or is given as "../x", and the
            # pruning step below would then delete the whole project.
            relative_parts = file_path.relative_to(root).parts
            if any(part.startswith(".") and part != ".minder" for part in relative_parts):
                continue
            if file_path.suffix not in SUPPORTED_SUFFIXES:
                continue
            await self.minder_ingest_file(str(file_path), project=target_project)
            ingested_paths.add(str(file_path))
            ingested_count += 1

        # Collect the ids that are about to disappear *before* the document
        # store forgets them, so the vector store can be pruned afterwards.
        can_prune_vectors = bool(self._vector_store and hasattr(self._vector_store, "delete_documents"))
        docs_to_delete = []
        if can_prune_vectors:
            existing = await self._document_store.list_documents(project=target_project)
            docs_to_delete = [doc.id for doc in existing if doc.source_path not in ingested_paths]

        await self._document_store.delete_documents_not_in_paths(
            project=target_project,
            keep_paths=ingested_paths,
        )

        if docs_to_delete:
            await self._vector_store.delete_documents(docs_to_delete)

        return {
            "project": target_project,
            "ingested_count": ingested_count,
            "paths": sorted(ingested_paths),
        }

    # ------------------------------------------------------------------
    # URL ingestion
    # ------------------------------------------------------------------

    async def minder_ingest_url(
        self,
        url: str,
        *,
        project: str | None = None,
        chunk_size: int = 512,
        overlap: int = 64,
    ) -> dict[str, object]:
        """Fetch *url* via HTTP, chunk the text, embed, and upsert each chunk.

        At most ``_MAX_URL_BYTES`` bytes of the response body are read; the
        body is streamed so an oversized response is never buffered in full.

        Content-type detection:
        - ``text/html``: tags are stripped with :meth:`_strip_html`.
        - anything else: the decoded body is used as-is.

        Args:
            url: Address to fetch; redirects are followed.
            project: Project label.  Defaults to the host name with dots
                replaced by underscores, or ``"url_ingest"``.
            chunk_size: Forwarded to ``TextSplitter``.
            overlap: Forwarded to ``TextSplitter``.

        Returns:
            A summary dict with ``url``, ``project``, ``chunk_count`` and
            ``doc_ids`` (list of upserted document IDs as strings).

        Raises:
            httpx.HTTPStatusError: If the response status is an error.
        """
        parsed = urlparse(url)
        target_project = project or (parsed.netloc.replace(".", "_") or "url_ingest")

        body = bytearray()
        async with httpx.AsyncClient(follow_redirects=True, timeout=30) as client:
            async with client.stream("GET", url) as response:
                response.raise_for_status()
                content_type = response.headers.get("content-type", "").lower()
                # Stop reading as soon as the cap is reached instead of
                # downloading everything and truncating afterwards.
                async for piece in response.aiter_bytes():
                    body += piece
                    if len(body) >= _MAX_URL_BYTES:
                        break

        # A multi-byte character cut at the cap is tolerated by "replace".
        text = bytes(body[:_MAX_URL_BYTES]).decode("utf-8", errors="replace")
        if "text/html" in content_type:
            text = self._strip_html(text)
        doc_type = "markdown"

        splitter = TextSplitter(chunk_size=chunk_size, overlap=overlap)
        chunks = splitter.split(text)

        # Loop invariants: title stem and vector-store capability.
        title_stem = parsed.path.rstrip("/").rsplit("/", 1)[-1] or parsed.netloc
        vector_enabled = bool(self._vector_store and hasattr(self._vector_store, "upsert_document"))

        # NOTE(review): every chunk is upserted with the same source_path (the
        # URL).  If the document store keys documents by path and project,
        # later chunks would replace earlier ones - confirm against the store.
        doc_ids: list[str] = []
        for i, chunk in enumerate(chunks):
            embedding = self._embedding_provider.embed(chunk.content)
            title = f"{title_stem}_chunk{i}"
            document = await self._document_store.upsert_document(
                title=title,
                content=chunk.content,
                doc_type=doc_type,
                source_path=url,
                project=target_project,
                chunks={"chunk_index": i, "start_char": chunk.start_char, "end_char": chunk.end_char},
                embedding=embedding,
            )
            if vector_enabled and embedding:
                await self._vector_store.upsert_document(
                    doc_id=document.id,
                    embedding=embedding,
                    payload={
                        "title": title,
                        "content": chunk.content,
                        "doc_type": doc_type,
                        "source_path": url,
                        "project": target_project,
                    },
                )
            doc_ids.append(str(document.id))

        return {
            "url": url,
            "project": target_project,
            "chunk_count": len(chunks),
            "doc_ids": doc_ids,
        }

    # ------------------------------------------------------------------
    # Git ingestion
    # ------------------------------------------------------------------

    async def minder_ingest_git(
        self,
        repo_url: str,
        *,
        project: str | None = None,
        branch: str | None = None,
    ) -> dict[str, object]:
        """Shallow-clone *repo_url*, ingest its contents, then clean up.

        The clone is written to a temp directory that is always removed on exit
        (success or failure).  Ingestion is delegated to
        :meth:`minder_ingest_directory`.  The clone runs in a worker thread so
        the event loop stays responsive while git is working.

        Args:
            repo_url: HTTPS or SSH git URL.
            project: Project label forwarded to document store.  Defaults to
                the repo name derived from the URL.
            branch: Optional branch / tag to clone (``--branch``).  When
                ``None`` the remote's default branch is used.

        Returns:
            A dict with ``repo_url``, ``project``, ``ingested_count``,
            and ``paths``.

        Raises:
            RuntimeError: If ``git clone`` exits with a non-zero status.
            subprocess.TimeoutExpired: If the clone exceeds 120 seconds.
        """
        import asyncio
        import functools

        # Derive a sensible project name from the URL path.
        repo_name = urlparse(repo_url).path.rstrip("/").rsplit("/", 1)[-1]
        if repo_name.endswith(".git"):
            repo_name = repo_name[:-4]
        target_project = project or repo_name or "git_ingest"

        tmp_dir = tempfile.mkdtemp(prefix="minder_git_")
        try:
            cmd = ["git", "clone", "--depth=1", "--single-branch"]
            if branch:
                cmd += ["--branch", branch]
            # "--" ends option parsing, so a URL beginning with "-" can never
            # be interpreted as a git option.
            cmd += ["--", repo_url, tmp_dir]

            run_clone = functools.partial(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                timeout=120,
            )
            # subprocess.run blocks; keep it off the event loop thread.
            result = await asyncio.get_running_loop().run_in_executor(None, run_clone)
            if result.returncode != 0:
                raise RuntimeError(
                    f"git clone failed (exit {result.returncode}): {result.stderr.strip()}"
                )

            ingest_result = await self.minder_ingest_directory(
                tmp_dir,
                project=target_project,
            )
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)

        return {
            "repo_url": repo_url,
            **ingest_result,
        }

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _strip_html(html: str) -> str:
        """Very lightweight HTML -> plain-text converter (stdlib only).

        Removes ``<script>``/``<style>`` blocks, turns line breaks and the
        closing tags of common block elements into newlines, strips all
        remaining tags, decodes character references, and collapses runs of
        whitespace.
        """
        import re
        from html import unescape

        # Drop script / style blocks entirely.
        text = re.sub(r"<(script|style)[^>]*>.*?</\1>", " ", html, flags=re.DOTALL | re.IGNORECASE)
        # Line breaks and block-level closers become newlines for readability.
        text = re.sub(r"<br\s*/?>|</(p|div|li|h[1-6]|br)>", "\n", text, flags=re.IGNORECASE)
        # Strip remaining tags.
        text = re.sub(r"<[^>]+>", " ", text)
        # Decode character references in one pass, after tag stripping, so an
        # escaped "&lt;b&gt;" is kept as literal text and is not decoded twice.
        text = unescape(text)
        # Collapse horizontal whitespace (including non-breaking spaces).
        text = re.sub(r"[ \t\xa0]+", " ", text)
        # Drop the single spaces left around newlines so blank-line runs
        # collapse properly.
        text = re.sub(r" ?\n ?", "\n", text)
        text = re.sub(r"\n{3,}", "\n\n", text)
        return text.strip()

    @staticmethod
    def _doc_type_for_suffix(suffix: str) -> str:
        """Map a file suffix to ``"code"``, ``"config"`` or ``"markdown"``."""
        if suffix == ".py":
            return "code"
        if suffix in {".json", ".toml", ".yml", ".yaml"}:
            return "config"
        # Everything else (.md, .txt, unknown) is treated as markdown.
        return "markdown"

    @staticmethod
    def _is_current_file_document(
        document: Any,
        *,
        title: str,
        doc_type: str,
        project: str,
        file_size: int,
        mtime_ns: int,
        vector_enabled: bool,
    ) -> bool:
        """Return True when *document* still matches the file on disk.

        The stored document must carry a dict in ``chunks`` and agree with the
        given title, doc type, project, file size and ``mtime_ns``.  When
        *vector_enabled* is true, ``chunks["vector_indexed"]`` must also be
        exactly ``True``.
        """
        chunks = getattr(document, "chunks", {})
        if not isinstance(chunks, dict):
            return False
        if getattr(document, "title", None) != title:
            return False
        if getattr(document, "doc_type", None) != doc_type:
            return False
        if getattr(document, "project", None) != project:
            return False
        if chunks.get("file_size") != file_size:
            return False
        if chunks.get("mtime_ns") != mtime_ns:
            return False
        if vector_enabled and chunks.get("vector_indexed") is not True:
            return False
        return True
|