minder-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. minder/__init__.py +12 -0
  2. minder/api/routers/prompts.py +177 -0
  3. minder/application/__init__.py +1 -0
  4. minder/application/admin/__init__.py +11 -0
  5. minder/application/admin/dto.py +453 -0
  6. minder/application/admin/jobs.py +327 -0
  7. minder/application/admin/use_cases.py +1895 -0
  8. minder/auth/__init__.py +12 -0
  9. minder/auth/context.py +26 -0
  10. minder/auth/middleware.py +70 -0
  11. minder/auth/principal.py +59 -0
  12. minder/auth/rate_limiter.py +89 -0
  13. minder/auth/rbac.py +60 -0
  14. minder/auth/service.py +541 -0
  15. minder/bootstrap/__init__.py +9 -0
  16. minder/bootstrap/providers.py +109 -0
  17. minder/bootstrap/transport.py +807 -0
  18. minder/cache/__init__.py +10 -0
  19. minder/cache/providers.py +140 -0
  20. minder/chunking/__init__.py +4 -0
  21. minder/chunking/code_splitter.py +184 -0
  22. minder/chunking/splitter.py +136 -0
  23. minder/cli.py +1542 -0
  24. minder/config.py +179 -0
  25. minder/continuity.py +363 -0
  26. minder/dev.py +160 -0
  27. minder/embedding/__init__.py +9 -0
  28. minder/embedding/base.py +7 -0
  29. minder/embedding/local.py +65 -0
  30. minder/embedding/openai.py +7 -0
  31. minder/graph/__init__.py +11 -0
  32. minder/graph/edges.py +13 -0
  33. minder/graph/executor.py +127 -0
  34. minder/graph/graph.py +263 -0
  35. minder/graph/nodes/__init__.py +27 -0
  36. minder/graph/nodes/evaluator.py +21 -0
  37. minder/graph/nodes/guard.py +64 -0
  38. minder/graph/nodes/llm.py +59 -0
  39. minder/graph/nodes/planning.py +30 -0
  40. minder/graph/nodes/reasoning.py +87 -0
  41. minder/graph/nodes/reranker.py +141 -0
  42. minder/graph/nodes/retriever.py +86 -0
  43. minder/graph/nodes/verification.py +230 -0
  44. minder/graph/nodes/workflow_planner.py +250 -0
  45. minder/graph/runtime.py +15 -0
  46. minder/graph/state.py +26 -0
  47. minder/llm/__init__.py +5 -0
  48. minder/llm/base.py +14 -0
  49. minder/llm/local.py +381 -0
  50. minder/llm/openai.py +89 -0
  51. minder/models/__init__.py +109 -0
  52. minder/models/base.py +10 -0
  53. minder/models/client.py +137 -0
  54. minder/models/document.py +34 -0
  55. minder/models/error.py +32 -0
  56. minder/models/graph.py +114 -0
  57. minder/models/history.py +32 -0
  58. minder/models/job.py +62 -0
  59. minder/models/prompt.py +41 -0
  60. minder/models/repository.py +62 -0
  61. minder/models/rule.py +68 -0
  62. minder/models/session.py +51 -0
  63. minder/models/skill.py +52 -0
  64. minder/models/user.py +41 -0
  65. minder/models/workflow.py +35 -0
  66. minder/observability/__init__.py +57 -0
  67. minder/observability/audit.py +243 -0
  68. minder/observability/logging.py +253 -0
  69. minder/observability/metrics.py +448 -0
  70. minder/observability/tracing.py +215 -0
  71. minder/presentation/__init__.py +1 -0
  72. minder/presentation/http/__init__.py +1 -0
  73. minder/presentation/http/admin/__init__.py +3 -0
  74. minder/presentation/http/admin/api.py +1309 -0
  75. minder/presentation/http/admin/context.py +94 -0
  76. minder/presentation/http/admin/dashboard.py +111 -0
  77. minder/presentation/http/admin/jobs.py +208 -0
  78. minder/presentation/http/admin/memories.py +185 -0
  79. minder/presentation/http/admin/prompts.py +219 -0
  80. minder/presentation/http/admin/routes.py +127 -0
  81. minder/presentation/http/admin/runtime.py +650 -0
  82. minder/presentation/http/admin/search.py +368 -0
  83. minder/presentation/http/admin/skills.py +230 -0
  84. minder/prompts/__init__.py +646 -0
  85. minder/prompts/formatter.py +142 -0
  86. minder/resources/__init__.py +318 -0
  87. minder/retrieval/__init__.py +5 -0
  88. minder/retrieval/hybrid.py +178 -0
  89. minder/retrieval/mmr.py +116 -0
  90. minder/retrieval/multi_hop.py +115 -0
  91. minder/runtime.py +15 -0
  92. minder/server.py +145 -0
  93. minder/store/__init__.py +64 -0
  94. minder/store/document.py +115 -0
  95. minder/store/error.py +82 -0
  96. minder/store/feedback.py +114 -0
  97. minder/store/graph.py +588 -0
  98. minder/store/history.py +57 -0
  99. minder/store/interfaces.py +512 -0
  100. minder/store/milvus/__init__.py +11 -0
  101. minder/store/milvus/client.py +26 -0
  102. minder/store/milvus/collections.py +15 -0
  103. minder/store/milvus/vector_store.py +232 -0
  104. minder/store/mongodb/__init__.py +11 -0
  105. minder/store/mongodb/client.py +49 -0
  106. minder/store/mongodb/indexes.py +90 -0
  107. minder/store/mongodb/operational_store.py +993 -0
  108. minder/store/relational.py +1087 -0
  109. minder/store/repo_state.py +58 -0
  110. minder/store/rule.py +93 -0
  111. minder/store/vector.py +79 -0
  112. minder/tools/__init__.py +47 -0
  113. minder/tools/auth.py +94 -0
  114. minder/tools/graph.py +839 -0
  115. minder/tools/ingest.py +353 -0
  116. minder/tools/memory.py +381 -0
  117. minder/tools/query.py +307 -0
  118. minder/tools/registry.py +269 -0
  119. minder/tools/repo_scanner.py +1266 -0
  120. minder/tools/search.py +15 -0
  121. minder/tools/session.py +316 -0
  122. minder/tools/skills.py +899 -0
  123. minder/tools/workflow.py +215 -0
  124. minder/transport/__init__.py +4 -0
  125. minder/transport/base.py +286 -0
  126. minder/transport/sse.py +252 -0
  127. minder/transport/stdio.py +29 -0
  128. minder_cli-0.2.0.dist-info/METADATA +318 -0
  129. minder_cli-0.2.0.dist-info/RECORD +132 -0
  130. minder_cli-0.2.0.dist-info/WHEEL +4 -0
  131. minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
  132. minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
minder/tools/skills.py ADDED
@@ -0,0 +1,899 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import math
5
+ import subprocess
6
+ import tempfile
7
+ import uuid
8
+ from dataclasses import dataclass
9
+ from datetime import UTC, datetime
10
+ from pathlib import Path
11
+ from typing import Any
12
+ from urllib.parse import urlparse
13
+ from collections.abc import Awaitable, Callable
14
+
15
+ from minder.continuity import compatibility_score_for_memory, step_keywords
16
+ from minder.config import MinderConfig
17
+ from minder.embedding.local import LocalEmbeddingProvider
18
+ from minder.observability.metrics import record_continuity_skill_recall
19
+ from minder.store.interfaces import IOperationalStore
20
+
21
+
22
+ @dataclass(frozen=True)
23
+ class _ImportTarget:
24
+ source_path: str
25
+ files: tuple[Path, ...]
26
+
27
+
28
+ class SkillTools:
29
+ _ALLOWED_EXCERPT_KINDS = {"none", "reusable_excerpt"}
30
+ _IMPORT_SUFFIXES = {".json", ".md", ".markdown", ".txt"}
31
+ _CANONICAL_SKILL_FILENAMES = {
32
+ "skill.md",
33
+ "skill.markdown",
34
+ "skill.txt",
35
+ }
36
+ _DEFAULT_IMPORT_SOURCE_PATH = "skills"
37
+ _AUTO_IMPORT_SOURCE_PATH = "auto"
38
+ _DISCOVERY_DIRECTORY_NAMES = {
39
+ "skill",
40
+ "skills",
41
+ "skill-pack",
42
+ "skill-packs",
43
+ "skill_pack",
44
+ "skill_packs",
45
+ "skillpacks",
46
+ "playbook",
47
+ "playbooks",
48
+ "runbook",
49
+ "runbooks",
50
+ "checklists",
51
+ }
52
+ _DISCOVERY_FILE_HINTS = ("skill", "playbook", "runbook", "checklist")
53
+ _PRUNED_IMPORT_NAMES = {
54
+ ".git",
55
+ ".hg",
56
+ ".svn",
57
+ ".venv",
58
+ "venv",
59
+ "node_modules",
60
+ "dist",
61
+ "build",
62
+ "coverage",
63
+ "__pycache__",
64
+ ".mypy_cache",
65
+ ".pytest_cache",
66
+ }
67
+ _ALLOWED_HIDDEN_IMPORT_DIRS = {".agents", ".minder"}
68
+ _ARTIFACT_TAGS = {
69
+ "problem_statement",
70
+ "acceptance_criteria",
71
+ "analysis_notes",
72
+ "use_cases",
73
+ "test_plan",
74
+ "failing_tests",
75
+ "implementation_notes",
76
+ "changed_files",
77
+ "verification_report",
78
+ "test_results",
79
+ "review_notes",
80
+ "approval_summary",
81
+ "release_notes",
82
+ "rollback_plan",
83
+ "step_notes",
84
+ }
85
+
86
+ def __init__(self, store: IOperationalStore, config: MinderConfig) -> None:
87
+ self._store = store
88
+ self._embedder = LocalEmbeddingProvider(
89
+ config.embedding.model_path,
90
+ dimensions=min(config.embedding.dimensions, 16),
91
+ runtime="auto",
92
+ )
93
+
94
+ async def minder_skill_store(
95
+ self,
96
+ *,
97
+ title: str,
98
+ content: str,
99
+ language: str,
100
+ tags: list[str] | None = None,
101
+ workflow_steps: list[str] | None = None,
102
+ artifact_types: list[str] | None = None,
103
+ provenance: str | None = None,
104
+ quality_score: float = 0.0,
105
+ source_metadata: dict[str, Any] | None = None,
106
+ excerpt_kind: str = "none",
107
+ ) -> dict[str, Any]:
108
+ skill = await self._store.create_skill(
109
+ id=uuid.uuid4(),
110
+ title=title,
111
+ content=content,
112
+ language=language,
113
+ tags=self._normalized_tags(
114
+ tags=tags,
115
+ workflow_steps=workflow_steps,
116
+ artifact_types=artifact_types,
117
+ provenance=provenance,
118
+ ),
119
+ embedding=self._embedder.embed(f"{title}\n{content}"),
120
+ usage_count=0,
121
+ quality_score=max(float(quality_score), 0.0),
122
+ source_metadata=self._normalized_source_metadata(source_metadata),
123
+ excerpt_kind=self._validated_excerpt_kind(excerpt_kind),
124
+ )
125
+ return self._serialize_skill(skill)
126
+
127
+ async def minder_skill_recall(
128
+ self,
129
+ query: str,
130
+ *,
131
+ limit: int = 5,
132
+ current_step: str | None = None,
133
+ artifact_type: str | None = None,
134
+ min_quality_score: float = 0.0,
135
+ ) -> list[dict[str, Any]]:
136
+ query_embedding = self._embedder.embed(query)
137
+ ranked: list[dict[str, Any]] = []
138
+ for skill in await self._store.list_skills():
139
+ quality_score = float(getattr(skill, "quality_score", 0.0) or 0.0)
140
+ if quality_score < min_quality_score:
141
+ continue
142
+ embedding = skill.embedding if isinstance(skill.embedding, list) else None
143
+ if not embedding:
144
+ continue
145
+ semantic_score = self._cosine_similarity(query_embedding, embedding)
146
+ compatibility_score, compatibility_reasons = compatibility_score_for_memory(
147
+ tags=list(skill.tags) if isinstance(skill.tags, list) else [],
148
+ title=str(skill.title),
149
+ content=str(skill.content),
150
+ current_step=current_step,
151
+ artifact_type=artifact_type,
152
+ )
153
+ blended_score = min(
154
+ (semantic_score * 0.65)
155
+ + (compatibility_score * 0.2)
156
+ + (min(quality_score, 1.0) * 0.15),
157
+ 1.5,
158
+ )
159
+ ranked_item = {
160
+ **self._serialize_skill(skill),
161
+ "semantic_score": round(semantic_score, 4),
162
+ "step_compatibility": round(compatibility_score, 4),
163
+ "continuity_reasons": compatibility_reasons,
164
+ "score": round(blended_score, 4),
165
+ }
166
+ ranked.append(ranked_item)
167
+ ranked.sort(key=lambda item: float(item["score"]), reverse=True)
168
+ limited = ranked[:limit]
169
+ for item in limited:
170
+ record_continuity_skill_recall(
171
+ step_compatibility=float(item["step_compatibility"]),
172
+ quality_score=float(item["quality_score"]),
173
+ )
174
+ return limited
175
+
176
+ async def minder_skill_list(
177
+ self,
178
+ *,
179
+ current_step: str | None = None,
180
+ tag: str | None = None,
181
+ min_quality_score: float = 0.0,
182
+ ) -> list[dict[str, Any]]:
183
+ required_tags = {
184
+ str(tag).strip().lower()
185
+ for tag in [tag]
186
+ if tag is not None and str(tag).strip()
187
+ }
188
+ if current_step:
189
+ required_tags.update(step_keywords(current_step))
190
+ items: list[dict[str, Any]] = []
191
+ for skill in await self._store.list_skills():
192
+ quality_score = float(getattr(skill, "quality_score", 0.0) or 0.0)
193
+ if quality_score < min_quality_score:
194
+ continue
195
+ normalized_tags = {
196
+ str(item).strip().lower()
197
+ for item in list(getattr(skill, "tags", []) or [])
198
+ if str(item).strip()
199
+ }
200
+ if required_tags and not required_tags <= normalized_tags:
201
+ continue
202
+ items.append(self._serialize_skill(skill))
203
+ items.sort(
204
+ key=lambda item: (-float(item["quality_score"]), str(item["title"]).lower())
205
+ )
206
+ return items
207
+
208
+ async def minder_skill_update(
209
+ self,
210
+ skill_id: str,
211
+ *,
212
+ title: str | None = None,
213
+ content: str | None = None,
214
+ language: str | None = None,
215
+ tags: list[str] | None = None,
216
+ workflow_steps: list[str] | None = None,
217
+ artifact_types: list[str] | None = None,
218
+ provenance: str | None = None,
219
+ quality_score: float | None = None,
220
+ source_metadata: dict[str, Any] | None = None,
221
+ excerpt_kind: str | None = None,
222
+ ) -> dict[str, Any]:
223
+ existing = await self._store.get_skill_by_id(uuid.UUID(skill_id))
224
+ if existing is None:
225
+ raise ValueError(f"Skill not found: {skill_id}")
226
+
227
+ update_data: dict[str, Any] = {}
228
+ next_title = title if title is not None else str(existing.title)
229
+ next_content = content if content is not None else str(existing.content)
230
+ if title is not None:
231
+ update_data["title"] = title
232
+ if content is not None:
233
+ update_data["content"] = content
234
+ if language is not None:
235
+ update_data["language"] = language
236
+ if quality_score is not None:
237
+ update_data["quality_score"] = max(float(quality_score), 0.0)
238
+ if source_metadata is not None:
239
+ update_data["source_metadata"] = self._normalized_source_metadata(
240
+ source_metadata
241
+ )
242
+ if excerpt_kind is not None:
243
+ update_data["excerpt_kind"] = self._validated_excerpt_kind(excerpt_kind)
244
+ if any(
245
+ value is not None
246
+ for value in (tags, workflow_steps, artifact_types, provenance)
247
+ ):
248
+ update_data["tags"] = self._normalized_tags(
249
+ tags=(
250
+ tags
251
+ if tags is not None
252
+ else list(getattr(existing, "tags", []) or [])
253
+ ),
254
+ workflow_steps=workflow_steps,
255
+ artifact_types=artifact_types,
256
+ provenance=provenance,
257
+ )
258
+ if title is not None or content is not None:
259
+ update_data["embedding"] = self._embedder.embed(
260
+ f"{next_title}\n{next_content}"
261
+ )
262
+ updated = await self._store.update_skill(uuid.UUID(skill_id), **update_data)
263
+ if updated is None:
264
+ raise ValueError(f"Skill not found: {skill_id}")
265
+ return self._serialize_skill(updated)
266
+
267
+ async def minder_skill_import_git(
268
+ self,
269
+ *,
270
+ repo_url: str,
271
+ source_path: str = "skills",
272
+ ref: str | None = None,
273
+ provider: str | None = None,
274
+ excerpt_kind: str = "none",
275
+ progress_callback: Callable[[dict[str, Any]], Awaitable[None]] | None = None,
276
+ ) -> dict[str, Any]:
277
+ normalized_repo_url = self._normalize_repo_url(repo_url)
278
+ normalized_source_path = self._normalize_source_path(source_path)
279
+ resolved_provider = self._resolve_provider(provider, normalized_repo_url)
280
+ validated_excerpt_kind = self._validated_excerpt_kind(excerpt_kind)
281
+
282
+ async def emit_progress(**payload: Any) -> None:
283
+ if progress_callback is None:
284
+ return
285
+ await progress_callback(payload)
286
+
287
+ with tempfile.TemporaryDirectory(prefix="minder-skill-import-") as tmp_dir:
288
+ await emit_progress(
289
+ event_type="clone_started",
290
+ message="Cloning Git repository",
291
+ )
292
+ command = ["git", "clone", "--depth", "1"]
293
+ if ref:
294
+ command += ["--branch", ref]
295
+ command += [repo_url, tmp_dir]
296
+ result = subprocess.run(
297
+ command,
298
+ capture_output=True,
299
+ text=True,
300
+ check=False,
301
+ )
302
+ if result.returncode != 0:
303
+ message = (
304
+ result.stderr.strip() or result.stdout.strip() or "git clone failed"
305
+ )
306
+ raise ValueError(message)
307
+
308
+ repo_root = Path(tmp_dir)
309
+ import_targets = self._resolve_import_targets(
310
+ repo_root=repo_root,
311
+ source_path=normalized_source_path,
312
+ )
313
+ await emit_progress(
314
+ event_type="discovery_completed",
315
+ message="Resolved import targets",
316
+ details={
317
+ "resolved_paths": [target.source_path for target in import_targets],
318
+ },
319
+ )
320
+
321
+ existing_by_source_key = self._skills_by_source_key(
322
+ await self._store.list_skills()
323
+ )
324
+ imported: list[dict[str, Any]] = []
325
+ created_count = 0
326
+ updated_count = 0
327
+ imported_file_paths: set[str] = set()
328
+ total_files = sum(len(target.files) for target in import_targets)
329
+ processed_files = 0
330
+
331
+ for target in import_targets:
332
+ for file_path in target.files:
333
+ relative_file_path = file_path.relative_to(repo_root).as_posix()
334
+ if relative_file_path in imported_file_paths:
335
+ continue
336
+ imported_file_paths.add(relative_file_path)
337
+ processed_files += 1
338
+ await emit_progress(
339
+ event_type="file_processing",
340
+ message=f"Processing {relative_file_path}",
341
+ progress_current=processed_files,
342
+ progress_total=total_files,
343
+ details={
344
+ "resolved_path": target.source_path,
345
+ "file_path": relative_file_path,
346
+ },
347
+ )
348
+ documents = self._load_import_documents(file_path)
349
+ for index, document in enumerate(documents):
350
+ auxiliary_paths = self._collect_auxiliary_paths(
351
+ repo_root=repo_root,
352
+ file_path=file_path,
353
+ )
354
+ source_metadata = self._build_import_source_metadata(
355
+ provider=resolved_provider,
356
+ repo_url=normalized_repo_url,
357
+ ref=ref,
358
+ source_path=target.source_path,
359
+ file_path=relative_file_path,
360
+ document_index=index,
361
+ auxiliary_paths=auxiliary_paths,
362
+ )
363
+ source_key = str(source_metadata["import_key"])
364
+ existing = existing_by_source_key.get(source_key)
365
+ next_excerpt_kind = document.get(
366
+ "excerpt_kind", validated_excerpt_kind
367
+ )
368
+ if existing is None:
369
+ stored = await self.minder_skill_store(
370
+ title=document["title"],
371
+ content=document["content"],
372
+ language=document["language"],
373
+ tags=document["tags"],
374
+ workflow_steps=document["workflow_steps"],
375
+ artifact_types=document["artifact_types"],
376
+ provenance=document["provenance"],
377
+ quality_score=document["quality_score"],
378
+ source_metadata=source_metadata,
379
+ excerpt_kind=next_excerpt_kind,
380
+ )
381
+ created_count += 1
382
+ imported.append(
383
+ {
384
+ "action": "created",
385
+ "id": stored["id"],
386
+ "title": stored["title"],
387
+ "source": stored["source"],
388
+ }
389
+ )
390
+ existing_by_source_key[source_key] = stored
391
+ continue
392
+
393
+ updated = await self.minder_skill_update(
394
+ str(existing["id"]),
395
+ title=document["title"],
396
+ content=document["content"],
397
+ language=document["language"],
398
+ tags=document["tags"],
399
+ workflow_steps=document["workflow_steps"],
400
+ artifact_types=document["artifact_types"],
401
+ provenance=document["provenance"],
402
+ quality_score=document["quality_score"],
403
+ source_metadata=source_metadata,
404
+ excerpt_kind=next_excerpt_kind,
405
+ )
406
+ updated_count += 1
407
+ imported.append(
408
+ {
409
+ "action": "updated",
410
+ "id": updated["id"],
411
+ "title": updated["title"],
412
+ "source": updated["source"],
413
+ }
414
+ )
415
+ existing_by_source_key[source_key] = updated
416
+
417
+ return {
418
+ "provider": resolved_provider,
419
+ "repo_url": normalized_repo_url,
420
+ "ref": ref,
421
+ "path": normalized_source_path,
422
+ "resolved_paths": [target.source_path for target in import_targets],
423
+ "created_count": created_count,
424
+ "updated_count": updated_count,
425
+ "imported_count": created_count + updated_count,
426
+ "imported": imported,
427
+ }
428
+
429
+ async def minder_skill_delete(self, skill_id: str) -> dict[str, bool]:
430
+ await self._store.delete_skill(uuid.UUID(skill_id))
431
+ return {"deleted": True}
432
+
433
+ def _serialize_skill(self, skill: Any) -> dict[str, Any]:
434
+ tags = list(getattr(skill, "tags", []) or [])
435
+ source_metadata = self._normalized_source_metadata(
436
+ getattr(skill, "source_metadata", None)
437
+ )
438
+ return {
439
+ "id": str(skill.id),
440
+ "title": str(skill.title),
441
+ "content": str(skill.content),
442
+ "language": str(getattr(skill, "language", "")),
443
+ "tags": tags,
444
+ "quality_score": round(
445
+ float(getattr(skill, "quality_score", 0.0) or 0.0), 4
446
+ ),
447
+ "usage_count": int(getattr(skill, "usage_count", 0) or 0),
448
+ "workflow_step_tags": [
449
+ tag for tag in tags if ":" not in tag and tag not in self._ARTIFACT_TAGS
450
+ ],
451
+ "artifact_type_tags": [tag for tag in tags if tag in self._ARTIFACT_TAGS],
452
+ "provenance": next(
453
+ (tag.split(":", 1)[1] for tag in tags if tag.startswith("source:")),
454
+ None,
455
+ ),
456
+ "source": source_metadata,
457
+ "excerpt_kind": self._validated_excerpt_kind(
458
+ str(getattr(skill, "excerpt_kind", "none") or "none")
459
+ ),
460
+ }
461
+
462
+ @classmethod
463
+ def _validated_excerpt_kind(cls, excerpt_kind: str) -> str:
464
+ normalized = str(excerpt_kind or "none").strip().lower() or "none"
465
+ if normalized not in cls._ALLOWED_EXCERPT_KINDS:
466
+ raise ValueError(f"Unsupported excerpt_kind: {excerpt_kind}")
467
+ return normalized
468
+
469
+ @staticmethod
470
+ def _normalized_source_metadata(
471
+ source_metadata: dict[str, Any] | None,
472
+ ) -> dict[str, Any] | None:
473
+ if not isinstance(source_metadata, dict) or not source_metadata:
474
+ return None
475
+ normalized = {
476
+ str(key): value
477
+ for key, value in source_metadata.items()
478
+ if value is not None and str(key).strip()
479
+ }
480
+ return normalized or None
481
+
482
+ @staticmethod
483
+ def _normalize_source_path(source_path: str) -> str:
484
+ normalized = str(source_path or "skills").strip().strip("/")
485
+ if not normalized:
486
+ return "skills"
487
+ if normalized.lower() == SkillTools._AUTO_IMPORT_SOURCE_PATH:
488
+ return SkillTools._AUTO_IMPORT_SOURCE_PATH
489
+ if Path(normalized).is_absolute() or ".." in Path(normalized).parts:
490
+ raise ValueError(f"Invalid skill source path: {source_path}")
491
+ return normalized
492
+
493
+ @classmethod
494
+ def _resolve_import_targets(
495
+ cls,
496
+ *,
497
+ repo_root: Path,
498
+ source_path: str,
499
+ ) -> list[_ImportTarget]:
500
+ auto_discovery = source_path in {
501
+ cls._DEFAULT_IMPORT_SOURCE_PATH,
502
+ cls._AUTO_IMPORT_SOURCE_PATH,
503
+ }
504
+ targets: list[_ImportTarget] = []
505
+ seen_paths: set[str] = set()
506
+
507
+ def add_target(candidate: Path) -> None:
508
+ target = cls._build_import_target(repo_root=repo_root, candidate=candidate)
509
+ if target is None or target.source_path in seen_paths:
510
+ return
511
+ target_parts = Path(target.source_path).parts
512
+ for existing in targets:
513
+ existing_parts = Path(existing.source_path).parts
514
+ if target_parts[: len(existing_parts)] == existing_parts:
515
+ return
516
+ filtered_targets = [
517
+ existing
518
+ for existing in targets
519
+ if Path(existing.source_path).parts[: len(target_parts)] != target_parts
520
+ ]
521
+ if len(filtered_targets) != len(targets):
522
+ targets[:] = filtered_targets
523
+ seen_paths.clear()
524
+ seen_paths.update(existing.source_path for existing in targets)
525
+ seen_paths.add(target.source_path)
526
+ targets.append(target)
527
+
528
+ if source_path != cls._AUTO_IMPORT_SOURCE_PATH:
529
+ requested_path = repo_root / source_path
530
+ if requested_path.exists():
531
+ add_target(requested_path)
532
+ if not auto_discovery:
533
+ return targets
534
+ elif not auto_discovery:
535
+ raise ValueError(
536
+ f"Skill source path not found in repository: {source_path}"
537
+ )
538
+
539
+ if auto_discovery:
540
+ for candidate in cls._discover_skill_candidates(repo_root):
541
+ add_target(candidate)
542
+ if targets:
543
+ return targets
544
+ raise ValueError(
545
+ f"Skill source path not found in repository: {source_path}. "
546
+ "Auto-discovery could not find any supported skill documents."
547
+ )
548
+
549
+ raise ValueError(f"No supported skill documents found under {source_path}")
550
+
551
+ @classmethod
552
+ def _build_import_target(
553
+ cls,
554
+ *,
555
+ repo_root: Path,
556
+ candidate: Path,
557
+ ) -> _ImportTarget | None:
558
+ try:
559
+ relative_candidate = candidate.relative_to(repo_root)
560
+ except ValueError:
561
+ return None
562
+ if cls._should_ignore_relative_parts(relative_candidate.parts):
563
+ return None
564
+ if candidate.is_file():
565
+ if not cls._is_supported_import_file(candidate):
566
+ return None
567
+ return _ImportTarget(
568
+ source_path=relative_candidate.as_posix(),
569
+ files=(candidate,),
570
+ )
571
+ if not candidate.is_dir():
572
+ return None
573
+ files = tuple(cls._collect_import_files(candidate, repo_root=repo_root))
574
+ if not files:
575
+ return None
576
+ return _ImportTarget(
577
+ source_path=relative_candidate.as_posix(),
578
+ files=files,
579
+ )
580
+
581
+ @classmethod
582
+ def _collect_import_files(cls, root: Path, *, repo_root: Path) -> list[Path]:
583
+ canonical_root_file = cls._canonical_skill_file_for_dir(root)
584
+ if canonical_root_file is not None:
585
+ return [canonical_root_file]
586
+ return [
587
+ path
588
+ for path in sorted(root.rglob("*"))
589
+ if path.is_file()
590
+ and cls._is_supported_import_file(path)
591
+ and not cls._should_ignore_relative_parts(
592
+ path.relative_to(repo_root).parts,
593
+ )
594
+ and cls._should_import_supported_file(path, repo_root=repo_root)
595
+ ]
596
+
597
+ @classmethod
598
+ def _discover_skill_candidates(cls, repo_root: Path) -> list[Path]:
599
+ candidates: list[tuple[int, str, Path]] = []
600
+ for path in repo_root.rglob("*"):
601
+ try:
602
+ relative = path.relative_to(repo_root)
603
+ except ValueError:
604
+ continue
605
+ if cls._should_ignore_relative_parts(relative.parts):
606
+ continue
607
+ name = path.name.lower()
608
+ relative_text = relative.as_posix().lower()
609
+ score = 0
610
+ if path.is_dir():
611
+ if name in cls._DISCOVERY_DIRECTORY_NAMES:
612
+ score += 5
613
+ if "skill" in name:
614
+ score += 4
615
+ if any(hint in relative_text for hint in cls._DISCOVERY_FILE_HINTS):
616
+ score += 1
617
+ if score <= 0:
618
+ continue
619
+ elif path.is_file():
620
+ if not cls._is_supported_import_file(path):
621
+ continue
622
+ if not cls._should_import_supported_file(path, repo_root=repo_root):
623
+ continue
624
+ if any(hint in name for hint in cls._DISCOVERY_FILE_HINTS):
625
+ score += 4
626
+ if "skills" in relative_text:
627
+ score += 2
628
+ if score <= 0:
629
+ continue
630
+ else:
631
+ continue
632
+ candidates.append((score, relative.as_posix(), path))
633
+
634
+ candidates.sort(key=lambda item: (-item[0], item[1]))
635
+ return [path for _, _, path in candidates]
636
+
637
+ @classmethod
638
+ def _is_supported_import_file(cls, path: Path) -> bool:
639
+ return path.suffix.lower() in cls._IMPORT_SUFFIXES
640
+
641
+ @classmethod
642
+ def _canonical_skill_file_for_dir(cls, directory: Path) -> Path | None:
643
+ for path in sorted(directory.iterdir() if directory.exists() else []):
644
+ if not path.is_file() or not cls._is_supported_import_file(path):
645
+ continue
646
+ if path.name.casefold() in cls._CANONICAL_SKILL_FILENAMES:
647
+ return path
648
+ return None
649
+
650
+ @classmethod
651
+ def _canonical_skill_ancestor_file(
652
+ cls,
653
+ *,
654
+ path: Path,
655
+ repo_root: Path,
656
+ ) -> Path | None:
657
+ current = path.parent
658
+ while current != repo_root and repo_root in current.parents:
659
+ canonical = cls._canonical_skill_file_for_dir(current)
660
+ if canonical is not None:
661
+ return canonical
662
+ current = current.parent
663
+ canonical = cls._canonical_skill_file_for_dir(repo_root)
664
+ if canonical is not None:
665
+ return canonical
666
+ return None
667
+
668
+ @classmethod
669
+ def _should_import_supported_file(cls, path: Path, *, repo_root: Path) -> bool:
670
+ canonical_ancestor = cls._canonical_skill_ancestor_file(
671
+ path=path,
672
+ repo_root=repo_root,
673
+ )
674
+ if canonical_ancestor is None:
675
+ return True
676
+ return canonical_ancestor == path
677
+
678
+ @classmethod
679
+ def _collect_auxiliary_paths(
680
+ cls,
681
+ *,
682
+ repo_root: Path,
683
+ file_path: Path,
684
+ ) -> list[str]:
685
+ skill_root = file_path.parent
686
+ canonical = cls._canonical_skill_file_for_dir(skill_root)
687
+ if canonical is None or canonical != file_path:
688
+ return []
689
+ auxiliary_paths: list[str] = []
690
+ for candidate in sorted(skill_root.rglob("*")):
691
+ if candidate == canonical:
692
+ continue
693
+ if cls._should_ignore_relative_parts(
694
+ candidate.relative_to(repo_root).parts
695
+ ):
696
+ continue
697
+ if candidate.is_file() and not cls._is_supported_import_file(candidate):
698
+ auxiliary_paths.append(candidate.relative_to(skill_root).as_posix())
699
+ continue
700
+ if candidate.is_file():
701
+ auxiliary_paths.append(candidate.relative_to(skill_root).as_posix())
702
+ continue
703
+ if candidate.is_dir() and candidate != skill_root:
704
+ auxiliary_paths.append(candidate.relative_to(skill_root).as_posix())
705
+ return auxiliary_paths
706
+
707
+ @classmethod
708
+ def _should_ignore_relative_parts(cls, parts: tuple[str, ...]) -> bool:
709
+ for part in parts:
710
+ if part in cls._PRUNED_IMPORT_NAMES:
711
+ return True
712
+ if part.startswith(".") and part not in cls._ALLOWED_HIDDEN_IMPORT_DIRS:
713
+ return True
714
+ return False
715
+
716
+ @staticmethod
717
+ def _normalize_repo_url(repo_url: str) -> str:
718
+ raw = str(repo_url or "").strip()
719
+ if not raw:
720
+ raise ValueError("repo_url is required")
721
+ parsed = urlparse(raw)
722
+ if parsed.scheme or raw.startswith("git@"):
723
+ return raw.rstrip("/")
724
+ path = Path(raw).expanduser()
725
+ if path.exists():
726
+ return path.resolve().as_posix()
727
+ return raw.rstrip("/")
728
+
729
+ @staticmethod
730
+ def _resolve_provider(provider: str | None, repo_url: str) -> str:
731
+ if provider:
732
+ normalized = str(provider).strip().lower()
733
+ if normalized in {"github", "gitlab", "generic_git"}:
734
+ return normalized
735
+ raise ValueError(f"Unsupported provider: {provider}")
736
+ lowered = repo_url.lower()
737
+ if "github.com" in lowered:
738
+ return "github"
739
+ if "gitlab" in lowered:
740
+ return "gitlab"
741
+ return "generic_git"
742
+
743
+ def _skills_by_source_key(self, skills: list[Any]) -> dict[str, dict[str, Any]]:
744
+ indexed: dict[str, dict[str, Any]] = {}
745
+ for skill in skills:
746
+ serialized = self._serialize_skill(skill)
747
+ source = serialized.get("source") or {}
748
+ source_key = str(source.get("import_key") or "").strip()
749
+ if source_key:
750
+ indexed[source_key] = serialized
751
+ return indexed
752
+
753
+ def _build_import_source_metadata(
754
+ self,
755
+ *,
756
+ provider: str,
757
+ repo_url: str,
758
+ ref: str | None,
759
+ source_path: str,
760
+ file_path: str,
761
+ document_index: int,
762
+ auxiliary_paths: list[str] | None = None,
763
+ ) -> dict[str, Any]:
764
+ import_key = "::".join(
765
+ [
766
+ provider,
767
+ repo_url,
768
+ ref or "HEAD",
769
+ source_path,
770
+ file_path,
771
+ str(document_index),
772
+ ]
773
+ )
774
+ return {
775
+ "provider": provider,
776
+ "repo_url": repo_url,
777
+ "ref": ref,
778
+ "path": source_path,
779
+ "file_path": file_path,
780
+ "auxiliary_paths": list(auxiliary_paths or []),
781
+ "import_key": import_key,
782
+ "imported_at": datetime.now(UTC).isoformat(),
783
+ }
784
+
785
+ def _load_import_documents(self, file_path: Path) -> list[dict[str, Any]]:
786
+ suffix = file_path.suffix.lower()
787
+ raw = file_path.read_text(encoding="utf-8")
788
+ if suffix in {".md", ".markdown", ".txt"}:
789
+ title = self._extract_document_title(raw, fallback=file_path.stem)
790
+ return [
791
+ {
792
+ "title": title,
793
+ "content": raw.strip(),
794
+ "language": "markdown" if suffix != ".txt" else "text",
795
+ "tags": [],
796
+ "workflow_steps": [],
797
+ "artifact_types": [],
798
+ "provenance": None,
799
+ "quality_score": 0.0,
800
+ }
801
+ ]
802
+ if suffix == ".json":
803
+ payload = json.loads(raw)
804
+ if isinstance(payload, dict) and isinstance(payload.get("skills"), list):
805
+ candidates = payload.get("skills") or []
806
+ elif isinstance(payload, list):
807
+ candidates = payload
808
+ else:
809
+ candidates = [payload]
810
+ documents = [
811
+ self._coerce_import_document(item, file_path=file_path)
812
+ for item in candidates
813
+ ]
814
+ return [document for document in documents if document is not None]
815
+ raise ValueError(f"Unsupported skill import file: {file_path.name}")
816
+
817
+ def _coerce_import_document(
818
+ self,
819
+ payload: Any,
820
+ *,
821
+ file_path: Path,
822
+ ) -> dict[str, Any] | None:
823
+ if not isinstance(payload, dict):
824
+ return None
825
+ content = str(payload.get("content", "") or "").strip()
826
+ title = str(payload.get("title", "") or "").strip() or file_path.stem
827
+ if not content:
828
+ return None
829
+ return {
830
+ "title": title,
831
+ "content": content,
832
+ "language": str(payload.get("language", "markdown") or "markdown"),
833
+ "tags": [str(tag) for tag in list(payload.get("tags", []) or [])],
834
+ "workflow_steps": [
835
+ str(step) for step in list(payload.get("workflow_steps", []) or [])
836
+ ],
837
+ "artifact_types": [
838
+ str(item) for item in list(payload.get("artifact_types", []) or [])
839
+ ],
840
+ "provenance": (
841
+ str(payload.get("provenance"))
842
+ if payload.get("provenance") is not None
843
+ else None
844
+ ),
845
+ "quality_score": float(payload.get("quality_score", 0.0) or 0.0),
846
+ "excerpt_kind": (
847
+ str(payload.get("excerpt_kind"))
848
+ if payload.get("excerpt_kind") is not None
849
+ else "none"
850
+ ),
851
+ }
852
+
853
+ @staticmethod
854
+ def _extract_document_title(raw: str, *, fallback: str) -> str:
855
+ for line in raw.splitlines():
856
+ stripped = line.strip()
857
+ if stripped.startswith("#"):
858
+ return stripped.lstrip("#").strip() or fallback
859
+ return fallback
860
+
861
+ @staticmethod
862
+ def _normalized_tags(
863
+ *,
864
+ tags: list[str] | None,
865
+ workflow_steps: list[str] | None,
866
+ artifact_types: list[str] | None,
867
+ provenance: str | None,
868
+ ) -> list[str]:
869
+ normalized: list[str] = []
870
+ seen: set[str] = set()
871
+
872
+ def add(value: str) -> None:
873
+ token = str(value or "").strip().lower()
874
+ if not token or token in seen:
875
+ return
876
+ seen.add(token)
877
+ normalized.append(token)
878
+
879
+ for tag in tags or []:
880
+ add(tag)
881
+ for step in workflow_steps or []:
882
+ for token in sorted(step_keywords(step)):
883
+ add(token)
884
+ for artifact in artifact_types or []:
885
+ add(artifact)
886
+ if provenance:
887
+ add(f"source:{provenance}")
888
+ return normalized
889
+
890
+ @staticmethod
891
+ def _cosine_similarity(left: list[float], right: list[float]) -> float:
892
+ if not left or not right or len(left) != len(right):
893
+ return 0.0
894
+ numerator = sum(a * b for a, b in zip(left, right, strict=False))
895
+ left_norm = math.sqrt(sum(value * value for value in left))
896
+ right_norm = math.sqrt(sum(value * value for value in right))
897
+ if left_norm == 0 or right_norm == 0:
898
+ return 0.0
899
+ return numerator / (left_norm * right_norm)