minder-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minder/__init__.py +12 -0
- minder/api/routers/prompts.py +177 -0
- minder/application/__init__.py +1 -0
- minder/application/admin/__init__.py +11 -0
- minder/application/admin/dto.py +453 -0
- minder/application/admin/jobs.py +327 -0
- minder/application/admin/use_cases.py +1895 -0
- minder/auth/__init__.py +12 -0
- minder/auth/context.py +26 -0
- minder/auth/middleware.py +70 -0
- minder/auth/principal.py +59 -0
- minder/auth/rate_limiter.py +89 -0
- minder/auth/rbac.py +60 -0
- minder/auth/service.py +541 -0
- minder/bootstrap/__init__.py +9 -0
- minder/bootstrap/providers.py +109 -0
- minder/bootstrap/transport.py +807 -0
- minder/cache/__init__.py +10 -0
- minder/cache/providers.py +140 -0
- minder/chunking/__init__.py +4 -0
- minder/chunking/code_splitter.py +184 -0
- minder/chunking/splitter.py +136 -0
- minder/cli.py +1542 -0
- minder/config.py +179 -0
- minder/continuity.py +363 -0
- minder/dev.py +160 -0
- minder/embedding/__init__.py +9 -0
- minder/embedding/base.py +7 -0
- minder/embedding/local.py +65 -0
- minder/embedding/openai.py +7 -0
- minder/graph/__init__.py +11 -0
- minder/graph/edges.py +13 -0
- minder/graph/executor.py +127 -0
- minder/graph/graph.py +263 -0
- minder/graph/nodes/__init__.py +27 -0
- minder/graph/nodes/evaluator.py +21 -0
- minder/graph/nodes/guard.py +64 -0
- minder/graph/nodes/llm.py +59 -0
- minder/graph/nodes/planning.py +30 -0
- minder/graph/nodes/reasoning.py +87 -0
- minder/graph/nodes/reranker.py +141 -0
- minder/graph/nodes/retriever.py +86 -0
- minder/graph/nodes/verification.py +230 -0
- minder/graph/nodes/workflow_planner.py +250 -0
- minder/graph/runtime.py +15 -0
- minder/graph/state.py +26 -0
- minder/llm/__init__.py +5 -0
- minder/llm/base.py +14 -0
- minder/llm/local.py +381 -0
- minder/llm/openai.py +89 -0
- minder/models/__init__.py +109 -0
- minder/models/base.py +10 -0
- minder/models/client.py +137 -0
- minder/models/document.py +34 -0
- minder/models/error.py +32 -0
- minder/models/graph.py +114 -0
- minder/models/history.py +32 -0
- minder/models/job.py +62 -0
- minder/models/prompt.py +41 -0
- minder/models/repository.py +62 -0
- minder/models/rule.py +68 -0
- minder/models/session.py +51 -0
- minder/models/skill.py +52 -0
- minder/models/user.py +41 -0
- minder/models/workflow.py +35 -0
- minder/observability/__init__.py +57 -0
- minder/observability/audit.py +243 -0
- minder/observability/logging.py +253 -0
- minder/observability/metrics.py +448 -0
- minder/observability/tracing.py +215 -0
- minder/presentation/__init__.py +1 -0
- minder/presentation/http/__init__.py +1 -0
- minder/presentation/http/admin/__init__.py +3 -0
- minder/presentation/http/admin/api.py +1309 -0
- minder/presentation/http/admin/context.py +94 -0
- minder/presentation/http/admin/dashboard.py +111 -0
- minder/presentation/http/admin/jobs.py +208 -0
- minder/presentation/http/admin/memories.py +185 -0
- minder/presentation/http/admin/prompts.py +219 -0
- minder/presentation/http/admin/routes.py +127 -0
- minder/presentation/http/admin/runtime.py +650 -0
- minder/presentation/http/admin/search.py +368 -0
- minder/presentation/http/admin/skills.py +230 -0
- minder/prompts/__init__.py +646 -0
- minder/prompts/formatter.py +142 -0
- minder/resources/__init__.py +318 -0
- minder/retrieval/__init__.py +5 -0
- minder/retrieval/hybrid.py +178 -0
- minder/retrieval/mmr.py +116 -0
- minder/retrieval/multi_hop.py +115 -0
- minder/runtime.py +15 -0
- minder/server.py +145 -0
- minder/store/__init__.py +64 -0
- minder/store/document.py +115 -0
- minder/store/error.py +82 -0
- minder/store/feedback.py +114 -0
- minder/store/graph.py +588 -0
- minder/store/history.py +57 -0
- minder/store/interfaces.py +512 -0
- minder/store/milvus/__init__.py +11 -0
- minder/store/milvus/client.py +26 -0
- minder/store/milvus/collections.py +15 -0
- minder/store/milvus/vector_store.py +232 -0
- minder/store/mongodb/__init__.py +11 -0
- minder/store/mongodb/client.py +49 -0
- minder/store/mongodb/indexes.py +90 -0
- minder/store/mongodb/operational_store.py +993 -0
- minder/store/relational.py +1087 -0
- minder/store/repo_state.py +58 -0
- minder/store/rule.py +93 -0
- minder/store/vector.py +79 -0
- minder/tools/__init__.py +47 -0
- minder/tools/auth.py +94 -0
- minder/tools/graph.py +839 -0
- minder/tools/ingest.py +353 -0
- minder/tools/memory.py +381 -0
- minder/tools/query.py +307 -0
- minder/tools/registry.py +269 -0
- minder/tools/repo_scanner.py +1266 -0
- minder/tools/search.py +15 -0
- minder/tools/session.py +316 -0
- minder/tools/skills.py +899 -0
- minder/tools/workflow.py +215 -0
- minder/transport/__init__.py +4 -0
- minder/transport/base.py +286 -0
- minder/transport/sse.py +252 -0
- minder/transport/stdio.py +29 -0
- minder_cli-0.2.0.dist-info/METADATA +318 -0
- minder_cli-0.2.0.dist-info/RECORD +132 -0
- minder_cli-0.2.0.dist-info/WHEEL +4 -0
- minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
- minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
minder/tools/skills.py
ADDED
|
@@ -0,0 +1,899 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import math
|
|
5
|
+
import subprocess
|
|
6
|
+
import tempfile
|
|
7
|
+
import uuid
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from datetime import UTC, datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
from urllib.parse import urlparse
|
|
13
|
+
from collections.abc import Awaitable, Callable
|
|
14
|
+
|
|
15
|
+
from minder.continuity import compatibility_score_for_memory, step_keywords
|
|
16
|
+
from minder.config import MinderConfig
|
|
17
|
+
from minder.embedding.local import LocalEmbeddingProvider
|
|
18
|
+
from minder.observability.metrics import record_continuity_skill_recall
|
|
19
|
+
from minder.store.interfaces import IOperationalStore
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
|
|
23
|
+
class _ImportTarget:
|
|
24
|
+
source_path: str
|
|
25
|
+
files: tuple[Path, ...]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class SkillTools:
    """Skill storage, recall, listing, update and Git import operations."""

    # Accepted values for a skill's ``excerpt_kind`` field.
    _ALLOWED_EXCERPT_KINDS = {"none", "reusable_excerpt"}

    # File suffixes (lower-case) that count as importable skill documents.
    _IMPORT_SUFFIXES = {".json", ".md", ".markdown", ".txt"}

    # File names (case-folded) that mark a directory as a single skill.
    _CANONICAL_SKILL_FILENAMES = {"skill.md", "skill.markdown", "skill.txt"}

    # "skills" is the default import path; both values below enable
    # auto-discovery when resolving import targets.
    _DEFAULT_IMPORT_SOURCE_PATH = "skills"
    _AUTO_IMPORT_SOURCE_PATH = "auto"

    # Directory names that score as likely skill containers during discovery.
    _DISCOVERY_DIRECTORY_NAMES = {
        "skill", "skills",
        "skill-pack", "skill-packs",
        "skill_pack", "skill_packs",
        "skillpacks",
        "playbook", "playbooks",
        "runbook", "runbooks",
        "checklists",
    }

    # Substrings that raise a path's discovery score.
    _DISCOVERY_FILE_HINTS = ("skill", "playbook", "runbook", "checklist")

    # Path components that exclude a file or directory from import.
    _PRUNED_IMPORT_NAMES = {
        ".git", ".hg", ".svn",
        ".venv", "venv",
        "node_modules",
        "dist", "build", "coverage",
        "__pycache__", ".mypy_cache", ".pytest_cache",
    }

    # Dot-prefixed names that are exempt from hidden-path pruning.
    _ALLOWED_HIDDEN_IMPORT_DIRS = {".agents", ".minder"}

    # Tags reported as artifact types rather than workflow steps.
    _ARTIFACT_TAGS = {
        "problem_statement", "acceptance_criteria", "analysis_notes",
        "use_cases", "test_plan", "failing_tests",
        "implementation_notes", "changed_files", "verification_report",
        "test_results", "review_notes", "approval_summary",
        "release_notes", "rollback_plan", "step_notes",
    }
|
|
85
|
+
|
|
86
|
+
def __init__(self, store: IOperationalStore, config: MinderConfig) -> None:
    """Bind the operational store and build the local embedding provider.

    Args:
        store: Operational store used for all skill persistence calls.
        config: Application configuration; only ``config.embedding`` is read.
    """
    self._store = store
    model_path = config.embedding.model_path
    # The embedder's dimension count is capped at 16.
    capped_dimensions = min(config.embedding.dimensions, 16)
    self._embedder = LocalEmbeddingProvider(
        model_path,
        dimensions=capped_dimensions,
        runtime="auto",
    )
|
|
93
|
+
|
|
94
|
+
async def minder_skill_store(
    self,
    *,
    title: str,
    content: str,
    language: str,
    tags: list[str] | None = None,
    workflow_steps: list[str] | None = None,
    artifact_types: list[str] | None = None,
    provenance: str | None = None,
    quality_score: float = 0.0,
    source_metadata: dict[str, Any] | None = None,
    excerpt_kind: str = "none",
) -> dict[str, Any]:
    """Create a new skill record and return it in serialized form.

    The embedding is computed from ``title`` and ``content`` joined by a
    newline. Negative quality scores are raised to ``0.0``.

    Raises:
        ValueError: If ``excerpt_kind`` is not an allowed value.
    """
    skill_id = uuid.uuid4()
    merged_tags = self._normalized_tags(
        tags=tags,
        workflow_steps=workflow_steps,
        artifact_types=artifact_types,
        provenance=provenance,
    )
    vector = self._embedder.embed(f"{title}\n{content}")
    floor_score = max(float(quality_score), 0.0)
    clean_metadata = self._normalized_source_metadata(source_metadata)
    clean_excerpt_kind = self._validated_excerpt_kind(excerpt_kind)

    created = await self._store.create_skill(
        id=skill_id,
        title=title,
        content=content,
        language=language,
        tags=merged_tags,
        embedding=vector,
        usage_count=0,
        quality_score=floor_score,
        source_metadata=clean_metadata,
        excerpt_kind=clean_excerpt_kind,
    )
    return self._serialize_skill(created)
|
|
126
|
+
|
|
127
|
+
async def minder_skill_recall(
    self,
    query: str,
    *,
    limit: int = 5,
    current_step: str | None = None,
    artifact_type: str | None = None,
    min_quality_score: float = 0.0,
) -> list[dict[str, Any]]:
    """Rank stored skills against ``query`` and return the best matches.

    Each candidate's score blends semantic similarity (weight 0.65), step
    compatibility (0.2) and quality capped at 1.0 (0.15); the blend itself is
    capped at 1.5. Skills below ``min_quality_score`` or without a list
    embedding are skipped. A recall metric is recorded for every returned
    item.

    Args:
        query: Free text that is embedded and compared with each skill.
        limit: Maximum number of results; values below zero return nothing.
        current_step: Optional workflow step passed to the compatibility scorer.
        artifact_type: Optional artifact type passed to the compatibility scorer.
        min_quality_score: Minimum stored quality score a skill must have.

    Returns:
        Serialized skills with ``semantic_score``, ``step_compatibility``,
        ``continuity_reasons`` and ``score`` added, best score first.
    """
    query_embedding = self._embedder.embed(query)
    ranked: list[dict[str, Any]] = []
    for skill in await self._store.list_skills():
        quality_score = float(getattr(skill, "quality_score", 0.0) or 0.0)
        if quality_score < min_quality_score:
            continue
        embedding = skill.embedding if isinstance(skill.embedding, list) else None
        if not embedding:
            continue
        semantic_score = self._cosine_similarity(query_embedding, embedding)
        compatibility_score, compatibility_reasons = compatibility_score_for_memory(
            tags=list(skill.tags) if isinstance(skill.tags, list) else [],
            title=str(skill.title),
            content=str(skill.content),
            current_step=current_step,
            artifact_type=artifact_type,
        )
        blended_score = min(
            (semantic_score * 0.65)
            + (compatibility_score * 0.2)
            + (min(quality_score, 1.0) * 0.15),
            1.5,
        )
        ranked.append(
            {
                **self._serialize_skill(skill),
                "semantic_score": round(semantic_score, 4),
                "step_compatibility": round(compatibility_score, 4),
                "continuity_reasons": compatibility_reasons,
                "score": round(blended_score, 4),
            }
        )
    ranked.sort(key=lambda item: float(item["score"]), reverse=True)
    # Clamp at zero: a negative limit would otherwise slice from the end and
    # return every item except the lowest-ranked ones.
    limited = ranked[: max(limit, 0)]
    for item in limited:
        record_continuity_skill_recall(
            step_compatibility=float(item["step_compatibility"]),
            quality_score=float(item["quality_score"]),
        )
    return limited
|
|
175
|
+
|
|
176
|
+
async def minder_skill_list(
|
|
177
|
+
self,
|
|
178
|
+
*,
|
|
179
|
+
current_step: str | None = None,
|
|
180
|
+
tag: str | None = None,
|
|
181
|
+
min_quality_score: float = 0.0,
|
|
182
|
+
) -> list[dict[str, Any]]:
|
|
183
|
+
required_tags = {
|
|
184
|
+
str(tag).strip().lower()
|
|
185
|
+
for tag in [tag]
|
|
186
|
+
if tag is not None and str(tag).strip()
|
|
187
|
+
}
|
|
188
|
+
if current_step:
|
|
189
|
+
required_tags.update(step_keywords(current_step))
|
|
190
|
+
items: list[dict[str, Any]] = []
|
|
191
|
+
for skill in await self._store.list_skills():
|
|
192
|
+
quality_score = float(getattr(skill, "quality_score", 0.0) or 0.0)
|
|
193
|
+
if quality_score < min_quality_score:
|
|
194
|
+
continue
|
|
195
|
+
normalized_tags = {
|
|
196
|
+
str(item).strip().lower()
|
|
197
|
+
for item in list(getattr(skill, "tags", []) or [])
|
|
198
|
+
if str(item).strip()
|
|
199
|
+
}
|
|
200
|
+
if required_tags and not required_tags <= normalized_tags:
|
|
201
|
+
continue
|
|
202
|
+
items.append(self._serialize_skill(skill))
|
|
203
|
+
items.sort(
|
|
204
|
+
key=lambda item: (-float(item["quality_score"]), str(item["title"]).lower())
|
|
205
|
+
)
|
|
206
|
+
return items
|
|
207
|
+
|
|
208
|
+
async def minder_skill_update(
|
|
209
|
+
self,
|
|
210
|
+
skill_id: str,
|
|
211
|
+
*,
|
|
212
|
+
title: str | None = None,
|
|
213
|
+
content: str | None = None,
|
|
214
|
+
language: str | None = None,
|
|
215
|
+
tags: list[str] | None = None,
|
|
216
|
+
workflow_steps: list[str] | None = None,
|
|
217
|
+
artifact_types: list[str] | None = None,
|
|
218
|
+
provenance: str | None = None,
|
|
219
|
+
quality_score: float | None = None,
|
|
220
|
+
source_metadata: dict[str, Any] | None = None,
|
|
221
|
+
excerpt_kind: str | None = None,
|
|
222
|
+
) -> dict[str, Any]:
|
|
223
|
+
existing = await self._store.get_skill_by_id(uuid.UUID(skill_id))
|
|
224
|
+
if existing is None:
|
|
225
|
+
raise ValueError(f"Skill not found: {skill_id}")
|
|
226
|
+
|
|
227
|
+
update_data: dict[str, Any] = {}
|
|
228
|
+
next_title = title if title is not None else str(existing.title)
|
|
229
|
+
next_content = content if content is not None else str(existing.content)
|
|
230
|
+
if title is not None:
|
|
231
|
+
update_data["title"] = title
|
|
232
|
+
if content is not None:
|
|
233
|
+
update_data["content"] = content
|
|
234
|
+
if language is not None:
|
|
235
|
+
update_data["language"] = language
|
|
236
|
+
if quality_score is not None:
|
|
237
|
+
update_data["quality_score"] = max(float(quality_score), 0.0)
|
|
238
|
+
if source_metadata is not None:
|
|
239
|
+
update_data["source_metadata"] = self._normalized_source_metadata(
|
|
240
|
+
source_metadata
|
|
241
|
+
)
|
|
242
|
+
if excerpt_kind is not None:
|
|
243
|
+
update_data["excerpt_kind"] = self._validated_excerpt_kind(excerpt_kind)
|
|
244
|
+
if any(
|
|
245
|
+
value is not None
|
|
246
|
+
for value in (tags, workflow_steps, artifact_types, provenance)
|
|
247
|
+
):
|
|
248
|
+
update_data["tags"] = self._normalized_tags(
|
|
249
|
+
tags=(
|
|
250
|
+
tags
|
|
251
|
+
if tags is not None
|
|
252
|
+
else list(getattr(existing, "tags", []) or [])
|
|
253
|
+
),
|
|
254
|
+
workflow_steps=workflow_steps,
|
|
255
|
+
artifact_types=artifact_types,
|
|
256
|
+
provenance=provenance,
|
|
257
|
+
)
|
|
258
|
+
if title is not None or content is not None:
|
|
259
|
+
update_data["embedding"] = self._embedder.embed(
|
|
260
|
+
f"{next_title}\n{next_content}"
|
|
261
|
+
)
|
|
262
|
+
updated = await self._store.update_skill(uuid.UUID(skill_id), **update_data)
|
|
263
|
+
if updated is None:
|
|
264
|
+
raise ValueError(f"Skill not found: {skill_id}")
|
|
265
|
+
return self._serialize_skill(updated)
|
|
266
|
+
|
|
267
|
+
async def minder_skill_import_git(
    self,
    *,
    repo_url: str,
    source_path: str = "skills",
    ref: str | None = None,
    provider: str | None = None,
    excerpt_kind: str = "none",
    progress_callback: Callable[[dict[str, Any]], Awaitable[None]] | None = None,
) -> dict[str, Any]:
    """Shallow-clone a Git repository and import its skill documents.

    Every document loaded from the resolved files is created as a new skill,
    or updates the existing skill that carries the same ``import_key`` in its
    source metadata. A file reachable through several targets is processed
    once.

    Args:
        repo_url: Repository URL or local path to clone.
        source_path: Repository-relative path to import from.
        ref: Optional value passed to ``git clone --branch``.
        provider: Optional provider name, resolved together with the URL.
        excerpt_kind: Default excerpt kind for documents that do not set one.
        progress_callback: Optional awaitable callback receiving one payload
            dict per progress event (``clone_started``,
            ``discovery_completed``, ``file_processing``).

    Returns:
        Summary dict with ``provider``, ``repo_url``, ``ref``, ``path``,
        ``resolved_paths``, ``created_count``, ``updated_count``,
        ``imported_count`` and the per-skill ``imported`` list.

    Raises:
        ValueError: If an input fails validation, the clone fails (message
            taken from git's output), or no import target can be resolved.
    """
    # Function-scope import so this method does not rely on a module-level
    # ``asyncio`` import.
    import asyncio

    normalized_repo_url = self._normalize_repo_url(repo_url)
    normalized_source_path = self._normalize_source_path(source_path)
    resolved_provider = self._resolve_provider(provider, normalized_repo_url)
    validated_excerpt_kind = self._validated_excerpt_kind(excerpt_kind)

    async def emit_progress(**payload: Any) -> None:
        if progress_callback is None:
            return
        await progress_callback(payload)

    with tempfile.TemporaryDirectory(prefix="minder-skill-import-") as tmp_dir:
        await emit_progress(
            event_type="clone_started",
            message="Cloning Git repository",
        )
        command = ["git", "clone", "--depth", "1"]
        if ref:
            command += ["--branch", ref]
        # Clone the normalized URL (whitespace stripped, local paths expanded
        # and resolved) so the value that passed validation is the one used.
        # "--" ends option parsing, so a URL beginning with "-" cannot be
        # interpreted as a git option.
        command += ["--", normalized_repo_url, tmp_dir]
        # Run the blocking clone in a worker thread so the event loop keeps
        # serving other tasks while git is working.
        result = await asyncio.to_thread(
            subprocess.run,
            command,
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode != 0:
            message = (
                result.stderr.strip() or result.stdout.strip() or "git clone failed"
            )
            raise ValueError(message)

        repo_root = Path(tmp_dir)
        import_targets = self._resolve_import_targets(
            repo_root=repo_root,
            source_path=normalized_source_path,
        )
        resolved_paths = [target.source_path for target in import_targets]
        await emit_progress(
            event_type="discovery_completed",
            message="Resolved import targets",
            details={"resolved_paths": list(resolved_paths)},
        )

        existing_by_source_key = self._skills_by_source_key(
            await self._store.list_skills()
        )
        imported: list[dict[str, Any]] = []
        created_count = 0
        updated_count = 0
        imported_file_paths: set[str] = set()
        total_files = sum(len(target.files) for target in import_targets)
        processed_files = 0

        for target in import_targets:
            for file_path in target.files:
                relative_file_path = file_path.relative_to(repo_root).as_posix()
                if relative_file_path in imported_file_paths:
                    continue
                imported_file_paths.add(relative_file_path)
                processed_files += 1
                await emit_progress(
                    event_type="file_processing",
                    message=f"Processing {relative_file_path}",
                    progress_current=processed_files,
                    progress_total=total_files,
                    details={
                        "resolved_path": target.source_path,
                        "file_path": relative_file_path,
                    },
                )
                documents = self._load_import_documents(file_path)
                # Depends only on the file, so compute it once per file
                # instead of once per document.
                auxiliary_paths = self._collect_auxiliary_paths(
                    repo_root=repo_root,
                    file_path=file_path,
                )
                for index, document in enumerate(documents):
                    source_metadata = self._build_import_source_metadata(
                        provider=resolved_provider,
                        repo_url=normalized_repo_url,
                        ref=ref,
                        source_path=target.source_path,
                        file_path=relative_file_path,
                        document_index=index,
                        auxiliary_paths=auxiliary_paths,
                    )
                    source_key = str(source_metadata["import_key"])
                    existing = existing_by_source_key.get(source_key)
                    skill_fields = {
                        "title": document["title"],
                        "content": document["content"],
                        "language": document["language"],
                        "tags": document["tags"],
                        "workflow_steps": document["workflow_steps"],
                        "artifact_types": document["artifact_types"],
                        "provenance": document["provenance"],
                        "quality_score": document["quality_score"],
                        "source_metadata": source_metadata,
                        "excerpt_kind": document.get(
                            "excerpt_kind", validated_excerpt_kind
                        ),
                    }
                    if existing is None:
                        saved = await self.minder_skill_store(**skill_fields)
                        action = "created"
                        created_count += 1
                    else:
                        saved = await self.minder_skill_update(
                            str(existing["id"]),
                            **skill_fields,
                        )
                        action = "updated"
                        updated_count += 1
                    imported.append(
                        {
                            "action": action,
                            "id": saved["id"],
                            "title": saved["title"],
                            "source": saved["source"],
                        }
                    )
                    # Later documents with the same key update this record.
                    existing_by_source_key[source_key] = saved

        return {
            "provider": resolved_provider,
            "repo_url": normalized_repo_url,
            "ref": ref,
            "path": normalized_source_path,
            "resolved_paths": resolved_paths,
            "created_count": created_count,
            "updated_count": updated_count,
            "imported_count": created_count + updated_count,
            "imported": imported,
        }
|
|
428
|
+
|
|
429
|
+
async def minder_skill_delete(self, skill_id: str) -> dict[str, bool]:
|
|
430
|
+
await self._store.delete_skill(uuid.UUID(skill_id))
|
|
431
|
+
return {"deleted": True}
|
|
432
|
+
|
|
433
|
+
def _serialize_skill(self, skill: Any) -> dict[str, Any]:
|
|
434
|
+
tags = list(getattr(skill, "tags", []) or [])
|
|
435
|
+
source_metadata = self._normalized_source_metadata(
|
|
436
|
+
getattr(skill, "source_metadata", None)
|
|
437
|
+
)
|
|
438
|
+
return {
|
|
439
|
+
"id": str(skill.id),
|
|
440
|
+
"title": str(skill.title),
|
|
441
|
+
"content": str(skill.content),
|
|
442
|
+
"language": str(getattr(skill, "language", "")),
|
|
443
|
+
"tags": tags,
|
|
444
|
+
"quality_score": round(
|
|
445
|
+
float(getattr(skill, "quality_score", 0.0) or 0.0), 4
|
|
446
|
+
),
|
|
447
|
+
"usage_count": int(getattr(skill, "usage_count", 0) or 0),
|
|
448
|
+
"workflow_step_tags": [
|
|
449
|
+
tag for tag in tags if ":" not in tag and tag not in self._ARTIFACT_TAGS
|
|
450
|
+
],
|
|
451
|
+
"artifact_type_tags": [tag for tag in tags if tag in self._ARTIFACT_TAGS],
|
|
452
|
+
"provenance": next(
|
|
453
|
+
(tag.split(":", 1)[1] for tag in tags if tag.startswith("source:")),
|
|
454
|
+
None,
|
|
455
|
+
),
|
|
456
|
+
"source": source_metadata,
|
|
457
|
+
"excerpt_kind": self._validated_excerpt_kind(
|
|
458
|
+
str(getattr(skill, "excerpt_kind", "none") or "none")
|
|
459
|
+
),
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
@classmethod
|
|
463
|
+
def _validated_excerpt_kind(cls, excerpt_kind: str) -> str:
|
|
464
|
+
normalized = str(excerpt_kind or "none").strip().lower() or "none"
|
|
465
|
+
if normalized not in cls._ALLOWED_EXCERPT_KINDS:
|
|
466
|
+
raise ValueError(f"Unsupported excerpt_kind: {excerpt_kind}")
|
|
467
|
+
return normalized
|
|
468
|
+
|
|
469
|
+
@staticmethod
|
|
470
|
+
def _normalized_source_metadata(
|
|
471
|
+
source_metadata: dict[str, Any] | None,
|
|
472
|
+
) -> dict[str, Any] | None:
|
|
473
|
+
if not isinstance(source_metadata, dict) or not source_metadata:
|
|
474
|
+
return None
|
|
475
|
+
normalized = {
|
|
476
|
+
str(key): value
|
|
477
|
+
for key, value in source_metadata.items()
|
|
478
|
+
if value is not None and str(key).strip()
|
|
479
|
+
}
|
|
480
|
+
return normalized or None
|
|
481
|
+
|
|
482
|
+
@staticmethod
|
|
483
|
+
def _normalize_source_path(source_path: str) -> str:
|
|
484
|
+
normalized = str(source_path or "skills").strip().strip("/")
|
|
485
|
+
if not normalized:
|
|
486
|
+
return "skills"
|
|
487
|
+
if normalized.lower() == SkillTools._AUTO_IMPORT_SOURCE_PATH:
|
|
488
|
+
return SkillTools._AUTO_IMPORT_SOURCE_PATH
|
|
489
|
+
if Path(normalized).is_absolute() or ".." in Path(normalized).parts:
|
|
490
|
+
raise ValueError(f"Invalid skill source path: {source_path}")
|
|
491
|
+
return normalized
|
|
492
|
+
|
|
493
|
+
@classmethod
|
|
494
|
+
def _resolve_import_targets(
|
|
495
|
+
cls,
|
|
496
|
+
*,
|
|
497
|
+
repo_root: Path,
|
|
498
|
+
source_path: str,
|
|
499
|
+
) -> list[_ImportTarget]:
|
|
500
|
+
auto_discovery = source_path in {
|
|
501
|
+
cls._DEFAULT_IMPORT_SOURCE_PATH,
|
|
502
|
+
cls._AUTO_IMPORT_SOURCE_PATH,
|
|
503
|
+
}
|
|
504
|
+
targets: list[_ImportTarget] = []
|
|
505
|
+
seen_paths: set[str] = set()
|
|
506
|
+
|
|
507
|
+
def add_target(candidate: Path) -> None:
|
|
508
|
+
target = cls._build_import_target(repo_root=repo_root, candidate=candidate)
|
|
509
|
+
if target is None or target.source_path in seen_paths:
|
|
510
|
+
return
|
|
511
|
+
target_parts = Path(target.source_path).parts
|
|
512
|
+
for existing in targets:
|
|
513
|
+
existing_parts = Path(existing.source_path).parts
|
|
514
|
+
if target_parts[: len(existing_parts)] == existing_parts:
|
|
515
|
+
return
|
|
516
|
+
filtered_targets = [
|
|
517
|
+
existing
|
|
518
|
+
for existing in targets
|
|
519
|
+
if Path(existing.source_path).parts[: len(target_parts)] != target_parts
|
|
520
|
+
]
|
|
521
|
+
if len(filtered_targets) != len(targets):
|
|
522
|
+
targets[:] = filtered_targets
|
|
523
|
+
seen_paths.clear()
|
|
524
|
+
seen_paths.update(existing.source_path for existing in targets)
|
|
525
|
+
seen_paths.add(target.source_path)
|
|
526
|
+
targets.append(target)
|
|
527
|
+
|
|
528
|
+
if source_path != cls._AUTO_IMPORT_SOURCE_PATH:
|
|
529
|
+
requested_path = repo_root / source_path
|
|
530
|
+
if requested_path.exists():
|
|
531
|
+
add_target(requested_path)
|
|
532
|
+
if not auto_discovery:
|
|
533
|
+
return targets
|
|
534
|
+
elif not auto_discovery:
|
|
535
|
+
raise ValueError(
|
|
536
|
+
f"Skill source path not found in repository: {source_path}"
|
|
537
|
+
)
|
|
538
|
+
|
|
539
|
+
if auto_discovery:
|
|
540
|
+
for candidate in cls._discover_skill_candidates(repo_root):
|
|
541
|
+
add_target(candidate)
|
|
542
|
+
if targets:
|
|
543
|
+
return targets
|
|
544
|
+
raise ValueError(
|
|
545
|
+
f"Skill source path not found in repository: {source_path}. "
|
|
546
|
+
"Auto-discovery could not find any supported skill documents."
|
|
547
|
+
)
|
|
548
|
+
|
|
549
|
+
raise ValueError(f"No supported skill documents found under {source_path}")
|
|
550
|
+
|
|
551
|
+
@classmethod
|
|
552
|
+
def _build_import_target(
|
|
553
|
+
cls,
|
|
554
|
+
*,
|
|
555
|
+
repo_root: Path,
|
|
556
|
+
candidate: Path,
|
|
557
|
+
) -> _ImportTarget | None:
|
|
558
|
+
try:
|
|
559
|
+
relative_candidate = candidate.relative_to(repo_root)
|
|
560
|
+
except ValueError:
|
|
561
|
+
return None
|
|
562
|
+
if cls._should_ignore_relative_parts(relative_candidate.parts):
|
|
563
|
+
return None
|
|
564
|
+
if candidate.is_file():
|
|
565
|
+
if not cls._is_supported_import_file(candidate):
|
|
566
|
+
return None
|
|
567
|
+
return _ImportTarget(
|
|
568
|
+
source_path=relative_candidate.as_posix(),
|
|
569
|
+
files=(candidate,),
|
|
570
|
+
)
|
|
571
|
+
if not candidate.is_dir():
|
|
572
|
+
return None
|
|
573
|
+
files = tuple(cls._collect_import_files(candidate, repo_root=repo_root))
|
|
574
|
+
if not files:
|
|
575
|
+
return None
|
|
576
|
+
return _ImportTarget(
|
|
577
|
+
source_path=relative_candidate.as_posix(),
|
|
578
|
+
files=files,
|
|
579
|
+
)
|
|
580
|
+
|
|
581
|
+
@classmethod
|
|
582
|
+
def _collect_import_files(cls, root: Path, *, repo_root: Path) -> list[Path]:
|
|
583
|
+
canonical_root_file = cls._canonical_skill_file_for_dir(root)
|
|
584
|
+
if canonical_root_file is not None:
|
|
585
|
+
return [canonical_root_file]
|
|
586
|
+
return [
|
|
587
|
+
path
|
|
588
|
+
for path in sorted(root.rglob("*"))
|
|
589
|
+
if path.is_file()
|
|
590
|
+
and cls._is_supported_import_file(path)
|
|
591
|
+
and not cls._should_ignore_relative_parts(
|
|
592
|
+
path.relative_to(repo_root).parts,
|
|
593
|
+
)
|
|
594
|
+
and cls._should_import_supported_file(path, repo_root=repo_root)
|
|
595
|
+
]
|
|
596
|
+
|
|
597
|
+
@classmethod
|
|
598
|
+
def _discover_skill_candidates(cls, repo_root: Path) -> list[Path]:
|
|
599
|
+
candidates: list[tuple[int, str, Path]] = []
|
|
600
|
+
for path in repo_root.rglob("*"):
|
|
601
|
+
try:
|
|
602
|
+
relative = path.relative_to(repo_root)
|
|
603
|
+
except ValueError:
|
|
604
|
+
continue
|
|
605
|
+
if cls._should_ignore_relative_parts(relative.parts):
|
|
606
|
+
continue
|
|
607
|
+
name = path.name.lower()
|
|
608
|
+
relative_text = relative.as_posix().lower()
|
|
609
|
+
score = 0
|
|
610
|
+
if path.is_dir():
|
|
611
|
+
if name in cls._DISCOVERY_DIRECTORY_NAMES:
|
|
612
|
+
score += 5
|
|
613
|
+
if "skill" in name:
|
|
614
|
+
score += 4
|
|
615
|
+
if any(hint in relative_text for hint in cls._DISCOVERY_FILE_HINTS):
|
|
616
|
+
score += 1
|
|
617
|
+
if score <= 0:
|
|
618
|
+
continue
|
|
619
|
+
elif path.is_file():
|
|
620
|
+
if not cls._is_supported_import_file(path):
|
|
621
|
+
continue
|
|
622
|
+
if not cls._should_import_supported_file(path, repo_root=repo_root):
|
|
623
|
+
continue
|
|
624
|
+
if any(hint in name for hint in cls._DISCOVERY_FILE_HINTS):
|
|
625
|
+
score += 4
|
|
626
|
+
if "skills" in relative_text:
|
|
627
|
+
score += 2
|
|
628
|
+
if score <= 0:
|
|
629
|
+
continue
|
|
630
|
+
else:
|
|
631
|
+
continue
|
|
632
|
+
candidates.append((score, relative.as_posix(), path))
|
|
633
|
+
|
|
634
|
+
candidates.sort(key=lambda item: (-item[0], item[1]))
|
|
635
|
+
return [path for _, _, path in candidates]
|
|
636
|
+
|
|
637
|
+
@classmethod
|
|
638
|
+
def _is_supported_import_file(cls, path: Path) -> bool:
|
|
639
|
+
return path.suffix.lower() in cls._IMPORT_SUFFIXES
|
|
640
|
+
|
|
641
|
+
@classmethod
|
|
642
|
+
def _canonical_skill_file_for_dir(cls, directory: Path) -> Path | None:
|
|
643
|
+
for path in sorted(directory.iterdir() if directory.exists() else []):
|
|
644
|
+
if not path.is_file() or not cls._is_supported_import_file(path):
|
|
645
|
+
continue
|
|
646
|
+
if path.name.casefold() in cls._CANONICAL_SKILL_FILENAMES:
|
|
647
|
+
return path
|
|
648
|
+
return None
|
|
649
|
+
|
|
650
|
+
@classmethod
|
|
651
|
+
def _canonical_skill_ancestor_file(
|
|
652
|
+
cls,
|
|
653
|
+
*,
|
|
654
|
+
path: Path,
|
|
655
|
+
repo_root: Path,
|
|
656
|
+
) -> Path | None:
|
|
657
|
+
current = path.parent
|
|
658
|
+
while current != repo_root and repo_root in current.parents:
|
|
659
|
+
canonical = cls._canonical_skill_file_for_dir(current)
|
|
660
|
+
if canonical is not None:
|
|
661
|
+
return canonical
|
|
662
|
+
current = current.parent
|
|
663
|
+
canonical = cls._canonical_skill_file_for_dir(repo_root)
|
|
664
|
+
if canonical is not None:
|
|
665
|
+
return canonical
|
|
666
|
+
return None
|
|
667
|
+
|
|
668
|
+
@classmethod
|
|
669
|
+
def _should_import_supported_file(cls, path: Path, *, repo_root: Path) -> bool:
|
|
670
|
+
canonical_ancestor = cls._canonical_skill_ancestor_file(
|
|
671
|
+
path=path,
|
|
672
|
+
repo_root=repo_root,
|
|
673
|
+
)
|
|
674
|
+
if canonical_ancestor is None:
|
|
675
|
+
return True
|
|
676
|
+
return canonical_ancestor == path
|
|
677
|
+
|
|
678
|
+
@classmethod
|
|
679
|
+
def _collect_auxiliary_paths(
|
|
680
|
+
cls,
|
|
681
|
+
*,
|
|
682
|
+
repo_root: Path,
|
|
683
|
+
file_path: Path,
|
|
684
|
+
) -> list[str]:
|
|
685
|
+
skill_root = file_path.parent
|
|
686
|
+
canonical = cls._canonical_skill_file_for_dir(skill_root)
|
|
687
|
+
if canonical is None or canonical != file_path:
|
|
688
|
+
return []
|
|
689
|
+
auxiliary_paths: list[str] = []
|
|
690
|
+
for candidate in sorted(skill_root.rglob("*")):
|
|
691
|
+
if candidate == canonical:
|
|
692
|
+
continue
|
|
693
|
+
if cls._should_ignore_relative_parts(
|
|
694
|
+
candidate.relative_to(repo_root).parts
|
|
695
|
+
):
|
|
696
|
+
continue
|
|
697
|
+
if candidate.is_file() and not cls._is_supported_import_file(candidate):
|
|
698
|
+
auxiliary_paths.append(candidate.relative_to(skill_root).as_posix())
|
|
699
|
+
continue
|
|
700
|
+
if candidate.is_file():
|
|
701
|
+
auxiliary_paths.append(candidate.relative_to(skill_root).as_posix())
|
|
702
|
+
continue
|
|
703
|
+
if candidate.is_dir() and candidate != skill_root:
|
|
704
|
+
auxiliary_paths.append(candidate.relative_to(skill_root).as_posix())
|
|
705
|
+
return auxiliary_paths
|
|
706
|
+
|
|
707
|
+
@classmethod
|
|
708
|
+
def _should_ignore_relative_parts(cls, parts: tuple[str, ...]) -> bool:
|
|
709
|
+
for part in parts:
|
|
710
|
+
if part in cls._PRUNED_IMPORT_NAMES:
|
|
711
|
+
return True
|
|
712
|
+
if part.startswith(".") and part not in cls._ALLOWED_HIDDEN_IMPORT_DIRS:
|
|
713
|
+
return True
|
|
714
|
+
return False
|
|
715
|
+
|
|
716
|
+
@staticmethod
|
|
717
|
+
def _normalize_repo_url(repo_url: str) -> str:
|
|
718
|
+
raw = str(repo_url or "").strip()
|
|
719
|
+
if not raw:
|
|
720
|
+
raise ValueError("repo_url is required")
|
|
721
|
+
parsed = urlparse(raw)
|
|
722
|
+
if parsed.scheme or raw.startswith("git@"):
|
|
723
|
+
return raw.rstrip("/")
|
|
724
|
+
path = Path(raw).expanduser()
|
|
725
|
+
if path.exists():
|
|
726
|
+
return path.resolve().as_posix()
|
|
727
|
+
return raw.rstrip("/")
|
|
728
|
+
|
|
729
|
+
@staticmethod
|
|
730
|
+
def _resolve_provider(provider: str | None, repo_url: str) -> str:
|
|
731
|
+
if provider:
|
|
732
|
+
normalized = str(provider).strip().lower()
|
|
733
|
+
if normalized in {"github", "gitlab", "generic_git"}:
|
|
734
|
+
return normalized
|
|
735
|
+
raise ValueError(f"Unsupported provider: {provider}")
|
|
736
|
+
lowered = repo_url.lower()
|
|
737
|
+
if "github.com" in lowered:
|
|
738
|
+
return "github"
|
|
739
|
+
if "gitlab" in lowered:
|
|
740
|
+
return "gitlab"
|
|
741
|
+
return "generic_git"
|
|
742
|
+
|
|
743
|
+
def _skills_by_source_key(self, skills: list[Any]) -> dict[str, dict[str, Any]]:
|
|
744
|
+
indexed: dict[str, dict[str, Any]] = {}
|
|
745
|
+
for skill in skills:
|
|
746
|
+
serialized = self._serialize_skill(skill)
|
|
747
|
+
source = serialized.get("source") or {}
|
|
748
|
+
source_key = str(source.get("import_key") or "").strip()
|
|
749
|
+
if source_key:
|
|
750
|
+
indexed[source_key] = serialized
|
|
751
|
+
return indexed
|
|
752
|
+
|
|
753
|
+
def _build_import_source_metadata(
|
|
754
|
+
self,
|
|
755
|
+
*,
|
|
756
|
+
provider: str,
|
|
757
|
+
repo_url: str,
|
|
758
|
+
ref: str | None,
|
|
759
|
+
source_path: str,
|
|
760
|
+
file_path: str,
|
|
761
|
+
document_index: int,
|
|
762
|
+
auxiliary_paths: list[str] | None = None,
|
|
763
|
+
) -> dict[str, Any]:
|
|
764
|
+
import_key = "::".join(
|
|
765
|
+
[
|
|
766
|
+
provider,
|
|
767
|
+
repo_url,
|
|
768
|
+
ref or "HEAD",
|
|
769
|
+
source_path,
|
|
770
|
+
file_path,
|
|
771
|
+
str(document_index),
|
|
772
|
+
]
|
|
773
|
+
)
|
|
774
|
+
return {
|
|
775
|
+
"provider": provider,
|
|
776
|
+
"repo_url": repo_url,
|
|
777
|
+
"ref": ref,
|
|
778
|
+
"path": source_path,
|
|
779
|
+
"file_path": file_path,
|
|
780
|
+
"auxiliary_paths": list(auxiliary_paths or []),
|
|
781
|
+
"import_key": import_key,
|
|
782
|
+
"imported_at": datetime.now(UTC).isoformat(),
|
|
783
|
+
}
|
|
784
|
+
|
|
785
|
+
def _load_import_documents(self, file_path: Path) -> list[dict[str, Any]]:
|
|
786
|
+
suffix = file_path.suffix.lower()
|
|
787
|
+
raw = file_path.read_text(encoding="utf-8")
|
|
788
|
+
if suffix in {".md", ".markdown", ".txt"}:
|
|
789
|
+
title = self._extract_document_title(raw, fallback=file_path.stem)
|
|
790
|
+
return [
|
|
791
|
+
{
|
|
792
|
+
"title": title,
|
|
793
|
+
"content": raw.strip(),
|
|
794
|
+
"language": "markdown" if suffix != ".txt" else "text",
|
|
795
|
+
"tags": [],
|
|
796
|
+
"workflow_steps": [],
|
|
797
|
+
"artifact_types": [],
|
|
798
|
+
"provenance": None,
|
|
799
|
+
"quality_score": 0.0,
|
|
800
|
+
}
|
|
801
|
+
]
|
|
802
|
+
if suffix == ".json":
|
|
803
|
+
payload = json.loads(raw)
|
|
804
|
+
if isinstance(payload, dict) and isinstance(payload.get("skills"), list):
|
|
805
|
+
candidates = payload.get("skills") or []
|
|
806
|
+
elif isinstance(payload, list):
|
|
807
|
+
candidates = payload
|
|
808
|
+
else:
|
|
809
|
+
candidates = [payload]
|
|
810
|
+
documents = [
|
|
811
|
+
self._coerce_import_document(item, file_path=file_path)
|
|
812
|
+
for item in candidates
|
|
813
|
+
]
|
|
814
|
+
return [document for document in documents if document is not None]
|
|
815
|
+
raise ValueError(f"Unsupported skill import file: {file_path.name}")
|
|
816
|
+
|
|
817
|
+
def _coerce_import_document(
|
|
818
|
+
self,
|
|
819
|
+
payload: Any,
|
|
820
|
+
*,
|
|
821
|
+
file_path: Path,
|
|
822
|
+
) -> dict[str, Any] | None:
|
|
823
|
+
if not isinstance(payload, dict):
|
|
824
|
+
return None
|
|
825
|
+
content = str(payload.get("content", "") or "").strip()
|
|
826
|
+
title = str(payload.get("title", "") or "").strip() or file_path.stem
|
|
827
|
+
if not content:
|
|
828
|
+
return None
|
|
829
|
+
return {
|
|
830
|
+
"title": title,
|
|
831
|
+
"content": content,
|
|
832
|
+
"language": str(payload.get("language", "markdown") or "markdown"),
|
|
833
|
+
"tags": [str(tag) for tag in list(payload.get("tags", []) or [])],
|
|
834
|
+
"workflow_steps": [
|
|
835
|
+
str(step) for step in list(payload.get("workflow_steps", []) or [])
|
|
836
|
+
],
|
|
837
|
+
"artifact_types": [
|
|
838
|
+
str(item) for item in list(payload.get("artifact_types", []) or [])
|
|
839
|
+
],
|
|
840
|
+
"provenance": (
|
|
841
|
+
str(payload.get("provenance"))
|
|
842
|
+
if payload.get("provenance") is not None
|
|
843
|
+
else None
|
|
844
|
+
),
|
|
845
|
+
"quality_score": float(payload.get("quality_score", 0.0) or 0.0),
|
|
846
|
+
"excerpt_kind": (
|
|
847
|
+
str(payload.get("excerpt_kind"))
|
|
848
|
+
if payload.get("excerpt_kind") is not None
|
|
849
|
+
else "none"
|
|
850
|
+
),
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
@staticmethod
|
|
854
|
+
def _extract_document_title(raw: str, *, fallback: str) -> str:
|
|
855
|
+
for line in raw.splitlines():
|
|
856
|
+
stripped = line.strip()
|
|
857
|
+
if stripped.startswith("#"):
|
|
858
|
+
return stripped.lstrip("#").strip() or fallback
|
|
859
|
+
return fallback
|
|
860
|
+
|
|
861
|
+
@staticmethod
|
|
862
|
+
def _normalized_tags(
|
|
863
|
+
*,
|
|
864
|
+
tags: list[str] | None,
|
|
865
|
+
workflow_steps: list[str] | None,
|
|
866
|
+
artifact_types: list[str] | None,
|
|
867
|
+
provenance: str | None,
|
|
868
|
+
) -> list[str]:
|
|
869
|
+
normalized: list[str] = []
|
|
870
|
+
seen: set[str] = set()
|
|
871
|
+
|
|
872
|
+
def add(value: str) -> None:
|
|
873
|
+
token = str(value or "").strip().lower()
|
|
874
|
+
if not token or token in seen:
|
|
875
|
+
return
|
|
876
|
+
seen.add(token)
|
|
877
|
+
normalized.append(token)
|
|
878
|
+
|
|
879
|
+
for tag in tags or []:
|
|
880
|
+
add(tag)
|
|
881
|
+
for step in workflow_steps or []:
|
|
882
|
+
for token in sorted(step_keywords(step)):
|
|
883
|
+
add(token)
|
|
884
|
+
for artifact in artifact_types or []:
|
|
885
|
+
add(artifact)
|
|
886
|
+
if provenance:
|
|
887
|
+
add(f"source:{provenance}")
|
|
888
|
+
return normalized
|
|
889
|
+
|
|
890
|
+
@staticmethod
|
|
891
|
+
def _cosine_similarity(left: list[float], right: list[float]) -> float:
|
|
892
|
+
if not left or not right or len(left) != len(right):
|
|
893
|
+
return 0.0
|
|
894
|
+
numerator = sum(a * b for a, b in zip(left, right, strict=False))
|
|
895
|
+
left_norm = math.sqrt(sum(value * value for value in left))
|
|
896
|
+
right_norm = math.sqrt(sum(value * value for value in right))
|
|
897
|
+
if left_norm == 0 or right_norm == 0:
|
|
898
|
+
return 0.0
|
|
899
|
+
return numerator / (left_norm * right_norm)
|