claude-sql 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,354 @@
1
+ """Walk local skill + plugin directories and persist a catalog parquet.
2
+
3
+ Seeds ``~/.claude/skills_catalog.parquet`` from three filesystem sources:
4
+
5
+ 1. ``~/.claude/skills/<name>/SKILL.md`` -- user-installed skills, keyed by
6
+ bare name only.
7
+ 2. ``~/.claude/plugins/cache/<owner>/<plugin>/<version>/skills/<name>/SKILL.md``
8
+ -- plugin-provided skills, emitted as *both* the bare ``<name>`` and the
9
+ namespaced ``<plugin>:<name>`` (both invocation shapes show up in
10
+ transcripts, so both need to join through ``skill_usage``).
11
+ 3. ``~/.claude/plugins/cache/<owner>/<plugin>/<version>/commands/<name>.md``
12
+ -- plugin-provided slash commands, same bare + namespaced treatment.
13
+
14
+ Plus a constant :data:`BUILTIN_SLASH_COMMANDS` written verbatim so the
15
+ view can tag ``/clear``, ``/compact``, ``/plugin`` etc. without SQL
16
+ hardcoding.
17
+
18
+ Multiple cached plugin versions collapse to the newest -- we sort the
19
+ version-dir names with :mod:`packaging.version` when the string is a
20
+ semver, and fall back to ``mtime`` when it isn't (e.g. the
21
+ ``unknown/`` version directory Claude Code writes for some plugins).
22
+
23
+ Public API
24
+ ----------
25
+ sync(settings, *, dry_run=False) -> dict[str, int]
26
+ Walk filesystem, write parquet at ``settings.skills_catalog_parquet_path``.
27
+ Returns ``{"rows": N, "skills": S, "commands": C, "builtins": B}``.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import json
33
+ import re
34
+ from collections.abc import Iterable
35
+ from datetime import UTC, datetime
36
+ from pathlib import Path
37
+ from typing import Any
38
+
39
+ import polars as pl
40
+ import yaml
41
+ from loguru import logger
42
+ from packaging.version import InvalidVersion, Version as _Version
43
+
44
+ from claude_sql.config import Settings
45
+
46
# Slash commands that ship with Claude Code itself.  None of them has a
# SKILL.md on disk, yet transcripts record them as
# ``<command-name>/clear</command-name>``; listing them here lets the
# ``skill_usage`` view populate its ``is_builtin`` column from data
# instead of hardcoded SQL.  Each entry is ``(name, description)`` and the
# entries are kept in alphabetical order by name.
BUILTIN_SLASH_COMMANDS: tuple[tuple[str, str], ...] = tuple(
    {
        "clear": "Clear the current conversation context.",
        "compact": "Compact the current conversation to free up context.",
        "cost": "Show token and cost usage for the current session.",
        "help": "Show the Claude Code help menu.",
        "ide": "Connect to an IDE extension.",
        "init": "Initialize a new CLAUDE.md for the working directory.",
        "mcp": "Manage MCP servers attached to this session.",
        "memory": "Manage persistent conversation memory.",
        "model": "Switch the active Claude model.",
        "plugin": "Manage installed Claude Code plugins.",
        "reload-plugins": "Reload plugin definitions without restarting.",
        "resume": "Resume a prior session.",
        "review": "Request a code review of the current changes.",
        "security-review": "Run a security review over pending changes.",
        "status": "Show current session status.",
    }.items()
)
67
+
68
+
69
# Matches a leading ``---`` fence, captures everything up to the closing
# ``---`` fence, and requires a newline after that closing fence.
_FRONTMATTER_RE = re.compile(r"\A---\s*\n(.*?)\n---\s*\n", re.DOTALL)


def _parse_frontmatter(path: Path) -> dict[str, Any]:
    """Return the YAML frontmatter at the top of ``path`` as a dict.

    Every failure mode degrades to an empty dict so that one malformed
    file cannot abort the whole sync:

    * unreadable file -> ``{}`` plus a warning,
    * no leading ``---`` fenced block -> ``{}`` (no log line),
    * YAML syntax error -> ``{}`` plus a warning,
    * frontmatter that parses to something other than a mapping -> ``{}``.

    Parameters
    ----------
    path
        Markdown file (``SKILL.md`` or a command ``.md``) to inspect.

    Returns
    -------
    dict
        The parsed frontmatter mapping, or ``{}`` when unavailable.
    """
    try:
        # ``utf-8-sig`` transparently drops a leading byte-order mark; with
        # plain ``utf-8`` the BOM stays in the text and defeats the ``\A---``
        # anchor, silently discarding otherwise valid frontmatter.
        text = path.read_text(encoding="utf-8-sig", errors="replace")
    except OSError as exc:
        logger.warning("skills_catalog: cannot read {}: {}", path, exc)
        return {}
    match = _FRONTMATTER_RE.match(text)
    if not match:
        return {}
    try:
        data = yaml.safe_load(match.group(1))
    except yaml.YAMLError as exc:
        logger.warning("skills_catalog: yaml parse error in {}: {}", path, exc)
        return {}
    # A scalar or list at the top level is not usable as metadata.
    return data if isinstance(data, dict) else {}
93
+
94
+
95
+ def _coerce_str(value: Any) -> str | None:
96
+ """Flatten multiline YAML scalars (literal ``>``, ``|``) to a single line."""
97
+ if value is None:
98
+ return None
99
+ text = str(value).strip()
100
+ # Multiline YAML can leave newlines; flatten so the parquet stays narrow.
101
+ text = re.sub(r"\s+", " ", text)
102
+ return text or None
103
+
104
+
105
def _read_plugin_manifest(version_dir: Path) -> dict[str, Any]:
    """Return the parsed ``.claude-plugin/plugin.json`` or an empty dict.

    Parameters
    ----------
    version_dir
        A plugin version directory; the manifest is looked up at
        ``<version_dir>/.claude-plugin/plugin.json``.

    Returns
    -------
    dict
        The manifest's top-level JSON object.  ``{}`` is returned when the
        file is missing or unreadable, is not valid UTF-8 / JSON, or its
        top-level value is not an object, so callers can always use
        ``.get()`` on the result.
    """
    manifest = version_dir / ".claude-plugin" / "plugin.json"
    try:
        data = json.loads(manifest.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ``ValueError`` covers both ``json.JSONDecodeError`` and the
        # ``UnicodeDecodeError`` raised for files that are not valid UTF-8.
        logger.debug("skills_catalog: cannot read {}: {}", manifest, exc)
        return {}
    # Valid JSON is not necessarily an object (``[]``, ``null``, ``"x"``).
    return data if isinstance(data, dict) else {}
113
+
114
+
115
def _version_sort_key(version_dir: Path) -> tuple[int, Any, float]:
    """Return a sort key under which the newest version directory is largest.

    The key is a tuple ``(tier, version_or_none, mtime)``:

    * ``tier=1`` when the directory name parses as a
      :class:`packaging.version.Version`; such entries are ordered by that
      ``Version``, with ``mtime`` as the tie-breaker.
    * ``tier=0`` for any other name (``unknown/`` etc.); such entries are
      ordered by ``mtime`` alone.

    Parsed versions carry the *higher* tier on purpose: the key is meant to
    be used with ``sort(..., reverse=True)`` or ``max(...)``, where the
    largest key wins, so a real version always outranks an unparseable
    name and the higher version / later ``mtime`` wins within a tier.

    Parameters
    ----------
    version_dir
        A ``<root>/<owner>/<plugin>/<version>`` directory.

    Returns
    -------
    tuple
        ``(tier, Version | None, mtime)``; ``mtime`` is ``0.0`` when the
        directory cannot be stat'ed.
    """
    try:
        mtime = version_dir.stat().st_mtime
    except OSError:
        # A directory that vanished mid-walk must not abort the sort.
        mtime = 0.0
    try:
        return (1, _Version(version_dir.name), mtime)
    except InvalidVersion:
        return (0, None, mtime)
136
+
137
+
138
def _walk_user_skills(root: Path, now: datetime) -> Iterable[dict[str, Any]]:
    """Yield one catalog row per ``<root>/<name>/SKILL.md``.

    Parameters
    ----------
    root
        Directory holding one sub-directory per user-installed skill.
        Nothing is yielded when it does not exist.
    now
        Timestamp stamped into every row's ``synced_at`` field.

    Yields
    ------
    dict
        A ``source_kind="user-skill"`` row whose ``skill_id`` and ``name``
        are both the bare skill name and whose plugin fields are ``None``.
    """
    if not root.exists():
        return

    skill_dirs = (entry for entry in sorted(root.iterdir()) if entry.is_dir())
    for skill_dir in skill_dirs:
        skill_md = skill_dir / "SKILL.md"
        if skill_md.exists():
            meta = _parse_frontmatter(skill_md)
            # A ``name`` declared in the frontmatter takes precedence over
            # the directory name; the same value fills both key columns.
            declared = _coerce_str(meta.get("name"))
            name = declared if declared else skill_dir.name
            description = _coerce_str(meta.get("description"))
            # Accept either the hyphenated or the underscored spelling.
            hint = _coerce_str(meta.get("argument-hint"))
            if not hint:
                hint = _coerce_str(meta.get("argument_hint"))
            yield {
                "skill_id": name,
                "name": name,
                "plugin": None,
                "plugin_version": None,
                "source_kind": "user-skill",
                "description": description,
                "argument_hint": hint,
                "source_path": str(skill_md),
                "synced_at": now,
            }
164
+
165
+
166
def _walk_plugins(root: Path, now: datetime) -> Iterable[dict[str, Any]]:
    """Yield catalog rows for every skill and command in the plugins cache.

    Expected layout:

    .. code-block:: text

        <root>/<owner>/<plugin>/<version>/.claude-plugin/plugin.json
        <root>/<owner>/<plugin>/<version>/skills/<name>/SKILL.md
        <root>/<owner>/<plugin>/<version>/commands/<name>.md

    For each ``(owner, plugin)`` exactly one ``<version>`` directory is
    used -- the one that ranks highest under :func:`_version_sort_key` --
    and the other cached generations are skipped.  Every skill and every
    command produces two rows that differ only in ``skill_id``: the bare
    ``<name>`` and the namespaced ``<plugin>:<name>``, because transcripts
    contain both invocation shapes.

    Parameters
    ----------
    root
        The plugins cache directory.  Nothing is yielded when it does not
        exist.
    now
        Timestamp stamped into every row's ``synced_at`` field.

    Yields
    ------
    dict
        ``source_kind="plugin-skill"`` rows followed by
        ``source_kind="plugin-command"`` rows for each plugin.
    """
    if not root.exists():
        return

    def _bare_and_namespaced(
        plugin_name: str,
        plugin_version: str,
        name: str,
        source_kind: str,
        meta: dict[str, Any],
        source_file: Path,
    ) -> Iterable[dict[str, Any]]:
        # Build the shared columns once, then emit them under both keys.
        shared = {
            "name": name,
            "plugin": plugin_name,
            "plugin_version": plugin_version,
            "source_kind": source_kind,
            "description": _coerce_str(meta.get("description")),
            "argument_hint": _coerce_str(meta.get("argument-hint"))
            or _coerce_str(meta.get("argument_hint")),
            "source_path": str(source_file),
            "synced_at": now,
        }
        for skill_id in (name, f"{plugin_name}:{name}"):
            yield {**shared, "skill_id": skill_id}

    for owner_dir in sorted(entry for entry in root.iterdir() if entry.is_dir()):
        for plugin_dir in sorted(entry for entry in owner_dir.iterdir() if entry.is_dir()):
            candidates = [entry for entry in plugin_dir.iterdir() if entry.is_dir()]
            if not candidates:
                continue
            # Keep only the highest-ranked version directory.
            chosen = max(candidates, key=_version_sort_key)
            manifest = _read_plugin_manifest(chosen)
            # Manifest values win; directory names are the fallback.
            plugin_name = _coerce_str(manifest.get("name")) or plugin_dir.name
            plugin_version = _coerce_str(manifest.get("version")) or chosen.name

            skills_root = chosen / "skills"
            if skills_root.is_dir():
                for skill_dir in sorted(skills_root.iterdir()):
                    skill_md = skill_dir / "SKILL.md"
                    if not skill_md.exists():
                        continue
                    meta = _parse_frontmatter(skill_md)
                    # Frontmatter ``name`` overrides the directory name.
                    skill_name = _coerce_str(meta.get("name")) or skill_dir.name
                    yield from _bare_and_namespaced(
                        plugin_name, plugin_version, skill_name, "plugin-skill", meta, skill_md
                    )

            commands_root = chosen / "commands"
            if commands_root.is_dir():
                for command_file in sorted(commands_root.glob("*.md")):
                    meta = _parse_frontmatter(command_file)
                    # Commands are always named after their file stem.
                    yield from _bare_and_namespaced(
                        plugin_name,
                        plugin_version,
                        command_file.stem,
                        "plugin-command",
                        meta,
                        command_file,
                    )
243
+
244
+
245
def _builtin_rows(now: datetime) -> Iterable[dict[str, Any]]:
    """Yield one ``source_kind="builtin"`` row per :data:`BUILTIN_SLASH_COMMANDS` entry.

    The command name fills both ``skill_id`` and ``name``, the blurb becomes
    the ``description``, ``now`` is stamped into ``synced_at`` and every
    remaining column is ``None``.
    """
    for command, summary in BUILTIN_SLASH_COMMANDS:
        row: dict[str, Any] = {"skill_id": command, "name": command}
        # Keyword order mirrors the catalog's column order.
        row.update(
            plugin=None,
            plugin_version=None,
            source_kind="builtin",
            description=summary,
            argument_hint=None,
            source_path=None,
            synced_at=now,
        )
        yield row
259
+
260
+
261
# Column name -> polars dtype for the catalog parquet, in column order.
# Every column is a string except ``synced_at``, which is a
# microsecond-precision UTC timestamp.
_CATALOG_SCHEMA: dict[str, Any] = {
    **dict.fromkeys(
        (
            "skill_id",
            "name",
            "plugin",
            "plugin_version",
            "source_kind",
            "description",
            "argument_hint",
            "source_path",
        ),
        pl.Utf8,
    ),
    "synced_at": pl.Datetime(time_unit="us", time_zone="UTC"),
}
272
+
273
+
274
def _collect_rows(settings: Settings) -> list[dict[str, Any]]:
    """Gather the de-duplicated catalog rows consumed by :func:`sync`.

    Rows are produced in a fixed order -- user skills, then plugin skills
    and commands, then builtins -- all stamped with one shared UTC
    timestamp, and then de-duplicated on ``(skill_id, source_kind)`` with
    the first occurrence kept.

    Parameters
    ----------
    settings
        Supplies ``user_skills_dir`` and ``plugins_cache_dir``.

    Returns
    -------
    list of dict
        Surviving rows in their original production order.
    """
    now = datetime.now(tz=UTC)
    rows: list[dict[str, Any]] = [
        *_walk_user_skills(settings.user_skills_dir, now),
        *_walk_plugins(settings.plugins_cache_dir, now),
        *_builtin_rows(now),
    ]

    # First occurrence per (skill_id, source_kind) wins; dicts preserve
    # insertion order, so the output order matches the production order.
    # In practice this collapses e.g. two plugins that both ship a skill
    # with the same bare name.
    # NOTE(review): ``source_kind`` is part of the key, so rows that share a
    # ``skill_id`` across *different* kinds (user-skill vs plugin-skill vs
    # plugin-command vs builtin) all survive -- confirm that is what the
    # ``skill_usage`` join expects.
    first_seen: dict[tuple[str, str], dict[str, Any]] = {}
    for row in rows:
        first_seen.setdefault((row["skill_id"], row["source_kind"]), row)
    return list(first_seen.values())
294
+
295
+
296
def sync(settings: Settings, *, dry_run: bool = False) -> dict[str, int]:
    """Walk local skills + plugins and write the catalog parquet.

    Parameters
    ----------
    settings
        Configured :class:`claude_sql.config.Settings`; reads
        :attr:`Settings.user_skills_dir`,
        :attr:`Settings.plugins_cache_dir`, and
        :attr:`Settings.skills_catalog_parquet_path`.
    dry_run
        When ``True`` the walk runs and the row counts are returned, but
        nothing is written to disk.

    Returns
    -------
    dict
        ``{"rows": total, "skills": user+plugin skill rows, "commands":
        plugin-command rows, "builtins": builtin rows}``.

    Raises
    ------
    Exception
        Whatever the parquet write or the final rename raises is
        propagated unchanged after the temporary file has been cleaned up.

    Notes
    -----
    The parquet is written atomically -- to a sibling ``.tmp`` path then
    renamed -- so a crashed sync never leaves the catalog view staring at
    a truncated file.  If the write or the rename fails, the ``.tmp``
    sibling is removed on a best-effort basis so that failed runs do not
    leave stray files next to the catalog.
    """
    rows = _collect_rows(settings)
    kinds = [row["source_kind"] for row in rows]
    stats = {
        "rows": len(rows),
        "skills": kinds.count("user-skill") + kinds.count("plugin-skill"),
        "commands": kinds.count("plugin-command"),
        "builtins": kinds.count("builtin"),
    }

    if dry_run:
        logger.info(
            "skills_catalog.sync(dry_run=True): {} rows ({} skills, {} commands, {} builtins)",
            stats["rows"],
            stats["skills"],
            stats["commands"],
            stats["builtins"],
        )
        return stats

    out_path: Path = settings.skills_catalog_parquet_path
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # The explicit schema keeps column dtypes stable even for all-null
    # columns or an empty row list.
    df = pl.DataFrame(rows, schema=_CATALOG_SCHEMA)
    tmp_path = out_path.with_suffix(out_path.suffix + ".tmp")
    try:
        df.write_parquet(tmp_path)
        tmp_path.replace(out_path)
    except Exception:
        # Best-effort cleanup: never let a cleanup failure mask the
        # original error, which is re-raised below.
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError as cleanup_exc:
            logger.debug("skills_catalog: cannot remove {}: {}", tmp_path, cleanup_exc)
        raise
    logger.info(
        "skills_catalog.sync: wrote {} rows to {} ({} skills, {} commands, {} builtins)",
        stats["rows"],
        out_path,
        stats["skills"],
        stats["commands"],
        stats["builtins"],
    )
    return stats