reprompt-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reprompt/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """reprompt - Discover, analyze, and evolve your best prompts from AI coding sessions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __version__ = "0.1.1"
@@ -0,0 +1,3 @@
1
+ """Session adapters for various AI coding tools."""
2
+
3
+ from __future__ import annotations
@@ -0,0 +1,25 @@
1
+ """Base adapter interface for AI coding session parsers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from pathlib import Path
7
+
8
+ from reprompt.core.models import Prompt
9
+
10
+
11
class BaseAdapter(ABC):
    """Abstract base class for session adapters.

    A concrete adapter (e.g. Claude Code, OpenClaw) declares where its
    tool keeps session files and how to turn one session file into
    ``Prompt`` objects.
    """

    # Adapter identifier; concrete adapters record it on each parsed Prompt.
    name: str
    # Default session directory; may contain '~' (adapters expand it).
    default_session_path: str

    @abstractmethod
    def parse_session(self, path: Path) -> list[Prompt]:
        """Parse a session file and return a list of Prompt objects."""
        ...

    @abstractmethod
    def detect_installed(self) -> bool:
        """Check if the tool's session directory exists."""
        ...
@@ -0,0 +1,140 @@
1
+ """Claude Code session adapter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import re
8
+ from pathlib import Path
9
+
10
+ from reprompt.adapters.base import BaseAdapter
11
+ from reprompt.core.models import Prompt
12
+
13
# Exact throwaway replies ("ok", "yes", single menu choices, short CJK
# acknowledgements) that are never worth keeping as prompts.
SKIP_EXACT = {
    "\u597d\u7684",
    "OK",
    "ok",
    "Ok",
    "\u662f\u7684",
    "\u53ef\u4ee5",
    "sure",
    "Sure",
    "yes",
    "Yes",
    "Done",
    "done",
    "Sent",
    "sent",
    "\u597d",
    "\u5bf9",
    "\u884c",
    "\u55ef",
    "Tool loaded.",
    "1",
    "2",
    "3",
    "A",
    "B",
    "C",
    "D",
}

# Prefixes marking tool/system chatter rather than a human-authored prompt.
SKIP_PREFIXES = (
    "<",
    "Tool loaded",
    "Base directory for this skill",
)


def should_keep_prompt(text: str) -> bool:
    """Filter out noise prompts -- short messages, exact matches, prefixes."""
    candidate = text.strip()
    if len(candidate) < 10 or candidate in SKIP_EXACT:
        return False
    # str.startswith accepts a tuple of prefixes directly.
    if candidate.startswith(SKIP_PREFIXES):
        return False
    # Require at least one Latin letter or CJK ideograph.
    return re.search(r"[a-zA-Z\u4e00-\u9fff]", candidate) is not None
61
+
62
+
63
+ def _extract_text(message: dict[str, object]) -> str:
64
+ """Extract text from a message, handling both string and list content."""
65
+ content = message.get("content", "")
66
+ if isinstance(content, list):
67
+ parts = [
68
+ p.get("text", "") for p in content if isinstance(p, dict) and p.get("type") == "text"
69
+ ]
70
+ return " ".join(parts).strip()
71
+ return str(content).strip()
72
+
73
+
74
class ClaudeCodeAdapter(BaseAdapter):
    """Adapter for Claude Code JSONL session files."""

    name = "claude-code"
    default_session_path = "~/.claude/projects"

    def __init__(self, session_path: Path | None = None) -> None:
        """Use *session_path* if given, else the expanded default directory."""
        self._session_path = session_path or Path(os.path.expanduser(self.default_session_path))

    def detect_installed(self) -> bool:
        """Check if Claude Code session directory exists."""
        return self._session_path.is_dir()

    def parse_session(self, path: Path) -> list[Prompt]:
        """Parse a Claude Code JSONL session file into Prompt objects.

        Blank and malformed JSON lines, non-user entries, and noise
        prompts (per should_keep_prompt) are skipped silently.
        """
        prompts: list[Prompt] = []
        session_id = path.stem
        # The project name derives from the file's directory, so it is
        # identical for every line -- compute once instead of per entry
        # (previously recomputed inside the loop).
        project = self._project_from_path(str(path))

        with open(path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue

                # Only process user messages
                if entry.get("type") != "user":
                    continue

                message = entry.get("message", {})
                if message.get("role") != "user":
                    continue

                text = _extract_text(message)
                if not should_keep_prompt(text):
                    continue

                prompts.append(
                    Prompt(
                        text=text,
                        source=self.name,
                        session_id=session_id,
                        project=project,
                        timestamp=entry.get("timestamp", ""),
                    )
                )

        return prompts

    def _project_from_path(self, file_path: str) -> str:
        """Extract project name from Claude Code session path.

        Path format: ~/.claude/projects/-Users-chris-projects-myproject/session.jsonl
        The parent directory name has dashes replacing path separators;
        everything after the "projects" segment is the project name.
        """
        parent = os.path.basename(os.path.dirname(file_path))
        parts = parent.split("-")
        for i, p in enumerate(parts):
            if p == "projects" and i + 1 < len(parts):
                return "-".join(parts[i + 1 :])
        # No "projects" marker: fall back to the raw directory name.
        return parent
@@ -0,0 +1,79 @@
1
+ """OpenClaw/OpenCode session adapter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from pathlib import Path
8
+
9
+ from reprompt.adapters.base import BaseAdapter
10
+ from reprompt.adapters.claude_code import should_keep_prompt
11
+ from reprompt.core.models import Prompt
12
+
13
+
14
class OpenClawAdapter(BaseAdapter):
    """Adapter for OpenClaw/OpenCode JSONL session files.

    OpenClaw sessions use a simpler format than Claude Code:
    - No 'type' wrapper -- directly has 'role' field
    - 'content' is always a string (not list)
    - Session path: ~/.opencode/sessions/
    """

    name = "openclaw"
    default_session_path = "~/.opencode/sessions"

    def __init__(self, session_path: Path | None = None) -> None:
        """Use *session_path* if given, else the expanded default directory."""
        self._session_path = session_path or Path(os.path.expanduser(self.default_session_path))

    def detect_installed(self) -> bool:
        """Check if OpenClaw session directory exists."""
        return self._session_path.is_dir()

    def parse_session(self, path: Path) -> list[Prompt]:
        """Parse an OpenClaw JSONL session file into Prompt objects.

        Blank and malformed JSON lines, non-user entries, and noise
        prompts (per should_keep_prompt) are skipped silently.
        """
        prompts: list[Prompt] = []
        # The project name derives from the file's directory, so it is
        # identical for every line -- compute once instead of per entry
        # (previously recomputed inside the loop).
        project = self._project_from_path(str(path))

        with open(path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue

                # Only process user messages
                if entry.get("role") != "user":
                    continue

                text = str(entry.get("content", "")).strip()
                if not should_keep_prompt(text):
                    continue

                prompts.append(
                    Prompt(
                        text=text,
                        source=self.name,
                        # Entries may carry their own session id; fall back
                        # to the file name.
                        session_id=entry.get("session_id", path.stem),
                        project=project,
                        timestamp=entry.get("timestamp", ""),
                    )
                )

        return prompts

    def _project_from_path(self, file_path: str) -> str:
        """Extract project name from OpenClaw session path.

        Path format: ~/.opencode/sessions/<project-name>/session.jsonl
        Returns "" when the file sits directly in the sessions root.
        """
        parent = os.path.basename(os.path.dirname(file_path))
        return "" if parent == "sessions" else parent
reprompt/cli.py ADDED
@@ -0,0 +1,177 @@
1
+ """CLI entry point."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import typer
8
+ from rich.console import Console
9
+
10
+ from reprompt import __version__
11
+
12
+ app = typer.Typer(
13
+ name="reprompt",
14
+ help="Discover, analyze, and evolve your best prompts from AI coding sessions.",
15
+ no_args_is_help=True,
16
+ )
17
+ console = Console()
18
+
19
+
20
def _version_callback(value: bool) -> None:
    """Eager --version handler: print the version string and exit."""
    if not value:
        return
    typer.echo(f"reprompt {__version__}")
    raise typer.Exit()
24
+
25
+
26
@app.callback()
def main(
    version: bool = typer.Option(
        False, "--version", "-V", callback=_version_callback, is_eager=True
    ),
) -> None:
    """reprompt -- Discover, analyze, and evolve your best prompts from AI coding sessions."""
    # No-op body: this app-level callback exists only to attach the eager
    # --version/-V option (handled by _version_callback before any command).
33
+
34
+
35
@app.command()
def scan(
    source: str | None = typer.Option(None, help="Source adapter (claude-code, openclaw)"),
    path: str | None = typer.Option(None, help="Custom session path"),
) -> None:
    """Scan AI tool sessions for prompts."""
    # Imports deferred so `reprompt --help` stays fast.
    from reprompt.config import Settings
    from reprompt.core.pipeline import run_scan

    summary = run_scan(source=source, path=path, settings=Settings())

    console.print("[bold]Scan complete[/bold]")
    console.print(f"  Sessions scanned: {summary.sessions_scanned}")
    console.print(f"  Prompts found: {summary.total_parsed}")
    console.print(f"  Unique: {summary.unique_after_dedup}")
    console.print(f"  Duplicates: {summary.duplicates}")
    console.print(f"  New stored: {summary.new_stored}")
53
+
54
+
55
@app.command()
def report(
    format: str = typer.Option("terminal", help="Output format: terminal, json"),
    top: int = typer.Option(20, help="Number of top terms to show"),
) -> None:
    """Generate analytics report."""
    # Imports deferred so `reprompt --help` stays fast.
    from reprompt.config import Settings
    from reprompt.core.pipeline import build_report_data
    from reprompt.output.json_out import format_json_report
    from reprompt.output.terminal import render_report

    data = build_report_data(settings=Settings())

    # Anything other than "json" falls back to the terminal renderer.
    renderer = format_json_report if format == "json" else render_report
    console.print(renderer(data))
73
+
74
+
75
@app.command()
def library(
    category: str | None = typer.Option(None, help="Filter by category"),
    export: str | None = typer.Argument(None, help="Export to file path (Markdown)"),
) -> None:
    """Show or export your prompt library.

    With an export path, writes the library as Markdown; otherwise renders
    a table of stored patterns to the terminal.
    """
    from reprompt.config import Settings
    from reprompt.output.markdown import export_library_markdown
    from reprompt.storage.db import PromptDB

    settings = Settings()
    db = PromptDB(settings.db_path)
    patterns = db.get_patterns(category=category)

    if export:
        md = export_library_markdown(patterns)
        # BUGFIX: write UTF-8 explicitly. Prompt text is frequently
        # non-ASCII (CJK prompts are expected -- see SKIP_EXACT in the
        # claude-code adapter) and the platform default encoding (e.g.
        # cp1252 on Windows) would raise UnicodeEncodeError.
        Path(export).write_text(md, encoding="utf-8")
        console.print(f"Library exported to {export}")
        return

    if not patterns:
        console.print("No patterns yet. Run [bold]reprompt scan[/bold] first.")
        return

    from rich.table import Table

    table = Table(title="Prompt Library")
    table.add_column("#", style="dim", width=4)
    table.add_column("Pattern", max_width=50)
    table.add_column("Uses", justify="right")
    table.add_column("Category")
    for i, p in enumerate(patterns, 1):
        table.add_row(
            str(i),
            str(p.get("pattern_text", ""))[:50],
            str(p.get("frequency", 0)),
            str(p.get("category", "")),
        )
    console.print(table)
112
+
113
+
114
@app.command()
def status() -> None:
    """Show database statistics."""
    # Imports deferred so `reprompt --help` stays fast.
    from reprompt.config import Settings
    from reprompt.storage.db import PromptDB

    settings = Settings()
    metrics = PromptDB(settings.db_path).get_stats()

    console.print("[bold]reprompt status[/bold]")
    console.print(f"  Total prompts: {metrics.get('total_prompts', 0)}")
    console.print(f"  Unique prompts: {metrics.get('unique_prompts', 0)}")
    console.print(f"  Sessions: {metrics.get('sessions_processed', 0)}")
    console.print(f"  Patterns: {metrics.get('patterns', 0)}")
    console.print(f"  DB path: {settings.db_path}")
130
+
131
+
132
@app.command()
def purge(
    older_than: str = typer.Option("90d", help="Delete prompts older than (e.g. 90d)"),
) -> None:
    """Clean up old data."""
    import re

    from reprompt.config import Settings
    from reprompt.storage.db import PromptDB

    # Accept "90d", "90D", or a bare number of days.
    match = re.fullmatch(r"(\d+)d?", older_than.strip(), re.IGNORECASE)
    if match is None:
        raise typer.BadParameter("Use format like '90d' or '30'")
    days = int(match.group(1))

    removed = PromptDB(Settings().db_path).purge_old_prompts(days)
    console.print(f"Purged {removed} prompts older than {days} days")
150
+
151
+
152
+ @app.command("install-hook")
153
+ def install_hook(
154
+ source: str = typer.Option("claude-code", help="AI tool to install hook for"),
155
+ ) -> None:
156
+ """Install post-session hook for automatic scanning."""
157
+ home = Path.home()
158
+
159
+ if source == "claude-code":
160
+ hooks_dir = home / ".claude" / "hooks"
161
+ hook_path = hooks_dir / "reprompt-scan.sh"
162
+
163
+ if hook_path.exists():
164
+ console.print(f"Hook already exists at {hook_path}")
165
+ return
166
+
167
+ if not (home / ".claude").exists():
168
+ console.print("[yellow]Claude Code not detected (~/.claude/ not found)[/yellow]")
169
+ return
170
+
171
+ hooks_dir.mkdir(parents=True, exist_ok=True)
172
+ hook_path.write_text("#!/bin/sh\nreprompt scan --source claude-code\n")
173
+ hook_path.chmod(0o755)
174
+ console.print(f"[green]Hook installed at {hook_path}[/green]")
175
+ console.print("reprompt will automatically scan after Claude Code sessions.")
176
+ else:
177
+ console.print(f"[yellow]Hook installation for '{source}' not yet supported[/yellow]")
reprompt/config.py ADDED
@@ -0,0 +1,44 @@
1
+ """Configuration with env var override support."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from pathlib import Path
7
+
8
+ from pydantic_settings import BaseSettings
9
+
10
+
11
+ def _default_db_path() -> str:
12
+ if os.name == "nt":
13
+ base = Path(os.environ.get("LOCALAPPDATA", "~"))
14
+ elif hasattr(os, "uname") and os.uname().sysname == "Darwin":
15
+ base = Path("~/Library/Application Support")
16
+ else:
17
+ base = Path(os.environ.get("XDG_DATA_HOME", "~/.local/share"))
18
+ return str(base / "reprompt" / "reprompt.db")
19
+
20
+
21
class Settings(BaseSettings):
    """Runtime configuration.

    Every field can be overridden through a REPROMPT_<FIELD> environment
    variable (pydantic-settings env_prefix).
    """

    model_config = {"env_prefix": "REPROMPT_"}

    # Embedding
    # Backend name consumed by the dedup engine's embedder factory
    # ("tfidf" or "ollama").
    embedding_backend: str = "tfidf"
    ollama_url: str = "http://localhost:11434"

    # Storage
    # Platform-specific default location; computed once at import time.
    db_path: Path = Path(os.path.expanduser(_default_db_path()))

    # Dedup
    # Cosine-similarity cutoff at or above which two prompts count as
    # duplicates.
    dedup_threshold: float = 0.85

    # Library
    # Minimum occurrence count before a pattern enters the library.
    library_min_frequency: int = 3
    library_categories: list[str] = [
        "debug",
        "implement",
        "review",
        "test",
        "refactor",
        "explain",
        "config",
    ]
@@ -0,0 +1 @@
1
+ """Core domain models and logic."""
@@ -0,0 +1,68 @@
1
+ """TF-IDF analysis and K-means clustering."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ import numpy as np
8
+ from sklearn.cluster import KMeans
9
+ from sklearn.feature_extraction.text import TfidfVectorizer
10
+
11
+
12
def compute_tfidf_stats(texts: list[str], top_n: int = 20) -> list[dict[str, Any]]:
    """Compute TF-IDF stats, return top N terms with scores.

    Args:
        texts: Documents to analyze; an empty list yields an empty result.
        top_n: Number of top terms (by average TF-IDF) to return.

    Returns:
        List of dicts sorted by descending tfidf_avg:
        [{"term": str, "count": int, "df": int, "tfidf_avg": float}]
    """
    if not texts:
        return []

    vectorizer = TfidfVectorizer(max_features=5000)
    tfidf_matrix = vectorizer.fit_transform(texts)
    feature_names = vectorizer.get_feature_names_out()

    # Average TF-IDF score per term across all documents
    avg_scores = np.asarray(tfidf_matrix.mean(axis=0)).flatten()

    # Document frequency (number of docs containing each term)
    df = np.asarray((tfidf_matrix > 0).sum(axis=0)).flatten()

    # Summed TF-IDF weight per term, used as an approximate count.
    # BUGFIX: this column sum is already aggregated over all documents;
    # the previous code multiplied it by len(texts) again (as if it were
    # a mean), inflating "count" quadratically with corpus size.
    weight_sum = np.asarray(tfidf_matrix.sum(axis=0)).flatten()

    results: list[dict[str, Any]] = [
        {
            "term": term,
            "count": int(weight_sum[i]),
            "df": int(df[i]),
            "tfidf_avg": float(avg_scores[i]),
        }
        for i, term in enumerate(feature_names)
    ]

    results.sort(key=lambda x: x["tfidf_avg"], reverse=True)
    return results[:top_n]
46
+
47
+
48
def cluster_prompts(texts: list[str], n_clusters: int = 5) -> dict[int, list[str]]:
    """Group prompts by K-means over their TF-IDF vectors.

    Returns a mapping {cluster_id: [texts]}; empty input yields {}.
    """
    if not texts:
        return {}

    # Never ask for more clusters than there are documents.
    k = min(n_clusters, len(texts))

    matrix = TfidfVectorizer(max_features=5000).fit_transform(texts)
    labels = KMeans(n_clusters=k, random_state=42, n_init=10).fit_predict(matrix)

    clusters: dict[int, list[str]] = {}
    for label, text in zip(labels, texts):
        clusters.setdefault(int(label), []).append(text)
    return clusters
reprompt/core/dedup.py ADDED
@@ -0,0 +1,89 @@
1
+ """Two-layer deduplication engine.
2
+
3
+ L0: SHA-256 exact hash dedup (always runs)
4
+ L1: TF-IDF cosine similarity dedup (runs on hash-unique prompts)
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from reprompt.core.models import Prompt
10
+ from reprompt.embeddings.base import BaseEmbedder
11
+
12
+
13
+ def _get_embedder(backend: str) -> BaseEmbedder:
14
+ """Factory function to create an embedder by name."""
15
+ if backend == "tfidf":
16
+ from reprompt.embeddings.tfidf import TfidfEmbedder
17
+
18
+ return TfidfEmbedder()
19
+ elif backend == "ollama":
20
+ from reprompt.embeddings.ollama import OllamaEmbedder
21
+
22
+ return OllamaEmbedder()
23
+ else:
24
+ raise ValueError(f"Unknown embedding backend: {backend}")
25
+
26
+
27
class DedupEngine:
    """Two-layer deduplication: exact hash then semantic similarity.

    Layer 0 drops exact duplicates by ``prompt.hash``; layer 1 embeds the
    remaining texts and drops any prompt whose cosine similarity to an
    earlier surviving prompt meets the threshold.
    """

    def __init__(self, backend: str = "tfidf", threshold: float = 0.85) -> None:
        """Args:
            backend: Embedder factory name ("tfidf" or "ollama").
            threshold: Cosine-similarity cutoff for semantic duplicates.
        """
        self._backend = backend
        self._threshold = threshold

    def deduplicate(self, prompts: list[Prompt]) -> tuple[list[Prompt], list[Prompt]]:
        """Deduplicate prompts using hash then semantic similarity.

        Returns:
            (unique_prompts, duplicate_prompts) -- the first occurrence of
            each hash/semantic group is kept; later ones are duplicates.
        """
        if not prompts:
            return [], []

        # L0: Exact hash dedup -- keep first occurrence per hash.
        # A set suffices: the previous dict mapped hash -> index, but the
        # stored index was never read.
        seen_hashes: set[str] = set()
        hash_unique: list[Prompt] = []
        hash_dupes: list[Prompt] = []

        for prompt in prompts:
            if prompt.hash in seen_hashes:
                hash_dupes.append(prompt)
            else:
                seen_hashes.add(prompt.hash)
                hash_unique.append(prompt)

        # L1 needs at least a pair to compare.
        if len(hash_unique) < 2:
            return hash_unique, hash_dupes

        embedder = _get_embedder(self._backend)
        embeddings = embedder.embed([p.text for p in hash_unique])

        # Embedder may yield nothing (e.g. empty vocabulary); skip L1 then.
        if embeddings.size == 0:
            return hash_unique, hash_dupes

        # Mark semantic duplicates: later items are dupes of earlier ones.
        # NOTE: O(n^2) pairwise comparison -- acceptable for per-scan batch
        # sizes.
        is_dupe = [False] * len(hash_unique)
        for i in range(len(hash_unique)):
            if is_dupe[i]:
                continue
            for j in range(i + 1, len(hash_unique)):
                if is_dupe[j]:
                    continue
                sim = embedder.cosine_similarity(embeddings[i], embeddings[j])
                if sim >= self._threshold:
                    is_dupe[j] = True

        semantic_unique = [p for k, p in enumerate(hash_unique) if not is_dupe[k]]
        semantic_dupes = [p for k, p in enumerate(hash_unique) if is_dupe[k]]

        return semantic_unique, hash_dupes + semantic_dupes