codetex 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codetex/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """Codetex — LLM-friendly repo summarizer."""
2
+
3
# Version string of the codetex package.
__version__ = "0.1.0"
codetex/claude_code.py ADDED
@@ -0,0 +1,69 @@
1
+ """Claude Code CLI provider — shells out to `claude -p`."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+
8
+ from codetex.provider import LLMError, SummarizeResult
9
+
10
+
11
+ class ClaudeCodeProvider:
12
+ def __init__(self, max_concurrent: int = 2) -> None:
13
+ self._semaphore = asyncio.Semaphore(max_concurrent)
14
+
15
+ async def summarize(
16
+ self, prompt: str, system: str | None = None
17
+ ) -> SummarizeResult:
18
+ async with self._semaphore:
19
+ cmd: list[str] = ["claude", "-p", "--output-format", "json"]
20
+ if system:
21
+ cmd.extend(["--system-prompt", system])
22
+ cmd.append(prompt)
23
+
24
+ try:
25
+ proc = await asyncio.create_subprocess_exec(
26
+ *cmd,
27
+ stdout=asyncio.subprocess.PIPE,
28
+ stderr=asyncio.subprocess.PIPE,
29
+ )
30
+ stdout, stderr = await proc.communicate()
31
+ except FileNotFoundError:
32
+ raise LLMError(
33
+ "Claude Code CLI not found. "
34
+ "Install it from https://claude.ai/code "
35
+ "or use --provider anthropic."
36
+ )
37
+
38
+ if proc.returncode != 0:
39
+ raise LLMError(
40
+ f"Claude Code CLI failed (exit {proc.returncode}): "
41
+ f"{stderr.decode(errors='replace').strip()}"
42
+ )
43
+
44
+ try:
45
+ data = json.loads(stdout.decode())
46
+ except (json.JSONDecodeError, UnicodeDecodeError) as e:
47
+ raise LLMError(f"Failed to parse Claude Code response: {e}") from e
48
+
49
+ if data.get("is_error"):
50
+ msg = data.get("result", "unknown error")
51
+ raise LLMError(f"Claude Code error: {msg}")
52
+
53
+ usage = data.get("usage", {})
54
+ return SummarizeResult(
55
+ text=data.get("result", ""),
56
+ input_tokens=usage.get("input_tokens", 0),
57
+ output_tokens=usage.get("output_tokens", 0),
58
+ )
59
+
60
+ async def summarize_batch(
61
+ self,
62
+ prompts: list[str],
63
+ system: str | None = None,
64
+ ) -> list[SummarizeResult | LLMError]:
65
+ tasks = [self.summarize(p, system) for p in prompts]
66
+ results = await asyncio.gather(*tasks, return_exceptions=True)
67
+ return [
68
+ r if isinstance(r, SummarizeResult) else LLMError(str(r)) for r in results
69
+ ]
codetex/cli.py ADDED
@@ -0,0 +1,149 @@
1
+ """CLI interface using Typer."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from pathlib import Path
7
+ from typing import Annotated
8
+
9
+ import typer
10
+ from rich.console import Console
11
+ from rich.progress import Progress, SpinnerColumn, TextColumn
12
+
13
+ from codetex.indexer import index
14
+ from codetex.provider import LLMProviderBase
15
+
16
+
17
def _build_provider(provider_name: str, max_concurrent: int | None) -> LLMProviderBase:
    """Create the LLM provider selected by name.

    A falsy ``max_concurrent`` (``None`` or ``0``) falls back to the
    per-provider default: 2 for ``claude-code``, 5 for ``anthropic``.

    Raises:
        ValueError: If ``provider_name`` is not a known provider.
    """
    # Provider modules are imported only in the branch that needs them.
    if provider_name == "claude-code":
        from codetex.claude_code import ClaudeCodeProvider

        limit = max_concurrent if max_concurrent else 2
        return ClaudeCodeProvider(max_concurrent=limit)

    if provider_name == "anthropic":
        from codetex.llm import AnthropicProvider

        limit = max_concurrent if max_concurrent else 5
        return AnthropicProvider(max_concurrent=limit)

    raise ValueError(
        f"Unknown provider: {provider_name!r}."
        " Choose 'claude-code' or 'anthropic'."
    )
29
+
30
+
31
# Root Typer application; commands in this module register on it with
# the @app.command decorator.
app = typer.Typer(
    name="codetex",
    help="LLM-friendly repo summarizer",
    no_args_is_help=True,
)
# Rich console shared by the CLI code in this module for all output.
console = Console()
37
+
38
+
39
async def _run_index(
    path: Path,
    folder: str | None,
    force: bool,
    dry_run: bool,
    provider: LLMProviderBase,
) -> None:
    """Run the indexer behind a spinner, then print a summary of the result.

    All arguments are forwarded unchanged to ``index``. The summary layout
    depends on ``result.mode``: ``"dry_run"``, ``"full"``, or anything else
    (reported as a sync).

    NOTE(review): the indentation of this function was not recoverable from
    the source view; the summary is assumed to be printed after the progress
    display has closed -- confirm against the original file.
    """
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        # total=None: no total is given to the progress task.
        task = progress.add_task("Indexing...", total=None)

        async def on_progress(step: str, current: int, total: int) -> None:
            # Only the step label is shown; current/total are ignored.
            progress.update(task, description=step)

        result = await index(
            repo_path=path,
            folder=folder,
            force=force,
            dry_run=dry_run,
            on_progress=on_progress,
            provider=provider,
        )

    console.print()
    if result.mode == "dry_run":
        console.print(
            f"[yellow]Dry run:[/yellow] would index {result.files_indexed} files"
        )
        # Per-category counts are printed only when non-zero.
        if result.files_added:
            console.print(f" Added: {result.files_added}")
        if result.files_modified:
            console.print(f" Modified: {result.files_modified}")
        if result.files_deleted:
            console.print(f" Deleted: {result.files_deleted}")
    elif result.mode == "full":
        console.print(
            f"[green]Indexed {result.files_indexed} files[/green] "
            f"({result.llm_calls} LLM calls, {result.tokens_used:,} tokens, "
            f"{result.duration_seconds:.1f}s)"
        )
        console.print(" Output: .codetex/SUMMARY.md")
        # Abbreviate the commit SHA to its first 7 characters.
        console.print(f" Commit: {result.commit_sha[:7]}")
    else:
        # Any other mode is reported as a sync with +added ~modified -deleted.
        console.print(
            f"[green]Synced[/green] "
            f"+{result.files_added} ~{result.files_modified} -{result.files_deleted} "
            f"({result.llm_calls} LLM calls, {result.tokens_used:,} tokens, "
            f"{result.duration_seconds:.1f}s)"
        )
        if result.files_indexed == 0 and result.files_deleted == 0:
            console.print(" No changes detected")
        console.print(f" Commit: {result.commit_sha[:7]}")
94
+
95
+
96
# NOTE: Typer uses this function's docstring as the command's help text, so
# the docstring is runtime-visible and is kept short on purpose.
@app.command(name="index")
def index_cmd(
    path: Annotated[
        Path,
        typer.Argument(help="Path to the git repository"),
    ] = Path("."),
    folder: Annotated[
        str | None,
        typer.Option("--folder", "-f", help="Only index files under this path"),
    ] = None,
    force: Annotated[
        bool,
        typer.Option("--force", help="Force full re-index"),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option(
            "--dry-run", help="Show what would be indexed without calling LLM"
        ),
    ] = False,
    provider_name: Annotated[
        str,
        typer.Option("--provider", "-p", help="LLM provider: claude-code or anthropic"),
    ] = "claude-code",
    max_concurrent: Annotated[
        int | None,
        typer.Option(
            "--max-concurrent",
            help="Max parallel LLM calls (default: 2 for claude-code, 5 for anthropic)",
        ),
    ] = None,
) -> None:
    """Index a repository and generate .codetex/SUMMARY.md."""
    # Function-scope import: only needed on the error path below.
    from rich.markup import escape

    try:
        provider = _build_provider(provider_name, max_concurrent)
        asyncio.run(_run_index(path, folder, force, dry_run, provider))
    except Exception as e:
        # Top-level CLI boundary: every failure (including the ValueError
        # raised for an unknown provider) becomes a one-line message and
        # exit status 1. The message is escaped so that square brackets in
        # the exception text are printed literally instead of being parsed
        # as Rich console markup.
        console.print(f"[red]Error:[/red] {escape(str(e))}")
        raise typer.Exit(1) from e
138
+
139
+
140
@app.command()
def serve() -> None:
    """Start the MCP server (stdio transport)."""
    # Function-scope import: the server module is loaded only when this
    # command actually runs.
    from codetex import server as server_module

    server_module.mcp_server.run(transport="stdio")
146
+
147
+
148
def main() -> None:
    """Invoke the Typer ``app``.

    Presumably the target of the package's console-script entry point --
    confirm against the packaging metadata.
    """
    app()
codetex/git.py ADDED
@@ -0,0 +1,97 @@
1
+ """Git subprocess wrapper."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+
9
+
10
class GitError(Exception):
    """Raised when a git subprocess exits with a non-zero status."""
12
+
13
+
14
@dataclass
class DiffResult:
    """Paths reported by a name-status diff, grouped by kind of change."""

    # Each field defaults to its own fresh empty list.
    added: list[str] = field(default_factory=list)
    modified: list[str] = field(default_factory=list)
    deleted: list[str] = field(default_factory=list)
    # (old_path, new_path) pairs.
    renamed: list[tuple[str, str]] = field(default_factory=list)

    @property
    def total_changed(self) -> int:
        """Number of entries across ``added``, ``modified`` and ``deleted``.

        ``renamed`` is not counted on its own.
        """
        buckets = (self.added, self.modified, self.deleted)
        return sum(map(len, buckets))
24
+
25
+
26
async def _run(
    *args: str, cwd: str | Path | None = None
) -> tuple[str, str]:
    """Execute ``git <args>`` and return its decoded output.

    Args:
        *args: Arguments passed to the ``git`` executable.
        cwd: Working directory for the subprocess, if any.

    Returns:
        ``(stdout, stderr)``, each decoded as UTF-8 (undecodable bytes
        replaced) and stripped of surrounding whitespace.

    Raises:
        GitError: If git exits with a non-zero status; the message carries
            stderr, or stdout when stderr is empty.
    """
    pipe = asyncio.subprocess.PIPE
    process = await asyncio.create_subprocess_exec(
        "git", *args, stdout=pipe, stderr=pipe, cwd=cwd
    )
    raw_out, raw_err = await process.communicate()
    out_text = raw_out.decode("utf-8", errors="replace").strip()
    err_text = raw_err.decode("utf-8", errors="replace").strip()
    if process.returncode == 0:
        return out_text, err_text
    raise GitError(f"git {args[0]} failed: {err_text or out_text}")
42
+
43
+
44
async def get_head_sha(repo_path: str | Path) -> str:
    """Return the output of ``git rev-parse HEAD`` run inside ``repo_path``."""
    head, _unused_stderr = await _run("rev-parse", "HEAD", cwd=repo_path)
    return head
47
+
48
+
49
async def get_repo_name(repo_path: str | Path) -> str:
    """Return the final component of the resolved ``repo_path``."""
    return Path(repo_path).resolve().name
52
+
53
+
54
async def list_tracked_files(repo_path: str | Path) -> list[str]:
    """Return the paths of all files tracked by git in ``repo_path``.

    ``git ls-files`` is run with ``-z`` so that entries are NUL-terminated
    and emitted verbatim. Without it git C-quotes paths containing unusual
    characters (for example non-ASCII names under the default
    ``core.quotePath``), and the quoted strings would not match the real
    files on disk.

    Returns:
        The tracked paths as reported by git; an empty list when nothing
        is tracked.
    """
    stdout, _ = await _run("ls-files", "-z", cwd=repo_path)
    # The output ends with a NUL, so splitting leaves an empty trailing
    # element that is filtered out here (this also covers empty output).
    return [name for name in stdout.split("\0") if name]
59
+
60
+
61
async def diff_name_status(
    repo_path: str | Path, old_sha: str, new_sha: str
) -> DiffResult:
    """Run ``git diff --name-status old_sha..new_sha`` and parse the output.

    Returns:
        The ``DiffResult`` produced by ``_parse_diff_output``.
    """
    revision_range = f"{old_sha}..{new_sha}"
    raw_diff, _unused_stderr = await _run(
        "diff", "--name-status", revision_range, cwd=repo_path
    )
    return _parse_diff_output(raw_diff)
68
+
69
+
70
async def is_git_repo(path: str | Path) -> bool:
    """Report whether ``git rev-parse --git-dir`` succeeds in ``path``.

    Returns ``False`` when git reports an error or a ``FileNotFoundError``
    is raised while launching it.
    """
    try:
        await _run("rev-parse", "--git-dir", cwd=path)
    except (GitError, FileNotFoundError):
        return False
    else:
        return True
76
+
77
+
78
def _parse_diff_output(output: str) -> DiffResult:
    """Parse ``git diff --name-status`` output into a ``DiffResult``.

    Each line is ``<status>\\t<path>`` or, for renames and copies,
    ``<status><score>\\t<old>\\t<new>``. Dispatch is on the first status
    letter so that entries carrying a score (``R``/``C`` always, ``M`` for
    rewrites) are recognised:

    * ``A`` -> ``added``
    * ``M`` / ``T`` (type change) -> ``modified``
    * ``D`` -> ``deleted``
    * ``R`` -> ``renamed``, plus the new path in ``added`` and the old path
      in ``deleted``
    * ``C`` -> the copy destination in ``added`` (the source is untouched)

    Lines without a tab-separated path and unknown status letters are
    ignored.
    """
    result = DiffResult()
    for line in output.splitlines():
        status, *paths = line.split("\t")
        if not paths:
            continue
        code = status[:1]
        if code == "A":
            result.added.append(paths[0])
        elif code in ("M", "T"):
            result.modified.append(paths[0])
        elif code == "D":
            result.deleted.append(paths[0])
        elif code == "R" and len(paths) >= 2:
            old_path, new_path = paths[0], paths[1]
            result.renamed.append((old_path, new_path))
            result.added.append(new_path)
            result.deleted.append(old_path)
        elif code == "C" and len(paths) >= 2:
            result.added.append(paths[1])
    return result
codetex/ignore.py ADDED
@@ -0,0 +1,83 @@
1
+ """File ignore filter using .gitignore-style patterns."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import pathspec
8
+
9
# gitignore-style patterns that are always excluded, before any patterns
# read from the repository's own ignore files are applied.
DEFAULT_EXCLUDES = {
    ".git",
    ".hg",
    ".svn",
    "__pycache__",
    "node_modules",
    ".venv",
    "venv",
    ".env",
    ".tox",
    ".mypy_cache",
    ".ruff_cache",
    ".pytest_cache",
    "dist",
    "build",
    ".eggs",
    "*.egg-info",
    ".codetex",
}

# Max file size to index (2 MB); larger files are treated as excluded.
MAX_FILE_SIZE = 2 * 1024 * 1024
31
+
32
+
33
class IgnoreFilter:
    """Decide which repository-relative paths should be skipped.

    A path is excluded when it matches the default excludes or a pattern
    from ``.gitignore`` / ``.codetexignore`` in the repository root, or when
    it is an existing file that is too large, looks binary, or cannot be
    read.
    """

    def __init__(self, repo_path: str | Path) -> None:
        self._repo_path = Path(repo_path)
        self._spec = self._load_patterns()

    def _load_patterns(self) -> pathspec.PathSpec:
        """Build the pattern spec: defaults, then .gitignore, then .codetexignore."""
        collected: list[str] = sorted(DEFAULT_EXCLUDES)
        for ignore_name in (".gitignore", ".codetexignore"):
            ignore_file = self._repo_path / ignore_name
            if not ignore_file.exists():
                continue
            content = ignore_file.read_text(encoding="utf-8", errors="replace")
            for raw_line in content.splitlines():
                # Drop blank lines and lines that start with "#".
                if raw_line.strip() and not raw_line.startswith("#"):
                    collected.append(raw_line)
        return pathspec.PathSpec.from_lines("gitignore", collected)

    def is_excluded(self, relative_path: str) -> bool:
        """Return True when ``relative_path`` should not be indexed."""
        if self._spec.match_file(relative_path):
            return True

        candidate = self._repo_path / relative_path
        if not candidate.is_file():
            # Not an existing regular file: nothing further to inspect.
            return False

        try:
            if candidate.stat().st_size > MAX_FILE_SIZE:
                return True
            # Binary detection: check for null bytes in first 8KB
            with open(candidate, "rb") as handle:
                head = handle.read(8192)
        except OSError:
            # Files that cannot be stat'ed or read are skipped.
            return True
        return b"\x00" in head

    def filter_files(self, files: list[str]) -> list[str]:
        """Return the entries of ``files`` that are not excluded, in order."""
        kept: list[str] = []
        for name in files:
            if not self.is_excluded(name):
                kept.append(name)
        return kept