codetex 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codetex/__init__.py +3 -0
- codetex/claude_code.py +69 -0
- codetex/cli.py +149 -0
- codetex/git.py +97 -0
- codetex/ignore.py +83 -0
- codetex/indexer.py +344 -0
- codetex/llm.py +174 -0
- codetex/markdown.py +68 -0
- codetex/models.py +63 -0
- codetex/parser.py +645 -0
- codetex/provider.py +29 -0
- codetex/server.py +86 -0
- codetex/state.py +108 -0
- codetex-0.1.0.dist-info/METADATA +20 -0
- codetex-0.1.0.dist-info/RECORD +17 -0
- codetex-0.1.0.dist-info/WHEEL +4 -0
- codetex-0.1.0.dist-info/entry_points.txt +2 -0
codetex/__init__.py
ADDED
codetex/claude_code.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Claude Code CLI provider — shells out to `claude -p`."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
|
|
8
|
+
from codetex.provider import LLMError, SummarizeResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ClaudeCodeProvider:
    """LLM provider that shells out to the Claude Code CLI (``claude -p``).

    Every request spawns one ``claude`` subprocess; a semaphore caps how many
    of them run at the same time.
    """

    def __init__(self, max_concurrent: int = 2) -> None:
        """Create the provider.

        Args:
            max_concurrent: Maximum number of ``claude`` subprocesses allowed
                to run concurrently.
        """
        self._semaphore = asyncio.Semaphore(max_concurrent)

    async def summarize(
        self, prompt: str, system: str | None = None
    ) -> SummarizeResult:
        """Send one prompt to ``claude -p`` and return the parsed reply.

        Args:
            prompt: Prompt text, passed as the final command-line argument.
            system: Optional system prompt, forwarded via ``--system-prompt``
                when non-empty.

        Returns:
            A ``SummarizeResult`` built from the ``result`` and ``usage``
            fields of the CLI's JSON output. Missing fields fall back to an
            empty string / zero token counts.

        Raises:
            LLMError: If the ``claude`` executable is not found, exits with a
                non-zero status, prints something that is not a JSON object,
                or reports ``is_error`` in its reply.
        """
        async with self._semaphore:
            cmd: list[str] = ["claude", "-p", "--output-format", "json"]
            if system:
                cmd.extend(["--system-prompt", system])
            cmd.append(prompt)

            try:
                proc = await asyncio.create_subprocess_exec(
                    *cmd,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                )
                stdout, stderr = await proc.communicate()
            except FileNotFoundError as e:
                # Chain the original error so the traceback still shows which
                # executable lookup failed.
                raise LLMError(
                    "Claude Code CLI not found. "
                    "Install it from https://claude.ai/code "
                    "or use --provider anthropic."
                ) from e

            if proc.returncode != 0:
                raise LLMError(
                    f"Claude Code CLI failed (exit {proc.returncode}): "
                    f"{stderr.decode(errors='replace').strip()}"
                )

            try:
                data = json.loads(stdout.decode())
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                raise LLMError(f"Failed to parse Claude Code response: {e}") from e

            # Valid JSON that is not an object (e.g. a list or a bare string)
            # would otherwise escape as an AttributeError on ``.get``.
            if not isinstance(data, dict):
                raise LLMError(
                    "Failed to parse Claude Code response: "
                    f"expected a JSON object, got {type(data).__name__}"
                )

            if data.get("is_error"):
                msg = data.get("result", "unknown error")
                raise LLMError(f"Claude Code error: {msg}")

            # ``usage`` may be absent or null; treat both as "no usage data".
            usage = data.get("usage")
            if not isinstance(usage, dict):
                usage = {}
            return SummarizeResult(
                text=data.get("result", ""),
                input_tokens=usage.get("input_tokens", 0),
                output_tokens=usage.get("output_tokens", 0),
            )

    async def summarize_batch(
        self,
        prompts: list[str],
        system: str | None = None,
    ) -> list[SummarizeResult | LLMError]:
        """Summarize several prompts concurrently, one result per prompt.

        Failures do not abort the batch: each position in the returned list
        holds either the ``SummarizeResult`` for that prompt or an
        ``LLMError`` describing why it failed. Concurrency is bounded by the
        semaphore inside ``summarize``.

        Args:
            prompts: Prompts to run, in order.
            system: Optional system prompt shared by every request.

        Returns:
            Results in the same order as ``prompts``.
        """
        outcomes = await asyncio.gather(
            *(self.summarize(p, system) for p in prompts),
            return_exceptions=True,
        )

        results: list[SummarizeResult | LLMError] = []
        for outcome in outcomes:
            if isinstance(outcome, (SummarizeResult, LLMError)):
                # Keep an LLMError as raised (with its traceback and cause)
                # instead of re-wrapping it in a fresh instance.
                results.append(outcome)
                continue
            # Any other exception is converted; fall back to repr() so an
            # exception with an empty message still yields a useful string.
            wrapped = LLMError(str(outcome) or repr(outcome))
            wrapped.__cause__ = outcome
            results.append(wrapped)
        return results
|
codetex/cli.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""CLI interface using Typer."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Annotated
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
12
|
+
|
|
13
|
+
from codetex.indexer import index
|
|
14
|
+
from codetex.provider import LLMProviderBase
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _build_provider(provider_name: str, max_concurrent: int | None) -> LLMProviderBase:
|
|
18
|
+
if provider_name == "claude-code":
|
|
19
|
+
from codetex.claude_code import ClaudeCodeProvider
|
|
20
|
+
return ClaudeCodeProvider(max_concurrent=max_concurrent or 2)
|
|
21
|
+
elif provider_name == "anthropic":
|
|
22
|
+
from codetex.llm import AnthropicProvider
|
|
23
|
+
return AnthropicProvider(max_concurrent=max_concurrent or 5)
|
|
24
|
+
else:
|
|
25
|
+
raise ValueError(
|
|
26
|
+
f"Unknown provider: {provider_name!r}."
|
|
27
|
+
" Choose 'claude-code' or 'anthropic'."
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Root Typer application. With no_args_is_help=True, running the program
# without arguments prints the help text.
app = typer.Typer(
    name="codetex",
    help="LLM-friendly repo summarizer",
    no_args_is_help=True,
)
# Rich console used for this module's terminal output (progress display,
# result summaries, and error messages).
console = Console()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
async def _run_index(
    path: Path,
    folder: str | None,
    force: bool,
    dry_run: bool,
    provider: LLMProviderBase,
) -> None:
    """Run ``index`` behind a spinner, then print a summary of its result.

    Args:
        path: Repository path, forwarded to ``index`` as ``repo_path``.
        folder: Optional sub-path restriction, forwarded unchanged.
        force: Forwarded unchanged to ``index``.
        dry_run: Forwarded unchanged to ``index``.
        provider: LLM provider handed to ``index``.

    The summary layout depends on ``result.mode``: ``"dry_run"`` lists what
    would be indexed, ``"full"`` reports a complete index, and any other mode
    is reported as a sync.
    """
    spinner = Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    )
    with spinner as progress:
        task = progress.add_task("Indexing...", total=None)

        async def on_progress(step: str, current: int, total: int) -> None:
            # Only the step label is displayed; the counters are not used.
            progress.update(task, description=step)

        result = await index(
            repo_path=path,
            folder=folder,
            force=force,
            dry_run=dry_run,
            on_progress=on_progress,
            provider=provider,
        )

    def usage_suffix() -> str:
        # Built lazily so these fields are only read for the modes that
        # actually print them.
        return (
            f"({result.llm_calls} LLM calls, {result.tokens_used:,} tokens, "
            f"{result.duration_seconds:.1f}s)"
        )

    console.print()

    if result.mode == "dry_run":
        console.print(
            f"[yellow]Dry run:[/yellow] would index {result.files_indexed} files"
        )
        breakdown = (
            ("Added", result.files_added),
            ("Modified", result.files_modified),
            ("Deleted", result.files_deleted),
        )
        for label, count in breakdown:
            if count:
                console.print(f" {label}: {count}")
        return

    if result.mode == "full":
        console.print(
            f"[green]Indexed {result.files_indexed} files[/green] "
            + usage_suffix()
        )
        console.print(" Output: .codetex/SUMMARY.md")
        console.print(f" Commit: {result.commit_sha[:7]}")
        return

    # Any remaining mode is reported as a sync.
    console.print(
        "[green]Synced[/green] "
        f"+{result.files_added} ~{result.files_modified} -{result.files_deleted} "
        + usage_suffix()
    )
    nothing_changed = result.files_indexed == 0 and result.files_deleted == 0
    if nothing_changed:
        console.print(" No changes detected")
    console.print(f" Commit: {result.commit_sha[:7]}")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@app.command(name="index")
def index_cmd(
    path: Annotated[
        Path,
        typer.Argument(help="Path to the git repository"),
    ] = Path("."),
    folder: Annotated[
        str | None,
        typer.Option("--folder", "-f", help="Only index files under this path"),
    ] = None,
    force: Annotated[
        bool,
        typer.Option("--force", help="Force full re-index"),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option(
            "--dry-run", help="Show what would be indexed without calling LLM"
        ),
    ] = False,
    provider_name: Annotated[
        str,
        typer.Option("--provider", "-p", help="LLM provider: claude-code or anthropic"),
    ] = "claude-code",
    max_concurrent: Annotated[
        int | None,
        typer.Option(
            "--max-concurrent",
            help="Max parallel LLM calls (default: 2 for claude-code, 5 for anthropic)",
        ),
    ] = None,
) -> None:
    """Index a repository and generate .codetex/SUMMARY.md."""
    # The docstring above is kept to a single line because Typer shows it as
    # the command's help text.
    #
    # Builds the provider, runs the async indexer to completion, and converts
    # any failure into a red "Error:" line plus exit status 1.
    from rich.markup import escape

    try:
        provider = _build_provider(provider_name, max_concurrent)
        asyncio.run(_run_index(path, folder, force, dry_run, provider))
    except Exception as e:
        # One handler is enough: ValueError is an Exception and both cases
        # were reported identically. The message is escaped so that square
        # brackets in it are printed literally instead of being parsed as
        # Rich markup.
        console.print(f"[red]Error:[/red] {escape(str(e))}")
        raise typer.Exit(1) from e
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@app.command()
def serve() -> None:
    """Start the MCP server (stdio transport)."""
    # Imported inside the command rather than at module level — presumably so
    # the server module is only loaded when this command runs; confirm.
    from codetex.server import mcp_server

    mcp_server.run(transport="stdio")
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def main() -> None:
    """Run the Typer application defined in this module."""
    app()
|
codetex/git.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Git subprocess wrapper."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class GitError(Exception):
    """Raised by ``_run`` when a git command exits with a non-zero status."""

    pass
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class DiffResult:
    """File paths from a git diff, grouped by kind of change."""

    # Paths reported as added.
    added: list[str] = field(default_factory=list)
    # Paths reported as modified.
    modified: list[str] = field(default_factory=list)
    # Paths reported as deleted.
    deleted: list[str] = field(default_factory=list)
    # (old_path, new_path) pairs for renames.
    renamed: list[tuple[str, str]] = field(default_factory=list)

    @property
    def total_changed(self) -> int:
        """Combined number of added, modified, and deleted paths.

        ``renamed`` is not counted here.
        """
        counted = (self.added, self.modified, self.deleted)
        return sum(map(len, counted))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
async def _run(
    *args: str, cwd: str | Path | None = None
) -> tuple[str, str]:
    """Run ``git <args...>`` and return its decoded output.

    Args:
        *args: Arguments placed after ``git`` on the command line.
        cwd: Working directory for the subprocess; ``None`` leaves the
            default.

    Returns:
        ``(stdout, stderr)`` decoded as UTF-8 (undecodable bytes replaced)
        with surrounding whitespace stripped.

    Raises:
        GitError: If git exits with a non-zero status. The message carries
            stderr, or stdout when stderr is empty.
    """
    process = await asyncio.create_subprocess_exec(
        "git",
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        cwd=cwd,
    )
    raw_out, raw_err = await process.communicate()
    out_text, err_text = (
        raw.decode("utf-8", errors="replace").strip() for raw in (raw_out, raw_err)
    )
    if process.returncode == 0:
        return out_text, err_text
    raise GitError(f"git {args[0]} failed: {err_text or out_text}")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
async def get_head_sha(repo_path: str | Path) -> str:
    """Return the output of ``git rev-parse HEAD`` run in *repo_path*."""
    output = await _run("rev-parse", "HEAD", cwd=repo_path)
    return output[0]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
async def get_repo_name(repo_path: str | Path) -> str:
    """Return the final path component of *repo_path* after resolving it.

    No git command is run; the name comes from the path alone.
    """
    return Path(repo_path).resolve().name
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
async def list_tracked_files(repo_path: str | Path) -> list[str]:
    """Return the paths printed by ``git ls-files`` for *repo_path*.

    The command is run with ``-z`` so that git emits paths verbatim,
    separated by NUL bytes. Without it, paths containing non-ASCII or other
    special characters are printed quoted and escaped, and would no longer
    match the files on disk.

    Returns:
        The listed paths; an empty list when git prints nothing.

    Raises:
        GitError: If the git command fails.
    """
    stdout, _ = await _run("ls-files", "-z", cwd=repo_path)
    # Every entry is NUL-terminated, so the split yields a trailing empty
    # string (and [""] for empty output); drop empties.
    return [name for name in stdout.split("\0") if name]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
async def diff_name_status(
    repo_path: str | Path, old_sha: str, new_sha: str
) -> DiffResult:
    """Run ``git diff --name-status old_sha..new_sha`` and parse the output.

    Args:
        repo_path: Directory in which git is run.
        old_sha: Left-hand side of the revision range.
        new_sha: Right-hand side of the revision range.

    Returns:
        The ``DiffResult`` produced by ``_parse_diff_output``.
    """
    revision_range = f"{old_sha}..{new_sha}"
    listing = (await _run("diff", "--name-status", revision_range, cwd=repo_path))[0]
    return _parse_diff_output(listing)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
async def is_git_repo(path: str | Path) -> bool:
    """Return whether ``git rev-parse --git-dir`` succeeds in *path*.

    Returns ``False`` when git reports an error and also when the subprocess
    cannot be started there at all, e.g. git is not installed, *path* does not
    exist, *path* is a regular file, or it is not accessible.
    """
    try:
        await _run("rev-parse", "--git-dir", cwd=path)
    except (GitError, OSError):
        # OSError covers FileNotFoundError as well as NotADirectoryError and
        # PermissionError, which would otherwise escape as crashes.
        return False
    return True
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _parse_diff_output(output: str) -> DiffResult:
    """Parse ``git diff --name-status`` output into a ``DiffResult``.

    Each line is expected to be tab-separated: a status field followed by one
    path, or two paths for renames and copies. Lines without a path, and
    status codes not listed below, are skipped.

    Status handling:
        * ``A`` - path goes to ``added``.
        * ``M`` - path goes to ``modified``.
        * ``T`` - type change; the content differs, so the path goes to
          ``modified``.
        * ``D`` - path goes to ``deleted``.
        * ``R...`` - recorded in ``renamed``; the new path is also added and
          the old path also deleted.
        * ``C...`` - copy; the destination is a new file, so it goes to
          ``added`` (the source is untouched).

    Args:
        output: Raw command output; may be empty.

    Returns:
        The populated ``DiffResult`` (all lists empty for empty output).
    """
    result = DiffResult()
    for line in output.splitlines():
        parts = line.split("\t")
        if len(parts) < 2:
            continue
        status = parts[0]
        if status == "A":
            result.added.append(parts[1])
        elif status in ("M", "T"):
            result.modified.append(parts[1])
        elif status == "D":
            result.deleted.append(parts[1])
        elif status.startswith("R") and len(parts) >= 3:
            result.renamed.append((parts[1], parts[2]))
            result.added.append(parts[2])
            result.deleted.append(parts[1])
        elif status.startswith("C") and len(parts) >= 3:
            result.added.append(parts[2])
    return result
|
codetex/ignore.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""File ignore filter using .gitignore-style patterns."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import pathspec
|
|
8
|
+
|
|
9
|
+
# Gitignore-style patterns that IgnoreFilter always adds to its ignore spec,
# ahead of any patterns read from the repository's own ignore files.
DEFAULT_EXCLUDES = {
    ".git",
    ".hg",
    ".svn",
    "__pycache__",
    "node_modules",
    ".venv",
    "venv",
    ".env",
    ".tox",
    ".mypy_cache",
    ".ruff_cache",
    ".pytest_cache",
    "dist",
    "build",
    ".eggs",
    "*.egg-info",
    ".codetex",
}

# Max file size to index (2 MB); files larger than this are excluded.
MAX_FILE_SIZE = 2 * 1024 * 1024
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class IgnoreFilter:
    """Decide which repository files are skipped by the indexer.

    A path is excluded when it matches a gitignore-style pattern (the built-in
    ``DEFAULT_EXCLUDES`` plus the repository's root ``.gitignore`` and
    ``.codetexignore``), or when the file on disk is larger than
    ``MAX_FILE_SIZE``, looks binary, or cannot be read.
    """

    def __init__(self, repo_path: str | Path) -> None:
        """Load the ignore patterns for the repository at *repo_path*."""
        self._repo_path = Path(repo_path)
        self._spec = self._load_patterns()

    def _read_pattern_file(self, name: str) -> list[str]:
        """Return the usable pattern lines of ignore file *name*.

        Blank lines and lines starting with ``#`` are dropped. A missing file
        yields an empty list, and so does a path that exists but is not a
        regular file (e.g. a directory), which would otherwise fail to read.
        """
        pattern_file = self._repo_path / name
        if not pattern_file.is_file():
            return []
        text = pattern_file.read_text(encoding="utf-8", errors="replace")
        return [
            line
            for line in text.splitlines()
            if line.strip() and not line.startswith("#")
        ]

    def _load_patterns(self) -> pathspec.PathSpec:
        """Build the spec from the defaults followed by both ignore files."""
        # Defaults go first (sorted for a deterministic order), then the
        # repository's own files in a fixed order.
        patterns: list[str] = sorted(DEFAULT_EXCLUDES)
        for name in (".gitignore", ".codetexignore"):
            patterns.extend(self._read_pattern_file(name))
        return pathspec.PathSpec.from_lines("gitignore", patterns)

    def is_excluded(self, relative_path: str) -> bool:
        """Return ``True`` if *relative_path* should be skipped.

        Args:
            relative_path: Path relative to the repository root.

        Returns:
            ``True`` when the path matches an ignore pattern, or when it is a
            file that exceeds ``MAX_FILE_SIZE``, contains a NUL byte in its
            first 8 KB, or cannot be read. ``False`` otherwise, including for
            paths that are not regular files on disk.
        """
        if self._spec.match_file(relative_path):
            return True

        full_path = self._repo_path / relative_path
        if not full_path.is_file():
            return False

        try:
            if full_path.stat().st_size > MAX_FILE_SIZE:
                return True
            # Binary detection: check for null bytes in first 8KB
            with open(full_path, "rb") as f:
                return b"\x00" in f.read(8192)
        except OSError:
            # Unreadable files are treated as excluded.
            return True

    def filter_files(self, files: list[str]) -> list[str]:
        """Return the entries of *files* that are not excluded, in order."""
        return [f for f in files if not self.is_excluded(f)]
|