breadcrumb-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- breadcrumb/__init__.py +7 -0
- breadcrumb/ai/__init__.py +1 -0
- breadcrumb/ai/prompts.py +60 -0
- breadcrumb/ai/router.py +187 -0
- breadcrumb/cli.py +144 -0
- breadcrumb/commands/__init__.py +1 -0
- breadcrumb/commands/ask.py +98 -0
- breadcrumb/commands/audit.py +77 -0
- breadcrumb/commands/chat.py +123 -0
- breadcrumb/commands/commit.py +87 -0
- breadcrumb/commands/diff.py +90 -0
- breadcrumb/commands/digest.py +80 -0
- breadcrumb/commands/explain_error.py +63 -0
- breadcrumb/commands/init.py +67 -0
- breadcrumb/commands/share.py +209 -0
- breadcrumb/config.py +84 -0
- breadcrumb/history.py +110 -0
- breadcrumb/ingest.py +163 -0
- breadcrumb_cli-0.1.0.dist-info/METADATA +342 -0
- breadcrumb_cli-0.1.0.dist-info/RECORD +23 -0
- breadcrumb_cli-0.1.0.dist-info/WHEEL +4 -0
- breadcrumb_cli-0.1.0.dist-info/entry_points.txt +2 -0
- breadcrumb_cli-0.1.0.dist-info/licenses/LICENSE +23 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Export chat sessions as shareable HTML.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
# ruff: noqa: W293
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
|
|
12
|
+
# Module-level rich console; cmd_share prints its status and error lines through it.
console = Console()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def generate_html(title: str, messages: List[Dict]) -> str:
    """Render a chat transcript as a self-contained HTML page.

    Args:
        title: Text used for the ``<title>`` element and the header subtitle.
        messages: Message dicts. ``role`` (default ``"unknown"``) and
            ``content`` (default empty) are read from each one; other keys
            are ignored.

    Returns:
        The complete HTML document as a single string.

    Every caller-supplied value (title, role, content) is HTML-escaped before
    it is interpolated, so transcript text cannot inject markup or scripts
    into the exported page.
    """
    # Imported here because the module header does not import them; ``escape``
    # is imported by name so no local variable can shadow the ``html`` module.
    from datetime import datetime
    from html import escape

    safe_title = escape(str(title))
    # The page is meant to be shared, so the footer carries a timestamp rather
    # than anything that identifies the local machine.
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M")

    # Plain (non-f) string so the CSS braces do not need doubling.
    stylesheet = """\
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', sans-serif;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            padding: 20px;
            min-height: 100vh;
        }

        .container {
            max-width: 800px;
            margin: 0 auto;
            background: white;
            border-radius: 12px;
            box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
            overflow: hidden;
        }

        .header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 40px 20px;
            text-align: center;
        }

        .header h1 {
            font-size: 32px;
            margin-bottom: 10px;
        }

        .header p {
            opacity: 0.9;
            font-size: 14px;
        }

        .messages {
            padding: 30px;
            max-height: 70vh;
            overflow-y: auto;
        }

        .message {
            margin-bottom: 20px;
            animation: fadeIn 0.3s ease-in;
        }

        @keyframes fadeIn {
            from { opacity: 0; transform: translateY(10px); }
            to { opacity: 1; transform: translateY(0); }
        }

        .message.user {
            text-align: right;
        }

        .message.user .content {
            background: #667eea;
            color: white;
            margin-left: 20%;
        }

        .message.assistant .content {
            background: #f0f0f0;
            color: #333;
            margin-right: 20%;
        }

        .role {
            font-size: 12px;
            font-weight: 600;
            margin-bottom: 5px;
            opacity: 0.7;
            text-transform: uppercase;
        }

        .content {
            padding: 12px 16px;
            border-radius: 8px;
            line-height: 1.5;
            word-wrap: break-word;
            white-space: pre-wrap;
        }

        .content code {
            background: rgba(0, 0, 0, 0.1);
            padding: 2px 6px;
            border-radius: 3px;
            font-family: 'Monaco', 'Courier New', monospace;
            font-size: 13px;
        }

        .content pre {
            background: rgba(0, 0, 0, 0.05);
            padding: 10px;
            border-radius: 5px;
            overflow-x: auto;
            margin: 10px 0;
        }

        .content pre code {
            background: none;
            padding: 0;
        }

        .footer {
            padding: 20px;
            background: #f9f9f9;
            text-align: center;
            font-size: 12px;
            color: #999;
        }
"""

    parts = [
        f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{safe_title}</title>
    <style>
{stylesheet}    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>🍞 Bread Crumb Chat</h1>
            <p>{safe_title}</p>
        </div>

        <div class="messages">
"""
    ]

    for msg in messages:
        # ``role`` ends up inside an attribute value as well as element text,
        # so it is escaped with quotes included (the default for escape()).
        role = escape(str(msg.get("role", "unknown")))
        raw_content = msg.get("content", "")
        content = escape("" if raw_content is None else str(raw_content))

        parts.append(
            f"""            <div class="message {role}">
                <div class="role">{role}</div>
                <div class="content">{content}</div>
            </div>
"""
        )

    parts.append(
        f"""        </div>

        <div class="footer">
            <p>Powered by Bread Crumb</p>
            <p>Generated on {generated_at}</p>
        </div>
    </div>
</body>
</html>"""
    )

    return "".join(parts)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def cmd_share(session_file: Path, output_file: Optional[Path] = None) -> Optional[Path]:
    """
    Export a chat session as shareable HTML.

    Args:
        session_file: Path to session JSON file
        output_file: Where to save the HTML (default: next to the session
            file, same stem with an ``.html`` suffix)

    Returns:
        Path to created HTML file, or None when the session could not be
        read or the HTML could not be written (an error line is printed).
    """
    import json

    if not session_file.exists():
        console.print(f"[red]Session file not found: {session_file}[/red]")
        return None

    try:
        # JSON decoding and text-decoding failures are both ValueError subclasses.
        data = json.loads(session_file.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        console.print(f"[red]Error reading session: {e}[/red]")
        return None

    if not isinstance(data, dict):
        console.print("[red]Error reading session: expected a JSON object[/red]")
        return None

    messages = data.get("messages", [])
    if not isinstance(messages, list):
        console.print("[red]Error reading session: 'messages' is not a list[/red]")
        return None
    # generate_html calls .get() on each entry, so drop anything that is not a dict.
    messages = [m for m in messages if isinstance(m, dict)]

    title = f"Bread Crumb Chat - {data.get('session', 'default')}"
    html = generate_html(title, messages)

    if output_file is None:
        output_file = session_file.parent / f"{session_file.stem}.html"

    try:
        # The page declares charset UTF-8 and contains non-ASCII text, so the
        # encoding is pinned instead of following the platform locale.
        output_file.write_text(html, encoding="utf-8")
    except OSError as e:
        console.print(f"[red]Error writing HTML: {e}[/red]")
        return None

    console.print(f"[green]✓ Exported to {output_file}[/green]")
    return output_file
|
breadcrumb/config.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration management for Bread Crumb.
|
|
3
|
+
Handles API keys, provider preferences, and global settings.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Dict, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Config:
    """Manages user configuration in ~/.breadcrumb/config.json"""

    # Provider name -> config key that stores that provider's API key.
    _API_KEY_FIELDS = {
        "anthropic": "anthropic_key",
        "openai": "openai_key",
        "gemini": "gemini_key",
    }

    def __init__(self):
        self.app_dir = Path.home() / ".breadcrumb"
        self.config_file = self.app_dir / "config.json"
        self.app_dir.mkdir(parents=True, exist_ok=True)
        self._config = self._load()

    def _load(self) -> Dict[str, Any]:
        """Load config from file, layered over the defaults.

        A missing, unreadable, malformed or non-object file yields the plain
        defaults. Keys absent from the file keep their default value, so a
        partial or hand-edited file does not lose settings.
        """
        defaults = self._defaults()
        try:
            stored = json.loads(self.config_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return defaults
        if not isinstance(stored, dict):
            return defaults
        return {**defaults, **stored}

    def _defaults(self) -> Dict[str, Any]:
        """Default configuration."""
        return {
            "provider": "anthropic",
            "anthropic_key": "",
            "openai_key": "",
            "gemini_key": "",
            "ollama_url": "http://localhost:11434",
            "model_anthropic": "claude-3-5-sonnet-20241022",
            "model_openai": "gpt-4o",
            "model_gemini": "gemini-2.0-flash",
            "model_ollama": "llama2",
            "max_tokens": 200000,
            "temperature": 0.7,
        }

    def set(self, key: str, value: Any) -> None:
        """Set a configuration value and persist it immediately."""
        self._config[key] = value
        self.save()

    def get(self, key: str, default: Any = None) -> Any:
        """Get a configuration value, or ``default`` when the key is absent."""
        return self._config.get(key, default)

    def save(self) -> None:
        """Save config to file."""
        self.config_file.write_text(json.dumps(self._config, indent=2), encoding="utf-8")
        # The file holds API keys; restrict it to the owner where the
        # platform supports it. Best effort: a failure must not lose the save.
        try:
            self.config_file.chmod(0o600)
        except OSError:
            pass

    def get_api_key(self, provider: str) -> str:
        """Get API key for provider; empty string for unknown providers."""
        field = self._API_KEY_FIELDS.get(provider)
        if field is None:
            return ""
        return self.get(field, "")

    def set_api_key(self, provider: str, key: str) -> None:
        """Set API key for provider.

        Unknown providers are ignored rather than stored under an empty
        config key.
        """
        field = self._API_KEY_FIELDS.get(provider)
        if field is None:
            return
        self.set(field, key)

    def get_model(self, provider: Optional[str] = None) -> str:
        """Get model name for provider (the configured provider when omitted)."""
        provider = provider or self.get("provider", "anthropic")
        return self.get(f"model_{provider}", "")

    def to_dict(self) -> Dict[str, Any]:
        """Return a shallow copy of the config as a dictionary."""
        return self._config.copy()
|
breadcrumb/history.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Session management and conversation history persistence.
|
|
3
|
+
Supports named sessions per repository.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, List
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SessionManager:
    """Manages named chat sessions for repositories.

    Each session is one JSON file under ``~/.breadcrumb/sessions`` named
    ``<repo hash>_<session name>.json``, where the hash is the first eight hex
    digits of the MD5 of the repository's absolute path.
    """

    def __init__(self, repo_path: Path, session_name: str = "default"):
        self.repo_path = Path(repo_path)
        self.session_name = session_name
        self.history_dir = self._history_dir()
        self.history_dir.mkdir(parents=True, exist_ok=True)

        # Use repo hash to differentiate repos with same name
        self.session_file = self._session_path(self.repo_path, session_name)

        # Creation timestamp read back from disk; None until known.
        self._created = None
        self.messages: List[Dict[str, str]] = self._load()

    @staticmethod
    def _history_dir() -> Path:
        """Directory holding every session file."""
        return Path.home() / ".breadcrumb" / "sessions"

    @staticmethod
    def _repo_hash(repo_path: Path) -> str:
        """Short, stable identifier derived from the repo's absolute path."""
        import hashlib

        return hashlib.md5(str(Path(repo_path).absolute()).encode()).hexdigest()[:8]

    @staticmethod
    def _session_path(repo_path: Path, session_name: str) -> Path:
        """File that stores ``session_name`` for ``repo_path``."""
        prefix = SessionManager._repo_hash(repo_path)
        return SessionManager._history_dir() / f"{prefix}_{session_name}.json"

    def _load(self) -> List[Dict[str, str]]:
        """Load session from disk; missing or unreadable files give ``[]``."""
        try:
            data = json.loads(self.session_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return []
        if not isinstance(data, dict):
            return []

        created = data.get("created")
        if isinstance(created, (int, float)):
            self._created = created

        messages = data.get("messages", [])
        return messages if isinstance(messages, list) else []

    def save(self) -> None:
        """Save session to disk, keeping the original creation timestamp."""
        now = datetime.now().timestamp()
        created = getattr(self, "_created", None)
        if created is None:
            # Nothing usable was stored: fall back to the file's own timestamp
            # when it exists, otherwise this save is the creation.
            try:
                created = self.session_file.stat().st_ctime
            except OSError:
                created = now
            self._created = created

        data = {
            "repo": str(self.repo_path),
            "session": self.session_name,
            "created": created,
            "updated": now,
            "messages": self.messages,
        }
        self.session_file.write_text(json.dumps(data, indent=2), encoding="utf-8")

    def add_message(self, role: str, content: str) -> None:
        """Add a message to the session and persist it."""
        self.messages.append(
            {
                "role": role,
                "content": content,
                "timestamp": datetime.now().isoformat(),
            }
        )
        self.save()

    def get_messages(self) -> List[Dict[str, str]]:
        """Get all messages in the session."""
        return self.messages

    def get_messages_for_api(self) -> List[Dict[str, str]]:
        """Get messages formatted for API calls (without timestamps)."""
        return [{"role": m["role"], "content": m["content"]} for m in self.messages]

    def clear(self) -> None:
        """Clear session history."""
        self.messages = []
        self.save()

    def count_tokens(self) -> int:
        """Rough token count for current session."""
        total_chars = sum(len(m["content"]) for m in self.messages)
        return total_chars // 4  # Rough estimate

    @staticmethod
    def list_sessions(repo_path: Path) -> List[str]:
        """List all available sessions for a repository."""
        history_dir = SessionManager._history_dir()
        if not history_dir.exists():
            return []

        prefix = f"{SessionManager._repo_hash(repo_path)}_"
        # Slice the prefix off rather than str.replace(), which would also
        # remove the same text from inside a session name.
        return sorted(
            file.stem[len(prefix):] for file in history_dir.glob(f"{prefix}*.json")
        )

    @staticmethod
    def delete_session(repo_path: Path, session_name: str) -> None:
        """Delete a named session; a session that does not exist is ignored."""
        session_file = SessionManager._session_path(repo_path, session_name)
        try:
            session_file.unlink()
        except FileNotFoundError:
            pass
|
breadcrumb/ingest.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""
|
|
2
|
+
File ingestion and .breadcrumbignore support.
|
|
3
|
+
Walks repository, respects ignore patterns, and filters code files.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
import pathspec
|
|
10
|
+
|
|
11
|
+
# Path components that cause a file or directory to be skipped during ingestion.
SKIP_DIRS = {
    # Version control and OS metadata
    ".git", ".DS_Store",
    # Dependency trees and virtual environments
    "node_modules", "vendor", ".venv", "venv", "env", ".cargo",
    # Build output
    "dist", "build", "target", ".next", ".nuxt", ".turbo", "bundle.js",
    # Tool caches and reports
    "__pycache__", ".mypy_cache", ".pytest_cache", ".ruff_cache", "coverage",
}

# File suffixes that are treated as ingestible source or config files.
CODE_EXTS = {
    # General-purpose languages
    ".py", ".go", ".rs", ".rb", ".java", ".cpp", ".c", ".h", ".cs", ".php",
    ".swift", ".kt",
    # JavaScript / TypeScript and web front-end
    ".ts", ".tsx", ".js", ".jsx", ".vue", ".svelte", ".html", ".css", ".scss",
    # Query and schema languages
    ".sql", ".graphql",
    # Configuration, data and documentation
    ".yaml", ".yml", ".toml", ".json", ".xml", ".md", ".env.example", ".lock",
    # Shell, build and infrastructure
    ".sh", ".bash", ".tf", ".dockerfile", ".makefile", ".gradle",
}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class FileIngester:
    """Walks repository respecting .breadcrumbignore rules."""

    def __init__(self, repo_path: Path):
        self.repo_path = Path(repo_path)
        self.spec = self._load_ignore_patterns()

    def _load_ignore_patterns(self) -> Optional[pathspec.PathSpec]:
        """Load .breadcrumbignore patterns if it exists.

        Returns None when the file is missing, unreadable, or contains only
        blank lines and comments.
        """
        ignore_file = self.repo_path / ".breadcrumbignore"
        try:
            raw = ignore_file.read_text(encoding="utf-8", errors="replace")
        except OSError:
            return None

        # Strip first, then filter, so indented comment lines are dropped too.
        stripped = (line.strip() for line in raw.splitlines())
        patterns = [p for p in stripped if p and not p.startswith("#")]
        if not patterns:
            return None
        return pathspec.PathSpec.from_lines("gitwildmatch", patterns)

    def _should_skip(self, path: Path) -> bool:
        """Check if path should be skipped.

        Only the components below the repository root are examined, so the
        location of the checkout itself (for example a parent directory named
        ``build``) never causes everything to be skipped.
        """
        try:
            rel_path = path.relative_to(self.repo_path)
        except ValueError:
            # Not under the repo root; fall back to the path as given.
            rel_path = path

        # Skip by directory name (check all components below the root)
        if any(part in SKIP_DIRS for part in rel_path.parts):
            return True

        # Skip by .breadcrumbignore
        return bool(self.spec and self.spec.match_file(str(rel_path)))

    def _is_code_file(self, path: Path) -> bool:
        """Check if file is a code file we care about."""
        if path.suffix.lower() in CODE_EXTS:
            return True
        # Path.suffix is only the last dotted component, so entries with more
        # than one dot (".env.example") have to be matched against the name.
        compound_exts = tuple(ext for ext in CODE_EXTS if ext.count(".") > 1)
        return path.name.lower().endswith(("dockerfile", "makefile") + compound_exts)

    def get_files(self) -> List[Path]:
        """Get all code files respecting ignore patterns, sorted by path."""
        # Imported locally: the module header does not import os.
        import os

        files = []
        for dirpath, dirnames, filenames in os.walk(self.repo_path):
            # Prune in place so os.walk never descends into skipped trees.
            # Unreadable directories are passed over by os.walk instead of
            # aborting the whole scan.
            dirnames[:] = [name for name in dirnames if name not in SKIP_DIRS]

            base = Path(dirpath)
            for filename in filenames:
                candidate = base / filename
                if self._should_skip(candidate) or not self._is_code_file(candidate):
                    continue
                if candidate.is_file():
                    files.append(candidate)

        return sorted(files)

    def get_content(self, max_file_size: int = 50000, max_total: int = 180000) -> str:
        """
        Get concatenated file contents with size limits.

        Large files get truncated; once adding a file would exceed max_total,
        a marker with the number of remaining files is appended and reading
        stops. Files that cannot be read are left out.
        """
        files = self.get_files()
        content_parts: List[str] = []
        total_size = 0

        for index, file_path in enumerate(files):
            try:
                file_content = file_path.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                continue

            # Truncate large files
            if len(file_content) > max_file_size:
                file_content = file_content[:max_file_size] + "\n... [truncated]"

            # Check if adding this file exceeds total
            if total_size + len(file_content) > max_total:
                # This file and every later one are skipped.
                content_parts.append(
                    "\n... [context limit reached, "
                    f"{len(files) - index} files skipped]"
                )
                break

            rel_path = file_path.relative_to(self.repo_path)
            content_parts.append(f"# {rel_path}\n{file_content}\n")
            total_size += len(file_content)

        return "\n".join(content_parts)
|