codebookx 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,144 @@
1
+ import sqlite3
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
class KnowledgeGraph:
    """SQLite-backed store of files, code symbols and relations between symbols.

    Three tables are created on construction: ``files`` (path + content hash),
    ``symbols`` (named code snippets belonging to a file) and ``relations``
    (typed edges between two symbols). Deleting a symbol cascades to its
    relations, and deleting a file cascades to its symbols, because foreign
    keys are switched on for every connection.

    Every public method opens a short-lived connection, runs inside a
    transaction, and closes the connection again before returning.
    """

    # Words ignored when matching a natural-language question against symbol names.
    _STOP_WORDS = frozenset({
        "the", "a", "an", "is", "are", "was", "were", "be", "been",
        "being", "have", "has", "had", "do", "does", "did", "will",
        "would", "could", "should", "may", "might", "how", "what",
        "when", "where", "why", "which", "who", "this", "that",
        "these", "those", "it", "its", "in", "on", "at", "to",
        "for", "of", "with", "by", "from", "and", "or", "not",
        "please", "tell", "me", "about", "work", "explain",
    })

    def __init__(self, db_path: str):
        """Remember *db_path* and make sure the schema exists."""
        self.db_path = db_path
        self._init_db()

    def _get_conn(self):
        """Open a new connection with foreign-key enforcement enabled."""
        conn = sqlite3.connect(self.db_path)
        conn.execute("PRAGMA foreign_keys = ON")
        return conn

    def _session(self):
        """Return a context manager that yields a connection and closes it on exit.

        ``with conn:`` on a sqlite3 connection only commits or rolls back; it
        never closes the handle. Wrapping the connection in ``closing`` makes
        sure it is released, so use it as
        ``with self._session() as conn, conn:`` to get both behaviours.
        """
        from contextlib import closing

        return closing(self._get_conn())

    def _init_db(self):
        """Create the ``files``, ``symbols`` and ``relations`` tables if missing."""
        with self._session() as conn, conn:
            conn.execute("PRAGMA journal_mode = WAL")
            conn.execute("""
                CREATE TABLE IF NOT EXISTS files (
                    id INTEGER PRIMARY KEY,
                    path TEXT UNIQUE,
                    hash TEXT
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS symbols (
                    id INTEGER PRIMARY KEY,
                    file_id INTEGER,
                    name TEXT,
                    type TEXT,
                    start_line INTEGER,
                    end_line INTEGER,
                    code TEXT,
                    FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS relations (
                    id INTEGER PRIMARY KEY,
                    from_id INTEGER,
                    to_id INTEGER,
                    type TEXT, -- CALLS, IMPORTS, CONTAINS
                    FOREIGN KEY(from_id) REFERENCES symbols(id) ON DELETE CASCADE,
                    FOREIGN KEY(to_id) REFERENCES symbols(id) ON DELETE CASCADE
                )
            """)

    def get_file_hash(self, path: str) -> Optional[str]:
        """Return the stored hash for *path*, or ``None`` if the file is unknown."""
        with self._session() as conn, conn:
            res = conn.execute("SELECT hash FROM files WHERE path=?", (path,)).fetchone()
            return res[0] if res else None

    def clear_file_symbols(self, file_id: int):
        """Delete every symbol of *file_id* (their relations go with them via cascade)."""
        with self._session() as conn, conn:
            conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,))

    def add_file(self, path: str, file_hash: str) -> int:
        """Insert *path* or update its hash, and return the row id of the file."""
        with self._session() as conn, conn:
            conn.execute(
                "INSERT INTO files (path, hash) VALUES (?, ?) ON CONFLICT(path) DO UPDATE SET hash=excluded.hash",
                (path, file_hash)
            )
            # lastrowid is not reliable for the UPDATE branch of an upsert,
            # so look the id up by its unique path instead.
            return conn.execute("SELECT id FROM files WHERE path=?", (path,)).fetchone()[0]

    def add_symbol(self, file_id: int, name: str, sym_type: str, start: int, end: int, code: str) -> int:
        """Insert one symbol row and return its new id."""
        with self._session() as conn, conn:
            cursor = conn.execute(
                "INSERT INTO symbols (file_id, name, type, start_line, end_line, code) VALUES (?, ?, ?, ?, ?, ?)",
                (file_id, name, sym_type, start, end, code)
            )
            return cursor.lastrowid

    def add_relation(self, from_id: int, to_id: int, rel_type: str):
        """Insert a typed edge from symbol *from_id* to symbol *to_id*."""
        with self._session() as conn, conn:
            conn.execute(
                "INSERT INTO relations (from_id, to_id, type) VALUES (?, ?, ?)",
                (from_id, to_id, rel_type)
            )

    @staticmethod
    def _format_symbols(entries) -> str:
        """Render ``(type, name, path, code)`` tuples as a text block.

        Each symbol contributes a header line; if it has code, the first 500
        characters follow inside a fenced block.
        """
        parts = []
        for sym_type, name, path, code in entries:
            parts.append(f"{sym_type}: {name} ({path})")
            if code:
                parts.append(f" ```\n{code[:500]}\n ```")
        return "\n".join(parts)

    def get_all_symbol_context(self) -> str:
        """Return a text dump of every symbol, ordered by file path then name."""
        with self._session() as conn, conn:
            rows = conn.execute("""
                SELECT s.name, s.type, f.path, s.code
                FROM symbols s
                JOIN files f ON s.file_id = f.id
                ORDER BY f.path, s.name
            """).fetchall()
        return self._format_symbols(
            (sym_type, name, path, code) for name, sym_type, path, code in rows
        )

    def get_symbol_context_for_question(self, question: str, top_n: int = 30) -> str:
        """Return a text dump of the symbols whose names best match *question*.

        The question is split on whitespace; trailing punctuation is stripped,
        stop words and one-character tokens are dropped. A symbol scores one
        point per remaining token contained in its lower-cased name. Symbols
        with a score of zero are omitted; the *top_n* highest scores are
        rendered (ties keep alphabetical name order).
        """
        tokens = set()
        for raw in question.split():
            stripped = raw.rstrip("?.!,;:")
            word = stripped.lower()
            if len(stripped) > 1 and word not in self._STOP_WORDS:
                tokens.add(word)

        with self._session() as conn, conn:
            rows = conn.execute("""
                SELECT s.name, s.type, f.path, s.code
                FROM symbols s
                JOIN files f ON s.file_id = f.id
                ORDER BY s.name
            """).fetchall()

        scored = []
        for name, sym_type, path, code in rows:
            # The name column is nullable; treat NULL as an empty name.
            name_lower = (name or "").lower()
            score = sum(1 for t in tokens if t in name_lower)
            if score > 0:
                scored.append((score, sym_type, name, path, code))
        # list.sort is stable, so equal scores stay in name order.
        scored.sort(key=lambda x: -x[0])
        return self._format_symbols(entry[1:] for entry in scored[:top_n])

    def get_symbol_id_by_name(self, name: str) -> Optional[int]:
        """Return the id of one symbol called *name*, or ``None`` if there is none."""
        with self._session() as conn, conn:
            res = conn.execute(
                "SELECT id FROM symbols WHERE name=? LIMIT 1", (name,)
            ).fetchone()
            return res[0] if res else None

    def get_symbol_ids_by_file(self, file_path: str) -> list[int]:
        """Return the ids of all symbols that belong to the file at *file_path*."""
        with self._session() as conn, conn:
            rows = conn.execute("""
                SELECT s.id FROM symbols s
                JOIN files f ON s.file_id = f.id
                WHERE f.path=?
            """, (file_path,)).fetchall()
            return [r[0] for r in rows]
@@ -0,0 +1,127 @@
1
+ import hashlib
2
+ from pathlib import Path
3
+ from typing import List, Dict, Any
4
+ from .graph import KnowledgeGraph
5
+ from .parser import extract_snippets, extract_python_relations, resolve_relative_imports, extract_ts_relations, resolve_ts_relative_imports
6
+ from .vendor.claude_mem_lite import generate_code_skeleton
7
+
8
+ class Indexer:
9
+ def __init__(self, root_path: str, db_path: str):
10
+ self.root = Path(root_path).resolve()
11
+ self.kg = KnowledgeGraph(db_path)
12
+ self.skip_dirs = {".git", "node_modules", "__pycache__", "dist", "build"}
13
+
14
+ def get_file_hash(self, file_path: Path) -> str:
15
+ """Calculate SHA-256 hash of file content."""
16
+ hasher = hashlib.sha256()
17
+ with open(file_path, "rb") as f:
18
+ for chunk in iter(lambda: f.read(4096), b""):
19
+ hasher.update(chunk)
20
+ return hasher.hexdigest()
21
+
22
+ def index(self, force: bool = False):
23
+ """Run the multi-phase indexing pipeline."""
24
+ print(f"🔍 Indexing {self.root}...")
25
+
26
+ # Phase 1: Discovery
27
+ files = self._discover_files()
28
+
29
+ # Phase 2: Parsing & Ingestion
30
+ for file_path in files:
31
+ rel_path = str(file_path.relative_to(self.root))
32
+ file_hash = self.get_file_hash(file_path)
33
+
34
+ # Check if file changed or force re-index
35
+ existing_hash = self.kg.get_file_hash(rel_path)
36
+ if not force and existing_hash == file_hash:
37
+ continue
38
+
39
+ # TODO: Replace with RETURNING id when SQLite minimum version allows
40
+ file_id = self.kg.add_file(rel_path, file_hash)
41
+ self.kg.clear_file_symbols(file_id)
42
+
43
+ # Use core extraction for now (Phase 1 legacy)
44
+ source = file_path.read_text(errors="ignore")
45
+ snippets = extract_snippets(file_path, source)
46
+
47
+ # P1.1: Skeleton fallback for non-Python/JS languages
48
+ if not snippets and file_path.suffix in (".go", ".rs", ".java", ".cpp", ".cs"):
49
+ skeleton = generate_code_skeleton(file_path)
50
+ if skeleton:
51
+ snippets = [{
52
+ "name": file_path.stem,
53
+ "start": 1,
54
+ "end": skeleton.count("\n") + 1,
55
+ "code": skeleton,
56
+ "type": "module",
57
+ }]
58
+
59
+ # Map FQN to DB ID for relations
60
+ symbol_ids = {}
61
+ for snip in snippets:
62
+ sym_id = self.kg.add_symbol(
63
+ file_id,
64
+ snip["name"],
65
+ snip["type"],
66
+ snip["start"],
67
+ snip["end"],
68
+ snip["code"]
69
+ )
70
+ symbol_ids[snip["name"]] = sym_id
71
+
72
+ # Wire relations if parent exists
73
+ parent_name = snip.get("parent")
74
+ if parent_name and parent_name in symbol_ids:
75
+ self.kg.add_relation(symbol_ids[parent_name], sym_id, "CONTAINS")
76
+
77
+ # Phase 3: Two-pass post-processing for CALLS/IMPORTS (Python + JS/TS)
78
+ ts_extensions = {".ts", ".tsx", ".js", ".jsx"}
79
+ py_files = [f for f in files if f.suffix == ".py"]
80
+ ts_files = [f for f in files if f.suffix in ts_extensions]
81
+ lang_files = [
82
+ (py_files, extract_python_relations, resolve_relative_imports),
83
+ (ts_files, extract_ts_relations, resolve_ts_relative_imports),
84
+ ]
85
+
86
+ if py_files or ts_files:
87
+ print(" Resolving CALLS/IMPORTS...")
88
+
89
+ # Pass 1: Build map of all symbol names -> id across all languages
90
+ all_symbols = {}
91
+ for file_path in py_files + ts_files:
92
+ source = file_path.read_text(errors="ignore")
93
+ snippets = extract_snippets(file_path, source)
94
+ for snip in snippets:
95
+ sid = self.kg.get_symbol_id_by_name(snip["name"])
96
+ if sid:
97
+ all_symbols[snip["name"]] = sid
98
+
99
+ # Pass 2: Extract + resolve + wire relations per language
100
+ for file_list, extract_fn, resolve_fn in lang_files:
101
+ for file_path in file_list:
102
+ source = file_path.read_text(errors="ignore")
103
+ rels = extract_fn(source)
104
+ rels = resolve_fn(file_path, rels)
105
+ for rel in rels:
106
+ target_name = rel["target"]
107
+ bare_name = target_name.split(".")[-1]
108
+ resolved = all_symbols.get(target_name) or all_symbols.get(bare_name)
109
+ if resolved:
110
+ from_ids = self.kg.get_symbol_ids_by_file(
111
+ str(file_path.relative_to(self.root))
112
+ )
113
+ for fid in from_ids:
114
+ self.kg.add_relation(fid, resolved, rel["type"])
115
+
116
+ print(f"✅ Indexing complete. Knowledge Graph updated.")
117
+
118
+ def _discover_files(self) -> List[Path]:
119
+ import os
120
+ files = []
121
+ for root, dirs, filenames in os.walk(self.root):
122
+ dirs[:] = [d for d in dirs if d not in self.skip_dirs]
123
+ for f in filenames:
124
+ file_path = Path(root) / f
125
+ if file_path.suffix in (".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".java", ".cpp", ".cs"):
126
+ files.append(file_path)
127
+ return files
@@ -0,0 +1,337 @@
1
+ import ast
2
+ import re
3
+ from pathlib import Path
4
+
5
def extract_python_functions(source: str) -> list[dict]:
    """Return one snippet per function, async function and class found in *source*.

    Definitions are reported in source (pre-order) sequence, so an enclosing
    class or function always precedes the definitions nested inside it.
    Nested definitions get a dotted name built from their enclosing
    definitions (``Outer.method``); definitions inside ``if``/``try``/``with``
    bodies keep the scope of whatever encloses those statements.

    Each snippet is a dict with ``name``, ``start`` and ``end`` (1-based,
    inclusive line numbers), ``code`` (the source lines of that range),
    ``type`` (``"class"`` or ``"function"``) and ``parent`` (dotted name of
    the enclosing definition, or ``None``).

    Source that cannot be parsed yields an empty list.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError, RecursionError):
        # ValueError: NUL bytes on older interpreters; RecursionError: the
        # parser gave up on extremely nested input.
        return []

    lines = source.splitlines()
    definition_types = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    snippets = []

    # Explicit stack instead of recursion: the walk visits every AST node, and
    # deeply nested expressions would otherwise exhaust the call stack.
    pending = [(tree, None)]
    while pending:
        node, parent_name = pending.pop()
        scope = parent_name
        if isinstance(node, definition_types):
            fqn = f"{parent_name}.{node.name}" if parent_name else node.name
            snippets.append({
                "name": fqn,
                "start": node.lineno,
                "end": node.end_lineno,
                "code": "\n".join(lines[node.lineno - 1:node.end_lineno]),
                "type": "class" if isinstance(node, ast.ClassDef) else "function",
                "parent": parent_name,
            })
            # Children of this definition live in its scope.
            scope = fqn
        # Push in reverse so children are popped in source order.
        children = list(ast.iter_child_nodes(node))
        pending.extend((child, scope) for child in reversed(children))

    return snippets
35
+
36
def extract_python_relations(source: str) -> list[dict]:
    """Extract CALLS and IMPORTS relations from Python source.

    IMPORTS entries are dicts with ``type``, ``target``, ``line`` and
    ``level``: ``import a.b`` yields target ``a.b`` at level 0, and
    ``from ..pkg import y`` yields target ``pkg.y`` at level 2 (the number of
    leading dots). CALLS entries carry ``type``, ``target`` and ``line`` only;
    the target is the called name, or the attribute name for ``obj.method()``.

    Source that cannot be parsed yields an empty list.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError, RecursionError):
        # Only parse failures are expected here; anything else is a real bug
        # and should surface instead of being silently swallowed.
        return []

    relations = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                relations.append({"type": "IMPORTS", "target": alias.name, "line": node.lineno, "level": 0})
        elif isinstance(node, ast.ImportFrom):
            module = node.module or ""
            rel_level = getattr(node, 'level', 0)
            for alias in node.names:
                full_name = f"{module}.{alias.name}" if module else alias.name
                relations.append({"type": "IMPORTS", "target": full_name, "line": node.lineno, "level": rel_level})
        elif isinstance(node, ast.Call):
            if isinstance(node.func, ast.Name):
                relations.append({"type": "CALLS", "target": node.func.id, "line": node.lineno})
            elif isinstance(node.func, ast.Attribute):
                # e.g., obj.method() → target = "method"
                relations.append({"type": "CALLS", "target": node.func.attr, "line": node.lineno})
    return relations
63
+
64
def resolve_relative_imports(file_path: Path, rels: list[dict]) -> list[dict]:
    """Turn relative Python imports into absolute dotted module paths.

    Relations that are not IMPORTS, or whose level is 0 or missing, are passed
    through untouched (the very same dict objects). For a relative import the
    anchor directory is the importing file's directory for level 1, its
    parent for level 2, and so on. The package prefix is then collected by
    walking upwards from the anchor for as long as each directory contains an
    ``__init__.py``. The result is a copy of the relation with the prefixed
    target and ``level`` reset to 0.
    """
    output = []
    for relation in rels:
        is_relative = relation["type"] == "IMPORTS" and relation.get("level", 0) != 0
        if not is_relative:
            output.append(relation)
            continue

        # Climb (level - 1) directories, stopping early at the filesystem root.
        anchor = file_path.parent
        remaining = relation["level"] - 1
        while remaining > 0 and anchor.parent != anchor:
            anchor = anchor.parent
            remaining -= 1

        # Collect package names innermost-first, then flip to outermost-first.
        packages = []
        cursor = anchor
        while (cursor / "__init__.py").exists():
            packages.append(cursor.name)
            if cursor.parent == cursor:
                break
            cursor = cursor.parent
        packages.reverse()

        absolute = dict(relation)
        absolute["target"] = ".".join(packages + relation["target"].split("."))
        absolute["level"] = 0  # Now absolute
        output.append(absolute)

    return output
105
+
106
def extract_ts_relations(source: str) -> list[dict]:
    """Extract relative ES-module imports and re-exports from JS/TS source.

    Only specifiers starting with ``./`` or ``../`` are reported. Each line is
    first stripped of ``//`` and ``/* ... */`` comments by a small state
    machine (string contents are kept, but comment markers inside strings are
    not treated as comments). The import regex is then run on what is left of
    that single line, so statements spread over several lines are not
    detected.

    Returns a list of dicts with ``type`` (always ``"IMPORTS"``), ``target``
    (the specifier as written), ``line`` (1-based) and ``level`` (1 for
    ``./``, otherwise the number of ``..`` segments plus one).
    """
    relations = []

    # Combined regex for:
    # 1. import/export ... from './path'
    # 2. import('./path') [dynamic]
    # 3. import './path' [side-effect]
    import_re = re.compile(
        r"""(?:import|export)\s+.*?from\s+['"](\.\.?\/[^'"]+)['"]"""
        r"""|import\s*\(\s*['"](\.\.?\/[^'"]+)['"]\s*\)"""
        r"""|import\s+['"](\.\.?\/[^'"]+)['"]"""
    )

    in_string = False
    string_char = None
    in_block_comment = False

    for line_no, line in enumerate(source.splitlines(), start=1):
        j = 0
        length = len(line)
        code_on_line = []

        # Per-line state machine to strip comments; string and block-comment
        # state carries over to the next line.
        while j < length:
            char = line[j]
            following = line[j + 1] if j + 1 < length else ""

            if in_block_comment:
                if char == '*' and following == '/':
                    in_block_comment = False
                    j += 2
                else:
                    j += 1
                continue

            if in_string:
                if char == '\\':
                    # An escaped character (e.g. \' or \\) never ends the string.
                    code_on_line.append(line[j:j + 2])
                    j += 2
                    continue
                if char == string_char:
                    in_string = False
                    string_char = None
                code_on_line.append(char)
                j += 1
                continue

            if char == '/' and following == '*':
                in_block_comment = True
                j += 2
                continue
            if char == '/' and following == '/':
                break  # Skip rest of line

            # String boundary tracking (but keep the chars)
            if char in ("'", '"', '`'):
                in_string = True
                string_char = char

            code_on_line.append(char)
            j += 1

        # Only template literals may span lines. A quote that is still open
        # here (an apostrophe in a regex literal or JSX text, say) must not
        # leak into the following lines, unless the line ends in a backslash
        # continuation.
        if in_string and string_char != '`' and not line.endswith('\\'):
            in_string = False
            string_char = None

        clean_code = "".join(code_on_line)

        # Heuristic to avoid matching imports inside string assignments
        # e.g., const s = "import { X } from './mod'";
        if "=" in clean_code and clean_code.find("=") < clean_code.find("import"):
            # This also drops dynamic imports such as 'const p = import("./mod")';
            # skipping is preferred over false positives.
            continue

        for match in import_re.finditer(clean_code):
            path = next(g for g in match.groups() if g is not None)

            # Level calculation: ./ -> 1, ../ -> 2, ../../ -> 3
            if path.startswith('./'):
                level = 1
            else:
                level = len([p for p in path.split('/') if p == '..']) + 1

            relations.append({
                "type": "IMPORTS",
                "target": path,
                "line": line_no,
                "level": level,
            })

    return relations
191
+
192
def resolve_ts_relative_imports(file_path: Path, rels: list[dict]) -> list[dict]:
    """Resolve relative JS/TS imports to dotted module paths.

    Relations that are not IMPORTS, or whose level is 0 or missing, are passed
    through untouched. For the others, the specifier is located on disk with
    Node.js-style probing: ``<path>.ts|.tsx|.js|.jsx`` first, then, when the
    path is a directory, ``<path>/index.<ext>``.

    When the module is found, the target becomes a dotted path made of the
    names of the enclosing directories that directly contain JS/TS files
    (collected by walking up from the base directory) followed by the module
    path segments. When it is not found, the target is left as written. In
    both cases the returned copy has ``level`` set to 0.
    """
    EXTENSIONS = ['.ts', '.tsx', '.js', '.jsx']
    INDEX_FILES = [f'index{e}' for e in EXTENSIONS]
    resolved_rels = []

    def module_file_exists(candidate: Path) -> bool:
        """True if *candidate* resolves to a source file with a known extension."""
        # Append the extension: with_suffix() would replace the tail of dotted
        # names such as "app.component" and probe "app.ts" instead.
        for ext in EXTENSIONS:
            if candidate.with_name(candidate.name + ext).exists():
                return True
        # Specifier already carries an extension ("./mod.js"): accept the file
        # itself or a sibling with another known extension.
        if candidate.suffix in EXTENSIONS:
            return any(candidate.with_suffix(ext).exists() for ext in EXTENSIONS)
        return False

    for rel in rels:
        if rel["type"] != "IMPORTS" or rel.get("level", 0) == 0:
            resolved_rels.append(rel)
            continue

        level = rel["level"]
        target = rel["target"]

        # Split off ALL leading "./" and "../" segments:
        # "../../utils/mod" -> two levels up, module "utils/mod".
        segments = target.split("/")
        consumed = 0
        ups = 0
        while consumed < len(segments) and segments[consumed] in (".", ".."):
            if segments[consumed] == "..":
                ups += 1
            consumed += 1
        module_parts = [part for part in segments[consumed:] if part]

        # The specifier's own leading dots say how far to climb; fall back to
        # the supplied level only when the target has no dot prefix.
        climb = ups if consumed else level - 1
        base = file_path.parent
        for _ in range(climb):
            if base.parent == base:
                break
            base = base.parent

        candidate = base.joinpath(*module_parts)

        # Extension probing
        # 1. Try file extensions directly
        found = bool(module_parts) and module_file_exists(candidate)

        # 2. Try directory index files
        if not found and candidate.is_dir():
            found = any((candidate / idx).exists() for idx in INDEX_FILES)

        new_rel = rel.copy()
        new_rel["level"] = 0  # Now absolute (or absolute-ish in the fallback case)

        if found:
            # Discover package prefix by walking up as long as source files exist in the dir
            prefix_parts = []
            walk_dir = base
            while walk_dir.name and walk_dir.parent != walk_dir:
                # Check if directory has any JS/TS files in it
                try:
                    has_source = any(f.suffix in EXTENSIONS for f in walk_dir.iterdir() if f.is_file())
                except (PermissionError, FileNotFoundError):
                    has_source = False

                if not has_source:
                    break
                prefix_parts.insert(0, walk_dir.name)
                walk_dir = walk_dir.parent

            # Build resolved dotted path
            new_rel["target"] = ".".join(prefix_parts + module_parts)

        resolved_rels.append(new_rel)

    return resolved_rels
265
+
266
def extract_ts_functions(source: str) -> list[dict]:
    """Find top-level functions, classes, arrow constants and method-like blocks in JS/TS.

    Declarations are detected with a regex anchored at the start of a line
    (column 0), so indented members are not reported. For every match the end
    of the definition is found by scanning forward (at most 500 lines):

    * the line on which the first opened ``{`` is balanced again, or
    * the line of the first ``;`` seen before any ``{`` (expression-bodied
      arrow functions, overload signatures), or
    * the declaration line itself when neither is found.

    Braces and semicolons inside strings, template literals and comments are
    ignored.

    Returns dicts with ``name``, ``start``/``end`` (1-based, inclusive),
    ``code`` and ``type`` (always ``"function"``).
    """
    snippets = []
    lines = source.splitlines()
    pattern = re.compile(
        r"^(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:"
        r"function\s*\*?\s+(\w+)"
        r"|class\s+(\w+)"
        r"|const\s+(\w+)\s*=\s*(?:async\s+)?(?:\([^)]*\)|[^=]+)=>"
        r"|(?!(?:if|for|while|switch|catch)\b)(\w+)\s*\([^)]*\)\s*\{"
        r")",
        re.MULTILINE,
    )

    def find_end_line(start_line: int) -> int:
        """Return the 1-based last line of the definition starting at 0-based *start_line*."""
        depth = 0
        started = False        # True once the first "{" has been seen
        quote = None           # Opening quote character while inside a string
        in_block_comment = False

        for i in range(start_line, min(start_line + 500, len(lines))):
            line = lines[i]
            length = len(line)
            j = 0
            while j < length:
                char = line[j]
                following = line[j + 1] if j + 1 < length else ""

                if in_block_comment:
                    if char == '*' and following == '/':
                        in_block_comment = False
                        j += 2
                    else:
                        j += 1
                    continue

                if quote is not None:
                    if char == '\\':
                        # Skip the escaped character so \' or \" cannot close the string.
                        j += 2
                        continue
                    if char == quote:
                        quote = None
                    j += 1
                    continue

                if char == '/' and following == '/':
                    break  # Skip rest of line
                if char == '/' and following == '*':
                    in_block_comment = True
                    j += 2
                    continue

                if char in ("'", '"', '`'):
                    quote = char
                elif char == '{':
                    depth += 1
                    started = True
                elif char == '}' and started:
                    depth -= 1
                    if depth == 0:
                        return i + 1
                elif char == ';' and not started:
                    # Statement finished without ever opening a block.
                    return i + 1
                j += 1

            # Only template literals may continue on the next line.
            if quote in ("'", '"') and not line.endswith('\\'):
                quote = None

        # No terminator found: report just the declaration line rather than
        # an empty snippet whose end precedes its start.
        return start_line + 1

    for match in pattern.finditer(source):
        name = next(g for g in match.groups() if g is not None)
        start_line = source[: match.start()].count("\n")
        end_line = find_end_line(start_line)

        snippets.append({
            "name": name,
            "start": start_line + 1,
            "end": end_line,
            "code": "\n".join(lines[start_line:end_line]),
            "type": "function",
        })
    return snippets
331
+
332
def extract_snippets(file_path: Path, source: str) -> list[dict]:
    """Dispatch *source* to the snippet extractor matching the file's suffix.

    ``.py`` goes to the Python extractor, ``.js``/``.ts``/``.jsx``/``.tsx`` to
    the JS/TS extractor; any other suffix (the comparison is case-sensitive)
    produces an empty list.
    """
    extension = file_path.suffix
    if extension == ".py":
        return extract_python_functions(source)
    if extension in (".js", ".ts", ".jsx", ".tsx"):
        return extract_ts_functions(source)
    return []
@@ -0,0 +1,39 @@
1
+ import ast
2
+ from pathlib import Path
3
+
4
def generate_code_skeleton(file_path: Path) -> str:
    """Build a compact outline of a source file.

    The first line is always ``File: <name>``. Python files then list their
    top-level classes (with the methods and nested classes found directly in
    the class body) and top-level functions. JS/TS files list exported
    functions, then exported classes, then exported constants. Other suffixes
    produce the header line only.

    Returns an empty string if the file does not exist. Any error while
    outlining is printed and whatever was collected so far is returned.
    """
    if not file_path.exists():
        return ""

    text = file_path.read_text(errors="ignore")
    outline = [f"File: {file_path.name}"]
    callables = (ast.FunctionDef, ast.AsyncFunctionDef)

    try:
        extension = file_path.suffix
        if extension == ".py":
            for top in ast.parse(text).body:
                if isinstance(top, callables):
                    outline.append(f" Function: {top.name}")
                    continue
                if not isinstance(top, ast.ClassDef):
                    continue
                outline.append(f" Class: {top.name}")
                for member in top.body:
                    if isinstance(member, callables):
                        outline.append(f" Method: {member.name}")
                    elif isinstance(member, ast.ClassDef):
                        outline.append(f" Nested Class: {member.name}")
        elif extension in (".ts", ".tsx", ".js", ".jsx"):
            # Simple regex-based skeleton for JS/TS
            import re
            export_patterns = (
                r"export\s+(?:async\s+)?function\s+(\w+)",
                r"export\s+class\s+(\w+)",
                r"export\s+const\s+(\w+)\s*=",
            )
            # One pass per pattern, so results are grouped by kind, not by position.
            for export_pattern in export_patterns:
                for found in re.finditer(export_pattern, text):
                    outline.append(f" Symbol: {found.group(1)}")
    except Exception as e:
        print(f"Skeleton generation error: {e}")

    return "\n".join(outline)
@@ -0,0 +1,30 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import List
4
+
5
def pack_repo(root_path: Path, include_extensions: List[str] = None) -> str:
    """Concatenate the text files of a repository into one string.

    Walks *root_path*, pruning ``.git``, ``node_modules``, ``__pycache__``,
    ``dist`` and ``build``. Every file whose lower-cased suffix is listed in
    *include_extensions* (default: Python, JS/TS, ``.md`` and ``.txt``) is
    emitted between ``--- BEGIN FILE: <relative path> ---`` and
    ``--- END FILE: <relative path> ---`` marker lines. Files that cannot be
    read are skipped silently.
    """
    wanted = include_extensions
    if wanted is None:
        wanted = [".py", ".ts", ".tsx", ".js", ".jsx", ".md", ".txt"]

    ignored_dirs = {".git", "node_modules", "__pycache__", "dist", "build"}
    chunks = []

    for current_dir, subdirs, names in os.walk(root_path):
        # Prune in place so os.walk never enters the ignored directories.
        subdirs[:] = [sub for sub in subdirs if sub not in ignored_dirs]

        for name in names:
            candidate = Path(current_dir) / name
            if candidate.suffix.lower() not in wanted:
                continue
            try:
                relative = candidate.relative_to(root_path)
                text = candidate.read_text(errors="ignore")
            except Exception:
                continue
            chunks.extend((
                f"--- BEGIN FILE: {relative} ---",
                text,
                f"--- END FILE: {relative} ---\n",
            ))

    return "\n".join(chunks)