codebookx 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebookx/__init__.py +1 -0
- codebookx/cli.py +85 -0
- codebookx/core.py +452 -0
- codebookx/engine/graph.py +144 -0
- codebookx/engine/indexer.py +127 -0
- codebookx/engine/parser.py +337 -0
- codebookx/engine/vendor/claude_mem_lite.py +39 -0
- codebookx/engine/vendor/repomix_lite.py +30 -0
- codebookx/llm.py +73 -0
- codebookx/prompts.py +43 -0
- codebookx/webapp/app.py +153 -0
- codebookx-3.0.0.dist-info/METADATA +115 -0
- codebookx-3.0.0.dist-info/RECORD +17 -0
- codebookx-3.0.0.dist-info/WHEEL +5 -0
- codebookx-3.0.0.dist-info/entry_points.txt +3 -0
- codebookx-3.0.0.dist-info/licenses/NOTICE.md +20 -0
- codebookx-3.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import sqlite3
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
class KnowledgeGraph:
    """SQLite-backed store of indexed files, their symbols and symbol relations.

    Three tables are maintained: ``files`` (path + content hash), ``symbols``
    (named code spans belonging to a file) and ``relations`` (typed edges
    between two symbols).  Foreign keys use ``ON DELETE CASCADE``, so removing
    a file's symbols also removes every relation that touches them.

    Each method opens its own short-lived connection through ``_session()``,
    which commits on success, rolls back on error and always closes the
    connection.  (Using a ``sqlite3.Connection`` directly as a context manager
    only manages the transaction; it never closes the connection.)
    """

    # Words ignored when matching a natural-language question against symbol names.
    _STOP_WORDS = frozenset({
        "the", "a", "an", "is", "are", "was", "were", "be", "been",
        "being", "have", "has", "had", "do", "does", "did", "will",
        "would", "could", "should", "may", "might", "how", "what",
        "when", "where", "why", "which", "who", "this", "that",
        "these", "those", "it", "its", "in", "on", "at", "to",
        "for", "of", "with", "by", "from", "and", "or", "not",
        "please", "tell", "me", "about", "work", "explain",
    })

    # Punctuation stripped from the end of each question token.
    _TRAILING_PUNCTUATION = "?.!,;:"

    class _Session:
        """Context manager: commit or roll back the transaction, then close."""

        def __init__(self, conn):
            self._conn = conn

        def __enter__(self):
            return self._conn

        def __exit__(self, exc_type, exc, tb):
            try:
                if exc_type is None:
                    self._conn.commit()
                else:
                    self._conn.rollback()
            finally:
                self._conn.close()
            return False  # never swallow the caller's exception

    def __init__(self, db_path: str):
        """Remember ``db_path`` and create the schema if it does not exist yet."""
        self.db_path = db_path
        self._init_db()

    def _get_conn(self):
        """Open a new connection to ``db_path`` with foreign-key enforcement on.

        The caller owns the returned connection and must close it.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute("PRAGMA foreign_keys = ON")
        except Exception:
            conn.close()
            raise
        return conn

    def _session(self):
        """Return a context manager wrapping a fresh connection (see ``_Session``)."""
        return self._Session(self._get_conn())

    def _init_db(self):
        """Switch the database to WAL journaling and create the three tables."""
        with self._session() as conn:
            conn.execute("PRAGMA journal_mode = WAL")
            conn.execute("""
                CREATE TABLE IF NOT EXISTS files (
                    id INTEGER PRIMARY KEY,
                    path TEXT UNIQUE,
                    hash TEXT
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS symbols (
                    id INTEGER PRIMARY KEY,
                    file_id INTEGER,
                    name TEXT,
                    type TEXT,
                    start_line INTEGER,
                    end_line INTEGER,
                    code TEXT,
                    FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS relations (
                    id INTEGER PRIMARY KEY,
                    from_id INTEGER,
                    to_id INTEGER,
                    type TEXT, -- CALLS, IMPORTS, CONTAINS
                    FOREIGN KEY(from_id) REFERENCES symbols(id) ON DELETE CASCADE,
                    FOREIGN KEY(to_id) REFERENCES symbols(id) ON DELETE CASCADE
                )
            """)

    def get_file_hash(self, path: str) -> Optional[str]:
        """Return the stored hash for ``path``, or ``None`` if the file is unknown."""
        with self._session() as conn:
            row = conn.execute("SELECT hash FROM files WHERE path=?", (path,)).fetchone()
        return row[0] if row else None

    def clear_file_symbols(self, file_id: int):
        """Delete every symbol of ``file_id`` (relations go with them via cascade)."""
        with self._session() as conn:
            conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,))

    def add_file(self, path: str, file_hash: str) -> int:
        """Insert ``path`` or update its hash, and return the row id of the file."""
        with self._session() as conn:
            conn.execute(
                "INSERT INTO files (path, hash) VALUES (?, ?) ON CONFLICT(path) DO UPDATE SET hash=excluded.hash",
                (path, file_hash)
            )
            # lastrowid is not reliable for the UPDATE branch of an upsert,
            # so the id is looked up by path instead.
            return conn.execute("SELECT id FROM files WHERE path=?", (path,)).fetchone()[0]

    def add_symbol(self, file_id: int, name: str, sym_type: str, start: int, end: int, code: str) -> int:
        """Insert one symbol row and return its id."""
        with self._session() as conn:
            cursor = conn.execute(
                "INSERT INTO symbols (file_id, name, type, start_line, end_line, code) VALUES (?, ?, ?, ?, ?, ?)",
                (file_id, name, sym_type, start, end, code)
            )
            return cursor.lastrowid

    def add_relation(self, from_id: int, to_id: int, rel_type: str):
        """Insert a ``rel_type`` edge from symbol ``from_id`` to symbol ``to_id``."""
        with self._session() as conn:
            conn.execute(
                "INSERT INTO relations (from_id, to_id, type) VALUES (?, ?, ?)",
                (from_id, to_id, rel_type)
            )

    @staticmethod
    def _format_symbols(entries) -> str:
        """Render ``(sym_type, name, path, code)`` tuples as a text block.

        Each symbol yields a header line; symbols with non-empty code also get
        a fenced excerpt limited to the first 500 characters.
        """
        parts = []
        for sym_type, name, path, code in entries:
            parts.append(f"{sym_type}: {name} ({path})")
            if code:
                parts.append(f" ```\n{code[:500]}\n ```")
        return "\n".join(parts)

    def get_all_symbol_context(self) -> str:
        """Return the formatted context of every symbol, ordered by path then name."""
        with self._session() as conn:
            rows = conn.execute("""
                SELECT s.name, s.type, f.path, s.code
                FROM symbols s
                JOIN files f ON s.file_id = f.id
                ORDER BY f.path, s.name
            """).fetchall()
        return self._format_symbols(
            (sym_type, name, path, code) for name, sym_type, path, code in rows
        )

    def get_symbol_context_for_question(self, question: str, top_n: int = 30) -> str:
        """Return formatted context for the symbols whose names best match ``question``.

        The question is split on whitespace; tokens are lower-cased, stripped
        of trailing punctuation, and dropped when they are stop words or at
        most one character long.  A symbol's score is the number of tokens
        that occur as a substring of its lower-cased name.  Symbols scoring
        zero are omitted; the ``top_n`` best are returned, ties keeping
        name order.
        """
        punctuation = self._TRAILING_PUNCTUATION
        tokens = set()
        for raw in question.split():
            word = raw.lower().rstrip(punctuation)
            if word not in self._STOP_WORDS and len(raw.rstrip(punctuation)) > 1:
                tokens.add(word)

        with self._session() as conn:
            rows = conn.execute("""
                SELECT s.name, s.type, f.path, s.code
                FROM symbols s
                JOIN files f ON s.file_id = f.id
                ORDER BY s.name
            """).fetchall()

        scored = []
        for name, sym_type, path, code in rows:
            name_lower = (name or "").lower()
            score = sum(1 for token in tokens if token in name_lower)
            if score > 0:
                scored.append((score, sym_type, name, path, code))
        # Stable sort: equal scores keep the ORDER BY s.name ordering.
        scored.sort(key=lambda entry: -entry[0])
        return self._format_symbols(entry[1:] for entry in scored[:top_n])

    def get_symbol_id_by_name(self, name: str) -> Optional[int]:
        """Return the id of one symbol called ``name``, or ``None`` if there is none.

        When several symbols share the name, which one is returned is decided
        by SQLite (the query has ``LIMIT 1`` and no ``ORDER BY``).
        """
        with self._session() as conn:
            row = conn.execute(
                "SELECT id FROM symbols WHERE name=? LIMIT 1", (name,)
            ).fetchone()
        return row[0] if row else None

    def get_symbol_ids_by_file(self, file_path: str) -> list[int]:
        """Return the ids of all symbols stored for the file at ``file_path``."""
        with self._session() as conn:
            rows = conn.execute("""
                SELECT s.id FROM symbols s
                JOIN files f ON s.file_id = f.id
                WHERE f.path=?
            """, (file_path,)).fetchall()
        return [row[0] for row in rows]
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import List, Dict, Any
|
|
4
|
+
from .graph import KnowledgeGraph
|
|
5
|
+
from .parser import extract_snippets, extract_python_relations, resolve_relative_imports, extract_ts_relations, resolve_ts_relative_imports
|
|
6
|
+
from .vendor.claude_mem_lite import generate_code_skeleton
|
|
7
|
+
|
|
8
|
+
class Indexer:
    """Walk a source tree and load its files, symbols and relations into a KnowledgeGraph."""

    # Suffixes picked up by discovery.
    _SOURCE_SUFFIXES = (".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".java", ".cpp", ".cs")
    # Suffixes handled by the JS/TS relation extractor.
    _TS_SUFFIXES = frozenset({".ts", ".tsx", ".js", ".jsx"})
    # Suffixes that fall back to a single "module" skeleton symbol.
    _SKELETON_SUFFIXES = (".go", ".rs", ".java", ".cpp", ".cs")

    def __init__(self, root_path: str, db_path: str):
        """Resolve ``root_path`` and open (or create) the graph stored at ``db_path``."""
        self.root = Path(root_path).resolve()
        self.kg = KnowledgeGraph(db_path)
        self.skip_dirs = {".git", "node_modules", "__pycache__", "dist", "build"}

    def get_file_hash(self, file_path: Path) -> str:
        """Calculate SHA-256 hash of file content."""
        hasher = hashlib.sha256()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hasher.update(chunk)
        return hasher.hexdigest()

    def index(self, force: bool = False):
        """Run the multi-phase indexing pipeline.

        Phase 1 discovers source files, phase 2 (re-)ingests the symbols of
        files whose content hash changed (or of every file when ``force`` is
        true), and phase 3 wires CALLS/IMPORTS relations for Python and
        JS/TS files.
        """
        print(f"🔍 Indexing {self.root}...")

        # Phase 1: Discovery
        files = self._discover_files()

        # Phase 2: Parsing & Ingestion
        for file_path in files:
            self._ingest_file(file_path, force)

        # Phase 3: Two-pass post-processing for CALLS/IMPORTS (Python + JS/TS)
        py_files = [f for f in files if f.suffix == ".py"]
        ts_files = [f for f in files if f.suffix in self._TS_SUFFIXES]
        lang_files = [
            (py_files, extract_python_relations, resolve_relative_imports),
            (ts_files, extract_ts_relations, resolve_ts_relative_imports),
        ]

        if py_files or ts_files:
            print(" Resolving CALLS/IMPORTS...")

            # Pass 1: Build map of all symbol names -> id across all languages
            all_symbols = self._collect_symbol_ids(py_files + ts_files)

            # Pass 2: Extract + resolve + wire relations per language
            for file_list, extract_fn, resolve_fn in lang_files:
                for file_path in file_list:
                    self._wire_file_relations(file_path, extract_fn, resolve_fn, all_symbols)

        print(f"✅ Indexing complete. Knowledge Graph updated.")

    def _ingest_file(self, file_path: Path, force: bool) -> None:
        """Store the symbols of one file, skipping it when its hash is unchanged."""
        rel_path = str(file_path.relative_to(self.root))
        file_hash = self.get_file_hash(file_path)

        # Check if file changed or force re-index
        existing_hash = self.kg.get_file_hash(rel_path)
        if not force and existing_hash == file_hash:
            return

        # TODO: Replace with RETURNING id when SQLite minimum version allows
        file_id = self.kg.add_file(rel_path, file_hash)
        self.kg.clear_file_symbols(file_id)

        # Use core extraction for now (Phase 1 legacy)
        source = file_path.read_text(errors="ignore")
        snippets = extract_snippets(file_path, source)

        # P1.1: Skeleton fallback for non-Python/JS languages
        if not snippets and file_path.suffix in self._SKELETON_SUFFIXES:
            skeleton = generate_code_skeleton(file_path)
            if skeleton:
                snippets = [{
                    "name": file_path.stem,
                    "start": 1,
                    "end": skeleton.count("\n") + 1,
                    "code": skeleton,
                    "type": "module",
                }]

        # Map FQN to DB ID for relations
        symbol_ids = {}
        for snip in snippets:
            sym_id = self.kg.add_symbol(
                file_id,
                snip["name"],
                snip["type"],
                snip["start"],
                snip["end"],
                snip["code"],
            )
            symbol_ids[snip["name"]] = sym_id

            # Wire relations if parent exists
            parent_name = snip.get("parent")
            if parent_name and parent_name in symbol_ids:
                self.kg.add_relation(symbol_ids[parent_name], sym_id, "CONTAINS")

    def _collect_symbol_ids(self, files: List[Path]) -> Dict[str, int]:
        """Map every symbol name extracted from ``files`` to an id stored in the graph."""
        all_symbols = {}
        for file_path in files:
            source = file_path.read_text(errors="ignore")
            for snip in extract_snippets(file_path, source):
                sid = self.kg.get_symbol_id_by_name(snip["name"])
                if sid:
                    all_symbols[snip["name"]] = sid
        return all_symbols

    def _wire_file_relations(self, file_path: Path, extract_fn, resolve_fn, all_symbols: Dict[str, int]) -> None:
        """Add CALLS/IMPORTS edges from every symbol of ``file_path`` to each resolved target.

        A target resolves by its full name first, then by its last dotted
        component.  Each distinct ``(from, to, type)`` edge is inserted once
        per call, however many call sites or import lines produced it.

        NOTE(review): edges of unchanged files are still re-inserted on every
        ``index()`` run; removing that needs a delete/unique API on the graph.
        """
        source = file_path.read_text(errors="ignore")
        rels = resolve_fn(file_path, extract_fn(source))

        from_ids = None  # looked up lazily, once per file instead of once per relation
        seen = set()
        for rel in rels:
            target_name = rel["target"]
            bare_name = target_name.split(".")[-1]
            resolved = all_symbols.get(target_name) or all_symbols.get(bare_name)
            if not resolved:
                continue
            if from_ids is None:
                from_ids = self.kg.get_symbol_ids_by_file(
                    str(file_path.relative_to(self.root))
                )
            for fid in from_ids:
                edge = (fid, resolved, rel["type"])
                if edge in seen:
                    continue
                seen.add(edge)
                self.kg.add_relation(fid, resolved, rel["type"])

    def _discover_files(self) -> List[Path]:
        """Return all source files under ``self.root`` in a deterministic order.

        Directories named in ``self.skip_dirs`` are pruned.  Directory and
        file names are sorted so that symbol ids do not depend on the order
        in which the operating system lists directory entries.
        """
        import os
        files = []
        for root, dirs, filenames in os.walk(self.root):
            # Assigning to dirs[:] both prunes and orders the walk.
            dirs[:] = sorted(d for d in dirs if d not in self.skip_dirs)
            for name in sorted(filenames):
                file_path = Path(root) / name
                if file_path.suffix in self._SOURCE_SUFFIXES:
                    files.append(file_path)
        return files
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
import re
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
def extract_python_functions(source: str) -> list[dict]:
    """Return one snippet dict per function, async function and class in ``source``.

    Definitions are reported in source (pre-order) sequence, so an enclosing
    class or function always precedes the definitions nested inside it.
    Nested names are dotted (``Outer.inner``).  Each dict has the keys
    ``name``, ``start`` and ``end`` (1-based, inclusive), ``code``, ``type``
    (``"class"`` or ``"function"``) and ``parent`` (enclosing dotted name or
    ``None``).

    Source that cannot be parsed yields an empty list.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError, RecursionError):
        # ValueError: NUL bytes in the source on older Python versions.
        # RecursionError: pathologically nested code.
        return []

    lines = source.splitlines()
    snippets = []

    # Explicit stack instead of recursion, so deep ASTs cannot exhaust the
    # interpreter's recursion limit.  Children are pushed reversed to keep
    # the same pre-order as a recursive walk.
    stack = [(child, None) for child in reversed(list(ast.iter_child_nodes(tree)))]
    while stack:
        node, parent_name = stack.pop()
        scope = parent_name
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            fqn = f"{parent_name}.{node.name}" if parent_name else node.name
            snippets.append({
                "name": fqn,
                "start": node.lineno,
                "end": node.end_lineno,
                "code": "\n".join(lines[node.lineno - 1:node.end_lineno]),
                "type": "class" if isinstance(node, ast.ClassDef) else "function",
                "parent": parent_name,
            })
            # Everything nested below is scoped to this definition.
            scope = fqn
        # Non-scope nodes (If, Try, With, ...) pass the current scope through.
        stack.extend(
            (child, scope) for child in reversed(list(ast.iter_child_nodes(node)))
        )
    return snippets
|
|
35
|
+
|
|
36
|
+
def extract_python_relations(source: str) -> list[dict]:
    """Collect IMPORTS and CALLS relations from the AST of ``source``.

    IMPORTS entries are ``{type, target, line, level}`` dicts (``level`` is 0
    for plain ``import`` and the relative level for ``from ... import``).
    CALLS entries are ``{type, target, line}`` dicts whose target is the bare
    called name, or the attribute name for ``obj.method()`` calls.  Any error
    while parsing or walking is swallowed and whatever was collected so far
    is returned.
    """
    found = []
    try:
        for node in ast.walk(ast.parse(source)):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    found.append({"type": "IMPORTS", "target": alias.name, "line": node.lineno, "level": 0})
            elif isinstance(node, ast.ImportFrom):
                package = node.module or ""
                depth = getattr(node, 'level', 0)
                for alias in node.names:
                    dotted = alias.name if not package else f"{package}.{alias.name}"
                    found.append({"type": "IMPORTS", "target": dotted, "line": node.lineno, "level": depth})
            elif isinstance(node, ast.Call):
                callee = node.func
                if isinstance(callee, ast.Name):
                    callee_name = callee.id
                elif isinstance(callee, ast.Attribute):
                    # e.g., obj.method() -> target = "method"
                    callee_name = callee.attr
                else:
                    continue
                found.append({"type": "CALLS", "target": callee_name, "line": node.lineno})
    except Exception:
        pass
    return found
|
|
63
|
+
|
|
64
|
+
def resolve_relative_imports(file_path: Path, rels: list[dict]) -> list[dict]:
    """
    Turn relative Python imports into absolute dotted module paths.

    Relations that are not IMPORTS, or whose level is 0, are passed through
    untouched.  For the others the importing package directory is found by
    going up ``level - 1`` directories from the file's directory; its dotted
    prefix is then built from every consecutive ancestor that contains an
    ``__init__.py``.  The returned copy has the prefixed target and level 0.
    """
    output = []
    for rel in rels:
        depth = rel.get("level", 0)
        if rel["type"] != "IMPORTS" or depth == 0:
            output.append(rel)
            continue

        # level 1 = same dir, level 2 = parent dir, etc.
        package_dir = file_path.parent
        hops = depth - 1
        while hops > 0 and package_dir.parent != package_dir:
            package_dir = package_dir.parent
            hops -= 1

        # Climb while the directory is a package, collecting names innermost first.
        names_inner_first = []
        cursor = package_dir
        while (cursor / "__init__.py").exists():
            names_inner_first.append(cursor.name)
            if cursor.parent == cursor:
                break
            cursor = cursor.parent

        absolute = dict(rel)
        absolute["target"] = ".".join(names_inner_first[::-1] + rel["target"].split("."))
        absolute["level"] = 0  # Now absolute
        output.append(absolute)

    return output
|
|
105
|
+
|
|
106
|
+
def extract_ts_relations(source: str) -> list[dict]:
    """
    Extract relative ES module imports and re-exports from JS/TS source.

    Recognised forms (relative paths only, matched per line):
      1. import/export ... from './path'
      2. import('./path')   [dynamic]
      3. import './path'    [side-effect]

    A small state machine removes ``//`` and ``/* */`` comments before
    matching.  String contents are kept (the import path itself is a string)
    and only tracked so that comment markers inside strings are not treated
    as comments.  Backslash escapes are honoured, and single/double-quoted
    strings are considered closed at the end of the line unless the line
    ends in a backslash continuation; template literals may span lines.

    Returns a list of ``{type: "IMPORTS", target, line, level}`` dicts where
    ``level`` is 1 for ``./`` paths and ``1 + number of '..' parts`` otherwise.
    """
    relations = []

    import_re = re.compile(
        r"""(?:import|export)\s+.*?from\s+['"](\.\.?\/[^'"]+)['"]"""
        r"""|import\s*\(\s*['"](\.\.?\/[^'"]+)['"]\s*\)"""
        r"""|import\s+['"](\.\.?\/[^'"]+)['"]"""
    )

    string_char = None  # opening quote of the string we are inside, else None
    in_block_comment = False

    for line_no, line in enumerate(source.splitlines(), start=1):
        kept = []
        continued = False  # True when the line ends with a backslash inside a string
        j = 0
        length = len(line)

        while j < length:
            char = line[j]
            nxt = line[j + 1] if j + 1 < length else ""

            if in_block_comment:
                if char == "*" and nxt == "/":
                    in_block_comment = False
                    j += 2
                else:
                    j += 1
                continue

            if string_char is not None:
                kept.append(char)
                if char == "\\":
                    # An escaped character can never terminate the string.
                    if nxt:
                        kept.append(nxt)
                    else:
                        continued = True
                    j += 2
                    continue
                if char == string_char:
                    string_char = None
                j += 1
                continue

            if char == "/" and nxt == "*":
                in_block_comment = True
                j += 2
                continue
            if char == "/" and nxt == "/":
                break  # Skip rest of line

            if char in ("'", '"', '`'):
                string_char = char
            kept.append(char)
            j += 1

        # Only template literals legitimately span lines; dropping stale
        # quote state here keeps one odd line from disabling comment
        # stripping for the remainder of the file.
        if string_char in ("'", '"') and not continued:
            string_char = None

        clean_code = "".join(kept)

        # Heuristic to avoid matching imports inside string assignments
        # e.g., const s = "import { X } from './mod'";
        # This also skips 'const p = import("./mod")'; for v1, skipping is
        # preferred over false positives.
        if "=" in clean_code and clean_code.find("=") < clean_code.find("import"):
            continue

        for match in import_re.finditer(clean_code):
            path = next(g for g in match.groups() if g is not None)

            # Level calculation: ./ -> 1, ../ -> 2, ../../ -> 3
            if path.startswith('./'):
                level = 1
            else:
                level = path.split('/').count('..') + 1

            relations.append({
                "type": "IMPORTS",
                "target": path,
                "line": line_no,
                "level": level,
            })

    return relations
|
|
191
|
+
|
|
192
|
+
def _split_ts_relative_target(target: str, level: int):
    """Split a relative import path into (directories to go up, module path parts).

    Leading ``.``/``..`` segments decide how far to go up.  When the target
    carries no dot prefix at all, ``level - 1`` is used instead.  ``..``
    segments further inside the path are normalised away.
    """
    segments = target.split("/")
    ups = 0
    idx = 0
    while idx < len(segments) and segments[idx] in (".", ".."):
        if segments[idx] == "..":
            ups += 1
        idx += 1
    if idx == 0:
        ups = max(level - 1, 0)

    parts = []
    for seg in segments[idx:]:
        if seg in ("", "."):
            continue
        if seg == "..":
            if parts:
                parts.pop()
            else:
                ups += 1
        else:
            parts.append(seg)
    return ups, parts


def resolve_ts_relative_imports(file_path: Path, rels: list[dict]) -> list[dict]:
    """
    Resolves relative JS/TS imports to absolute dotted module paths
    using Node.js-style extension probing (.ts, .tsx, .js, .jsx).

    Relations that are not IMPORTS, or whose level is 0, pass through
    unchanged.  For the others the module is looked up on disk, either as a
    file with one of the known extensions or as a directory containing an
    ``index.*`` file.  When found, the target becomes
    ``<prefix>.<module parts>``, where the prefix is built from the base
    directory and each consecutive ancestor that directly contains a JS/TS
    file.  When nothing is found the target is kept as written.  In both
    cases the returned copy has level 0.
    """
    EXTENSIONS = ['.ts', '.tsx', '.js', '.jsx']
    INDEX_FILES = [f'index{e}' for e in EXTENSIONS]
    resolved_rels = []

    for rel in rels:
        if rel["type"] != "IMPORTS" or rel.get("level", 0) == 0:
            resolved_rels.append(rel)
            continue

        new_rel = rel.copy()
        new_rel["level"] = 0  # Now absolute (or absolute-ish in the fallback)

        # Strip *every* leading "./" and "../": "../../utils/mod" -> up 2, "utils/mod"
        ups, module_parts = _split_ts_relative_target(rel["target"], rel["level"])

        base = file_path.parent
        for _ in range(ups):
            if base.parent == base:
                break
            base = base.parent
        candidate = base.joinpath(*module_parts)

        # Extension probing
        found = False
        if module_parts:
            for ext in EXTENSIONS:
                # Appending keeps dotted names intact ("foo.service" ->
                # "foo.service.ts"); with_suffix additionally covers
                # "./mod.js" written for a "mod.ts" on disk.
                if candidate.with_name(candidate.name + ext).exists() or candidate.with_suffix(ext).exists():
                    found = True
                    break
        if not found and candidate.is_dir():
            found = any((candidate / idx).exists() for idx in INDEX_FILES)

        if found:
            # Discover package prefix by walking up as long as source files exist in the dir
            prefix_parts = []
            walk_dir = base
            while walk_dir.name and walk_dir.parent != walk_dir:
                try:
                    has_source = any(
                        f.suffix in EXTENSIONS for f in walk_dir.iterdir() if f.is_file()
                    )
                except OSError:
                    has_source = False
                if not has_source:
                    break
                prefix_parts.insert(0, walk_dir.name)
                walk_dir = walk_dir.parent

            new_rel["target"] = ".".join(prefix_parts + module_parts)

        resolved_rels.append(new_rel)

    return resolved_rels
|
|
265
|
+
|
|
266
|
+
def _find_ts_block_end(lines: list, start_line: int, window: int = 500) -> int:
    """Return the 1-based last line of the declaration starting at ``lines[start_line]``.

    Scans at most ``window`` lines, counting braces outside strings and
    comments.  The declaration ends on the line where the first opened brace
    is balanced, or, if no brace was opened yet, on the line holding a
    top-level ``;`` (body-less declaration such as an expression-bodied
    arrow function or an overload signature).  If neither is found, the
    result is the end of the scanned window when a body was opened, else
    the declaration line itself.
    """
    depth = 0
    paren_depth = 0
    started = False
    in_block_comment = False
    string_char = None
    stop = min(start_line + window, len(lines))

    for i in range(start_line, stop):
        line = lines[i]
        length = len(line)
        continued = False
        j = 0
        while j < length:
            char = line[j]
            nxt = line[j + 1] if j + 1 < length else ""

            if in_block_comment:
                if char == "*" and nxt == "/":
                    in_block_comment = False
                    j += 2
                else:
                    j += 1
                continue

            if string_char is not None:
                if char == "\\":
                    # Skip the escaped character; it cannot close the string.
                    continued = not nxt
                    j += 2
                    continue
                if char == string_char:
                    string_char = None
                j += 1
                continue

            if char == "/" and nxt == "/":
                break  # Skip rest of line
            if char == "/" and nxt == "*":
                in_block_comment = True
                j += 2
                continue

            if char in ("'", '"', '`'):
                string_char = char
            elif char in "{}":
                if char == "{":
                    depth += 1
                    started = True
                else:
                    depth -= 1
                if started and depth == 0:
                    return i + 1
            elif char in "([":
                paren_depth += 1
            elif char in ")]":
                paren_depth -= 1
            elif char == ";" and not started and paren_depth <= 0:
                return i + 1
            j += 1

        # Only template literals may span lines without a continuation.
        if string_char in ("'", '"') and not continued:
            string_char = None

    if started:
        return stop
    return min(start_line + 1, len(lines))


def extract_ts_functions(source: str) -> list[dict]:
    """Return snippet dicts for top-level JS/TS functions, classes and arrow consts.

    Declarations are recognised by a regex anchored at the start of a line
    (optionally prefixed by ``export``, ``default`` and ``async``).  Each
    dict has ``name``, ``start`` and ``end`` (1-based, inclusive), ``code``
    and ``type`` (``"class"`` for class declarations, ``"function"``
    otherwise).  ``end`` is never smaller than ``start``.
    """
    snippets = []
    lines = source.splitlines()
    pattern = re.compile(
        r"^(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:"
        r"function\s*\*?\s+(\w+)"
        r"|class\s+(\w+)"
        r"|const\s+(\w+)\s*=\s*(?:async\s+)?(?:\([^)]*\)|[^=]+)=>"
        r"|(?!(?:if|for|while|switch|catch)\b)(\w+)\s*\([^)]*\)\s*\{"
        r")",
        re.MULTILINE,
    )

    for match in pattern.finditer(source):
        name = next(g for g in match.groups() if g is not None)
        start_line = source[: match.start()].count("\n")
        end_line = _find_ts_block_end(lines, start_line)

        snippets.append({
            "name": name,
            "start": start_line + 1,
            "end": end_line,
            "code": "\n".join(lines[start_line:end_line]),
            # Group 2 is the `class` alternative of the pattern.
            "type": "class" if match.group(2) is not None else "function",
        })
    return snippets
|
|
331
|
+
|
|
332
|
+
def extract_snippets(file_path: Path, source: str) -> list[dict]:
    """Dispatch ``source`` to the snippet extractor matching the file's suffix.

    ``.py`` goes to the Python extractor, ``.js``/``.ts``/``.jsx``/``.tsx`` to
    the JS/TS extractor; any other suffix yields an empty list.
    """
    suffix = file_path.suffix
    if suffix == ".py":
        return extract_python_functions(source)
    if suffix in (".js", ".ts", ".jsx", ".tsx"):
        return extract_ts_functions(source)
    return []
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
def generate_code_skeleton(file_path: Path) -> str:
    """Build a compact outline of ``file_path`` listing its functions and classes.

    Returns ``""`` when the file does not exist.  Otherwise the first line is
    ``File: <name>``; Python files add their top-level classes (with methods
    and directly nested classes) and functions, JS/TS files add exported
    functions, classes and consts found by regex.  Other suffixes produce
    only the header line.  Errors while outlining are printed and the
    partial outline is returned.
    """
    if not file_path.exists():
        return ""

    text = file_path.read_text(errors="ignore")
    outline = [f"File: {file_path.name}"]
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)
    extension = file_path.suffix

    try:
        if extension == ".py":
            for top in ast.parse(text).body:
                if isinstance(top, ast.ClassDef):
                    outline.append(f" Class: {top.name}")
                    for member in top.body:
                        if isinstance(member, function_nodes):
                            outline.append(f" Method: {member.name}")
                        elif isinstance(member, ast.ClassDef):
                            outline.append(f" Nested Class: {member.name}")
                elif isinstance(top, function_nodes):
                    outline.append(f" Function: {top.name}")
        elif extension in (".ts", ".tsx", ".js", ".jsx"):
            # Simple regex-based skeleton for JS/TS
            import re
            export_patterns = (
                r"export\s+(?:async\s+)?function\s+(\w+)",
                r"export\s+class\s+(\w+)",
                r"export\s+const\s+(\w+)\s*=",
            )
            for export_pattern in export_patterns:
                outline.extend(
                    f" Symbol: {hit.group(1)}" for hit in re.finditer(export_pattern, text)
                )
    except Exception as e:
        print(f"Skeleton generation error: {e}")

    return "\n".join(outline)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
def pack_repo(root_path: Path, include_extensions: List[str] = None) -> str:
    """Concatenate the text files below ``root_path`` into one delimited string.

    Only files whose lower-cased suffix is in ``include_extensions`` are
    packed (default: ``.py .ts .tsx .js .jsx .md .txt``).  The directories
    ``.git``, ``node_modules``, ``__pycache__``, ``dist`` and ``build`` are
    not entered.  Each file is wrapped in ``--- BEGIN FILE: <relative path>
    ---`` / ``--- END FILE: <relative path> ---`` markers; files that cannot
    be read are skipped silently.
    """
    wanted = (
        [".py", ".ts", ".tsx", ".js", ".jsx", ".md", ".txt"]
        if include_extensions is None
        else include_extensions
    )
    ignored_dirs = {".git", "node_modules", "__pycache__", "dist", "build"}
    chunks = []

    for current_dir, subdirs, names in os.walk(root_path):
        # Pruning in place stops os.walk from descending into ignored directories.
        subdirs[:] = [name for name in subdirs if name not in ignored_dirs]

        for name in names:
            candidate = Path(current_dir) / name
            if candidate.suffix.lower() not in wanted:
                continue
            try:
                relative = candidate.relative_to(root_path)
                text = candidate.read_text(errors="ignore")
            except Exception:
                continue
            chunks += [
                f"--- BEGIN FILE: {relative} ---",
                text,
                f"--- END FILE: {relative} ---\n",
            ]

    return "\n".join(chunks)
|