graphcoding 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graphcoding/scan.py ADDED
@@ -0,0 +1,254 @@
1
+ """Scanner — turn source files into graph nodes and import edges.
2
+
3
+ Two jobs:
4
+ * scan_repo(): full sweep — migrate an existing repo onto the graph.
5
+ * scan_file(): one file — used by sync after every change.
6
+
7
+ Python is parsed with ast (imports, top-level defs, module docstring).
8
+ JS/TS/JSX/TSX/Vue/Svelte use regex import extraction with relative-path
9
+ resolution. Everything else gets a file node with language + first-comment
10
+ summary. Deliberately lightweight: the graph's value is the design layer
11
+ (summaries, planned nodes, cross-cutting edges an import scan can't see),
12
+ not perfect static analysis.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import ast
17
+ import os
18
+ import re
19
+ import subprocess
20
+
21
+ from .store import Graph, Node
22
+
23
# File extension (with leading dot) -> language label stored on graph nodes.
# Extensions missing from this table yield an empty language string.
LANG = {
    ".py": "python", ".ts": "typescript", ".tsx": "typescript",
    ".js": "javascript", ".jsx": "javascript", ".go": "go", ".rs": "rust",
    ".rb": "ruby", ".java": "java", ".kt": "kotlin", ".c": "c", ".h": "c",
    ".cpp": "cpp", ".hpp": "cpp", ".cs": "csharp", ".php": "php",
    ".swift": "swift", ".css": "css", ".scss": "css", ".sql": "sql",
    ".sh": "shell", ".html": "html", ".vue": "vue", ".svelte": "svelte",
    ".json": "json", ".yaml": "yaml", ".yml": "yaml", ".toml": "toml",
    ".md": "markdown",
}

# Substrings of a file's base name that mark it as a test file (see is_test).
TEST_MARKERS = (".test.", ".spec.", "_test.")

# Captures the quoted module specifier of `import ... from "x"`, bare
# `import "x"`, `export ... from "x"`, and `require("x")`.
JS_IMPORT_RE = re.compile(
    r"""(?:import\s+(?:[\w${},*\s]+\s+from\s+)?|export\s+[\w${},*\s]+\s+from\s+|require\()\s*['"]([^'"]+)['"]""")
# Captures the identifier of an exported function, class, or const
# declaration (optionally `default` and/or `async`).
JS_EXPORT_RE = re.compile(
    r"""export\s+(?:default\s+)?(?:async\s+)?(?:function|class|const)\s+([A-Za-z_$][\w$]*)""")
40
+
41
+
42
def language_of(path: str) -> str:
    """Return the language label for *path*'s extension, or "" if unknown."""
    _, extension = os.path.splitext(path)
    return LANG.get(extension, "")
44
+
45
+
46
def is_test(path: str) -> bool:
    """Tell whether *path* looks like a test file.

    A file counts as a test when its base name starts with ``test_``, when
    the base name contains one of TEST_MARKERS, or when the path passes
    through a ``tests`` or ``__tests__`` directory.
    """
    filename = os.path.basename(path)
    if filename.startswith("test_"):
        return True
    if any(marker in filename for marker in TEST_MARKERS):
        return True
    # Prefix a slash so a leading "tests/..." matches the "/tests/" probe too.
    rooted = "/" + path
    return "/tests/" in rooted or "/__tests__/" in rooted
50
+
51
+
52
def trackable(path: str, cfg: dict) -> bool:
    """Decide whether the repo-relative *path* belongs in the graph.

    A path is rejected when any of its "/"-separated segments is listed in
    ``cfg["ignore_segments"]``, or when ``cfg["ignore_tests"]`` is truthy and
    the path looks like a test. Otherwise it is accepted only if its
    extension is listed in ``cfg["track_extensions"]``.
    """
    ignored = cfg["ignore_segments"]
    for segment in path.split("/"):
        if segment in ignored:
            return False
    if cfg.get("ignore_tests") and is_test(path):
        return False
    _, extension = os.path.splitext(path)
    return extension in cfg["track_extensions"]
59
+
60
+
61
def tracked_files(root: str, cfg: dict) -> list[str]:
    """List the trackable files under *root* as sorted repo-relative paths.

    Uses ``git ls-files`` when possible (which respects .gitignore) and falls
    back to an ``os.walk`` sweep when git is missing or *root* is not a
    repository. Paths use "/" separators and each appears once.

    Args:
        root: Directory to scan.
        cfg: Config dict; ``ignore_segments`` is read here and the remaining
            filtering is delegated to ``trackable``.
    """
    try:
        # cached + untracked-but-not-ignored: the graph should see WIP files too.
        # -z emits NUL-terminated verbatim paths; without it git C-quotes
        # non-ASCII/special names and they would fail the exists() check below.
        out = subprocess.run(
            ["git", "-C", root, "ls-files", "-z", "--cached", "--others",
             "--exclude-standard"],
            capture_output=True, text=True, check=True).stdout
        # a file deleted from the worktree is still in the index — drop it
        files = [p for p in out.split("\0")
                 if p and os.path.exists(os.path.join(root, p))]
    except (subprocess.CalledProcessError, FileNotFoundError):
        files = []
        for dirpath, dirnames, filenames in os.walk(root):
            rel = os.path.relpath(dirpath, root)
            # prune in place so os.walk never descends into ignored/hidden dirs
            dirnames[:] = [d for d in dirnames
                           if d not in cfg["ignore_segments"] and not d.startswith(".")]
            for fn in filenames:
                files.append(os.path.normpath(os.path.join(rel, fn)).replace(os.sep, "/"))
    # set: unmerged index entries make ls-files list the same path repeatedly
    return sorted({p for p in files if trackable(p, cfg)})
81
+
82
+
83
+ # -- per-language extraction --------------------------------------------------
84
+ def _py_extract(root: str, path: str, src: str):
85
+ """Returns (summary, imports, symbols). Never raises on bad source."""
86
+ try:
87
+ tree = ast.parse(src)
88
+ except SyntaxError:
89
+ return "", [], []
90
+ doc = ast.get_docstring(tree) or ""
91
+ summary = doc.strip().splitlines()[0] if doc.strip() else ""
92
+ modules = []
93
+ for stmt in ast.walk(tree):
94
+ if isinstance(stmt, ast.Import):
95
+ modules.extend(a.name for a in stmt.names)
96
+ elif isinstance(stmt, ast.ImportFrom):
97
+ prefix = "." * stmt.level
98
+ base = prefix + (stmt.module or "")
99
+ modules.append(base)
100
+ # `from pkg import mod` — each name may itself be a module file
101
+ for a in stmt.names:
102
+ modules.append((base + "." + a.name) if stmt.module
103
+ else prefix + a.name)
104
+ imports = []
105
+ for m in modules:
106
+ target = _py_resolve(root, path, m)
107
+ if target:
108
+ imports.append(target)
109
+ symbols = []
110
+ for stmt in tree.body:
111
+ if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
112
+ symbols.append((stmt.name, "CodeFunction", ast.get_docstring(stmt) or ""))
113
+ elif isinstance(stmt, ast.ClassDef):
114
+ symbols.append((stmt.name, "CodeClass", ast.get_docstring(stmt) or ""))
115
+ return summary, sorted(set(imports)), symbols
116
+
117
+
118
+ def _py_resolve(root: str, path: str, module: str) -> str | None:
119
+ """Map a python module string to a repo-relative file, if it lives in-repo."""
120
+ if module.startswith("."):
121
+ level = len(module) - len(module.lstrip("."))
122
+ base = os.path.dirname(path)
123
+ for _ in range(level - 1):
124
+ base = os.path.dirname(base)
125
+ tail = module.lstrip(".")
126
+ parts = ([base] if base else []) + (tail.split(".") if tail else [])
127
+ cand = "/".join(p for p in parts if p)
128
+ else:
129
+ cand = module.replace(".", "/")
130
+ for suffix in (".py", "/__init__.py"):
131
+ rel = cand + suffix
132
+ if os.path.exists(os.path.join(root, rel)):
133
+ return rel
134
+ # src-layout: src/<pkg>/...
135
+ for prefix in ("src/",):
136
+ for suffix in (".py", "/__init__.py"):
137
+ rel = prefix + cand + suffix
138
+ if os.path.exists(os.path.join(root, rel)):
139
+ return rel
140
+ return None
141
+
142
+
143
+ def _js_resolve(root: str, path: str, spec: str) -> str | None:
144
+ """Resolve a relative (or @/ aliased) JS/TS import to a repo file."""
145
+ if spec.startswith("@/"):
146
+ base_dir = "src" if os.path.isdir(os.path.join(root, "src")) else ""
147
+ cand = os.path.normpath(os.path.join(base_dir, spec[2:]))
148
+ elif spec.startswith("."):
149
+ cand = os.path.normpath(os.path.join(os.path.dirname(path), spec))
150
+ else:
151
+ return None # external package
152
+ cand = cand.replace(os.sep, "/")
153
+ exts = ["", ".ts", ".tsx", ".js", ".jsx", ".vue", ".svelte", ".css", ".json"]
154
+ for ext in exts:
155
+ rel = cand + ext
156
+ if os.path.isfile(os.path.join(root, rel)):
157
+ return rel
158
+ for ext in (".ts", ".tsx", ".js", ".jsx"):
159
+ rel = cand + "/index" + ext
160
+ if os.path.isfile(os.path.join(root, rel)):
161
+ return rel
162
+ return None
163
+
164
+
165
+ def _first_comment(src: str) -> str:
166
+ """First comment or heading line — a serviceable auto-summary."""
167
+ for line in src.splitlines()[:15]:
168
+ s = line.strip()
169
+ for prefix in ("#", "//", "/*", "*", "--", "<!--"):
170
+ if s.startswith(prefix):
171
+ text = s.lstrip("#/*-<!– ").rstrip("*/->").strip()
172
+ if len(text) > 8 and not text.lower().startswith(("eslint", "ts-", "noqa", "prettier")):
173
+ return text
174
+ return ""
175
+
176
+
177
def node_type_for(path: str, src: str = "") -> str:
    """Choose the node type for a file from its name alone.

    Checked in order: config formats -> "ConfigFile", markdown -> "Doc",
    ``*.d.ts`` -> "TypeDef", capitalised ``.tsx``/``.jsx`` -> "Component",
    ``useXxx`` JS/TS files -> "Hook", anything else -> "CodeFile".
    *src* is accepted but not consulted.
    """
    filename = os.path.basename(path)
    _, ext = os.path.splitext(path)

    if ext in (".json", ".yaml", ".yml", ".toml"):
        return "ConfigFile"
    if ext == ".md":
        return "Doc"
    if path.endswith(".d.ts"):
        return "TypeDef"

    is_jsx = ext in (".tsx", ".jsx")
    if is_jsx and filename[:1].isupper():
        return "Component"

    is_script = ext in (".ts", ".tsx", ".js", ".jsx")
    if is_script and re.match(r"^use[A-Z]", filename):
        return "Hook"
    return "CodeFile"
191
+
192
+
193
def scan_file(root: str, path: str, cfg: dict) -> tuple[Node, list[Node]]:
    """Build the node (+ optional symbol sub-nodes) for one file.

    Args:
        root: Repository root directory.
        path: Repo-relative path of the file; becomes the node name.
        cfg: Config dict; ``scan_symbols`` switches symbol sub-nodes on.

    Returns:
        ``(node, subs)``: the file node carrying its IMPORTS edges (and a
        CONTAINS edge per sub-node), plus the list of symbol sub-nodes.
        Symbols whose name starts with "_" are skipped.
    """
    try:
        with open(os.path.join(root, path), encoding="utf-8", errors="replace") as handle:
            src = handle.read()
    except OSError:
        # an unreadable file still gets a node, just with nothing extracted
        src = ""

    lang = language_of(path)
    want_symbols = cfg.get("scan_symbols")
    summary, imports, symbols = "", [], []
    if lang == "python":
        summary, imports, symbols = _py_extract(root, path, src)
    elif lang in ("typescript", "javascript", "vue", "svelte"):
        resolved = (_js_resolve(root, path, spec)
                    for spec in JS_IMPORT_RE.findall(src))
        imports = sorted({target for target in resolved if target})
        if want_symbols:
            symbols = [(name, "CodeFunction", "")
                       for name in JS_EXPORT_RE.findall(src)]
    summary = summary or _first_comment(src)

    # a file importing itself is not an edge worth recording
    import_edges = [{"to": target, "type": "IMPORTS"}
                    for target in imports if target != path]
    node = Node(name=path, type=node_type_for(path, src), status="ok",
                language=lang, summary=summary, edges=import_edges)

    subs = []
    if want_symbols:
        for sname, stype, sdoc in symbols:
            if sname.startswith("_"):
                continue
            doc = sdoc.strip()
            first_line = doc.splitlines()[0] if doc else ""
            qualified = f"{path}::{sname}"
            subs.append(Node(name=qualified, type=stype, status="ok",
                             language=lang, summary=first_line))
            node.add_edge(qualified, "CONTAINS")
    return node, subs
228
+
229
+
230
def scan_repo(root: str, cfg: dict, graph: Graph) -> dict:
    """Full sweep: rescan every tracked file and merge the result into *graph*.

    Human intent recorded on an existing node survives the rescan: the
    longer of the old and new summaries is kept, a ``to-be-deleted`` status
    is kept, and edges the scanner does not generate itself (anything other
    than IMPORTS/CONTAINS) are carried over. The graph is mutated in place
    and not saved here.

    Args:
        root: Repository root directory.
        cfg: Config dict passed through to the scanner.
        graph: Graph to update.

    Returns:
        ``{"files": <scanned>, "added": <new nodes>, "updated": <existing>}``.
    """
    # IMPORTS/CONTAINS are rebuilt from source on every scan; every other
    # edge type was added by hand and must not be clobbered.
    scanner_edges = ("IMPORTS", "CONTAINS")

    files = tracked_files(root, cfg)
    added = updated = 0
    for path in files:
        node, subs = scan_file(root, path, cfg)
        old = graph.nodes.get(path)
        if old is None:
            added += 1
        else:
            # never clobber intent: keep richer summary and lifecycle statuses
            if len(old.summary) > len(node.summary):
                node.summary = old.summary
            if old.status == "to-be-deleted":
                node.status = old.status
            for e in old.edges:
                if e["type"] not in scanner_edges:
                    node.add_edge(e["to"], e["type"])
            updated += 1
        graph.nodes[path] = node
        for s in subs:
            prev = graph.nodes.get(s.name)
            if prev is not None:
                if len(prev.summary) > len(s.summary):
                    s.summary = prev.summary
                for e in prev.edges:
                    if e["type"] not in scanner_edges:
                        s.add_edge(e["to"], e["type"])
            graph.nodes[s.name] = s
    return {"files": len(files), "added": added, "updated": updated}
graphcoding/store.py ADDED
@@ -0,0 +1,202 @@
1
+ """Graph store — the in-repo knowledge graph.
2
+
3
+ The graph lives at .graphcoding/graph.jsonl: one JSON object per line, sorted
4
+ by node name. Sorted JSONL keeps diffs small and merges sane — a node edit
5
+ touches one line, and two branches adding different nodes rarely conflict.
6
+
7
+ Node shape:
8
+ {
9
+ "name": "src/app.py", # repo-relative path, or "path::Symbol"
10
+ "type": "CodeFile", # see NODE_TYPES
11
+ "status": "ok", # ok | planned | needs-analysis | to-be-deleted
12
+ "language": "python",
13
+ "summary": "One line: what this file is for.",
14
+ "edges": [{"to": "src/db.py", "type": "IMPORTS"}, ...]
15
+ }
16
+
17
+ Edges are stored on the source node. Edge targets may name nodes that do not
18
+ exist yet — a link to a planned node is work to do, by design.
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import os
24
+ from dataclasses import dataclass, field
25
+
26
# Labels a node's "type" field may carry.
NODE_TYPES = [
    "CodeFile", "CodeFunction", "CodeClass", "CodeModule",
    "Component", "Hook", "TypeDef", "ServiceDef", "ConfigFile", "Doc",
]

# Labels an edge's "type" field may carry.
EDGE_TYPES = [
    "IMPORTS", "CALLS", "CONTAINS", "INHERITS", "IMPLEMENTS",
    "REFERENCES", "DEPENDS_ON", "RELATED_TO",
]

# Lifecycle values for a node's "status" field.
STATUSES = ["ok", "planned", "needs-analysis", "to-be-deleted"]

# On-disk layout: <root>/GRAPH_DIR/GRAPH_FILE and <root>/GRAPH_DIR/CONFIG_FILE.
GRAPH_DIR = ".graphcoding"
GRAPH_FILE = "graph.jsonl"
CONFIG_FILE = "config.json"

# Baseline settings; load_config overlays the repo's config.json on top.
DEFAULT_CONFIG = {
    # file extensions (with leading dot) that get a node
    "track_extensions": [
        ".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".rb", ".java",
        ".kt", ".c", ".h", ".cpp", ".hpp", ".cs", ".php", ".swift",
        ".css", ".scss", ".sql", ".sh", ".html", ".vue", ".svelte",
        ".json", ".yaml", ".yml", ".toml", ".md",
    ],
    # a path containing any of these as a segment is never tracked
    "ignore_segments": [
        "node_modules", ".git", ".venv", "venv", "dist", "build", "target",
        "__pycache__", ".next", ".nuxt", "coverage", "vendor", ".graphcoding",
    ],
    "ignore_tests": True,
    "scan_symbols": False,
}
56
+
57
+
58
@dataclass
class Node:
    """One graph node: a file, or a symbol inside a file ("path::Symbol")."""

    name: str
    type: str = "CodeFile"
    status: str = "ok"
    language: str = ""
    summary: str = ""
    # outgoing edges, each a {"to": <node name>, "type": <edge type>} dict
    edges: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialise to a plain dict, leaving out empty optional fields.

        Edges are emitted sorted by (type, to) so the output is stable.
        """
        out = {"name": self.name, "type": self.type, "status": self.status}
        for key in ("language", "summary"):
            value = getattr(self, key)
            if value:
                out[key] = value
        if self.edges:
            out["edges"] = sorted(self.edges, key=lambda e: (e["type"], e["to"]))
        return out

    @classmethod
    def from_dict(cls, d: dict) -> "Node":
        """Build a node from a dict; only "name" is required."""
        return cls(
            d["name"],
            d.get("type", "CodeFile"),
            d.get("status", "ok"),
            d.get("language", ""),
            d.get("summary", ""),
            list(d.get("edges", [])),  # copy so the node owns its edge list
        )

    def add_edge(self, to: str, etype: str) -> bool:
        """Append an edge unless an identical one exists; return True if added."""
        if any(e["to"] == to and e["type"] == etype for e in self.edges):
            return False
        self.edges.append({"to": to, "type": etype})
        return True
94
+
95
+
96
class Graph:
    """The whole graph, loaded in memory; save() rewrites the sorted JSONL."""

    def __init__(self, root: str):
        """Create an empty graph whose file lives under *root*."""
        self.root = root
        self.path = os.path.join(root, GRAPH_DIR, GRAPH_FILE)
        self.nodes: dict[str, Node] = {}

    # -- persistence -----------------------------------------------------
    @classmethod
    def load(cls, root: str) -> "Graph":
        """Read the graph file under *root*; a missing file gives an empty graph."""
        graph = cls(root)
        if not os.path.exists(graph.path):
            return graph
        with open(graph.path, encoding="utf-8") as handle:
            for raw in handle:
                record = raw.strip()
                if record:  # blank lines are tolerated
                    node = Node.from_dict(json.loads(record))
                    graph.nodes[node.name] = node
        return graph

    def save(self) -> None:
        """Write every node as one JSON line, ordered by node name."""
        os.makedirs(os.path.dirname(self.path), exist_ok=True)
        with open(self.path, "w", encoding="utf-8") as handle:
            for name in sorted(self.nodes):
                payload = self.nodes[name].to_dict()
                handle.write(
                    json.dumps(payload, ensure_ascii=False, sort_keys=True) + "\n")

    # -- ops ---------------------------------------------------------------
    def upsert(self, node: Node) -> Node:
        """Insert *node*, or merge it into the node already stored by that name.

        On merge, non-empty type/language/summary overwrite, status always
        overwrites, and edges are added without duplicates. Returns the node
        that ends up stored.
        """
        current = self.nodes.get(node.name)
        if not current:
            self.nodes[node.name] = node
            return node
        current.type = node.type or current.type
        current.language = node.language or current.language
        if node.summary:
            current.summary = node.summary
        current.status = node.status
        for edge in node.edges:
            current.add_edge(edge["to"], edge["type"])
        return current

    def delete(self, name: str) -> bool:
        """Remove a node and every edge pointing at it; True if the node existed."""
        removed = self.nodes.pop(name, None)
        for other in self.nodes.values():
            other.edges = [edge for edge in other.edges if edge["to"] != name]
        return removed is not None

    def incoming(self, name: str) -> list[tuple[str, str]]:
        """Who points at this node — the blast radius. [(source, edge_type)]"""
        return sorted(
            (source.name, edge["type"])
            for source in self.nodes.values()
            for edge in source.edges
            if edge["to"] == name
        )

    def file_nodes(self) -> dict[str, Node]:
        """Nodes that represent files (no ::symbol suffix)."""
        files = {}
        for key, node in self.nodes.items():
            if "::" not in key:
                files[key] = node
        return files

    def with_status(self, status: str) -> list[Node]:
        """All nodes whose status equals *status*, ordered by name."""
        matching = [node for node in self.nodes.values() if node.status == status]
        matching.sort(key=lambda node: node.name)
        return matching

    def search(self, terms: list[str], limit: int = 20) -> list[tuple[float, Node]]:
        """Rank nodes by token overlap across name + summary. No server needed.

        Each term found as a substring scores 2.0 when it occurs in the name
        and 1.0 when only in the summary. Results are ordered by descending
        score, then name, and cut to *limit*.
        """
        needles = [term.lower() for term in terms if term]
        ranked = []
        for node in self.nodes.values():
            lowered_name = node.name.lower()
            haystack = (node.name + " " + node.summary).lower()
            score = sum(2.0 if needle in lowered_name else 1.0
                        for needle in needles if needle in haystack)
            if score > 0:
                ranked.append((score, node))
        ranked.sort(key=lambda pair: (-pair[0], pair[1].name))
        return ranked[:limit]
177
+
178
+
179
+ # -- config -----------------------------------------------------------------
180
def config_path(root: str) -> str:
    """Return the path of the config file inside *root*'s graph directory."""
    graph_dir = os.path.join(root, GRAPH_DIR)
    return os.path.join(graph_dir, CONFIG_FILE)
182
+
183
+
184
def load_config(root: str) -> dict:
    """Return the effective config: defaults overlaid with the repo's config file.

    Keys present in the config file replace the default value wholesale.
    The returned dict shares no mutable state with DEFAULT_CONFIG, so a
    caller may modify it freely.
    """
    # Copy the list values too: a shallow dict() copy would hand out the
    # module-level default lists themselves.
    cfg = {key: list(value) if isinstance(value, list) else value
           for key, value in DEFAULT_CONFIG.items()}
    p = config_path(root)
    if os.path.exists(p):
        with open(p, encoding="utf-8") as f:
            cfg.update(json.load(f))
    return cfg
191
+
192
+
193
def find_root(start: str | None = None) -> str | None:
    """Walk up from start (or cwd) to the directory containing .graphcoding/.

    Returns that directory's absolute path, or None when the filesystem root
    is reached without finding one.
    """
    current = os.path.abspath(start or os.getcwd())
    while not os.path.isdir(os.path.join(current, GRAPH_DIR)):
        parent = os.path.dirname(current)
        if parent == current:
            # dirname() is a fixed point only at the filesystem root
            return None
        current = parent
    return current
graphcoding/sync.py ADDED
@@ -0,0 +1,86 @@
1
+ """Sync — reconcile the graph with a set of changed files.
2
+
3
+ Sources of the change set:
4
+ --staged files staged right now (pre-commit)
5
+ --commit REF files changed in a commit (post-commit, default HEAD)
6
+ --files a b c explicit list
7
+ (none) every drifting file from a fresh drift report
8
+
9
+ Rules:
10
+ * added/modified file -> rescan; planned becomes ok (the design was built);
11
+ human summaries survive unless the file's own
12
+ docstring/comment is richer
13
+ * deleted file -> node removed, along with edges pointing at it
14
+ * to-be-deleted + gone -> node removed (deletion completed)
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import os
19
+ import subprocess
20
+
21
+ from .drift import compute_drift
22
+ from .scan import scan_file, trackable
23
+ from .store import Graph
24
+
25
+
26
+ def _git_changed(root: str, staged: bool, commit: str | None) -> list[tuple[str, str]]:
27
+ if staged:
28
+ cmd = ["git", "-C", root, "diff", "--cached", "--name-status"]
29
+ else:
30
+ ref = commit or "HEAD"
31
+ cmd = ["git", "-C", root, "diff", "--name-status", f"{ref}~1..{ref}"]
32
+ try:
33
+ out = subprocess.run(cmd, capture_output=True, text=True, check=True).stdout
34
+ except (subprocess.CalledProcessError, FileNotFoundError):
35
+ return []
36
+ changes = []
37
+ for line in out.splitlines():
38
+ parts = line.split("\t")
39
+ if len(parts) >= 2:
40
+ changes.append((parts[-1], parts[0][0])) # renames: new path wins
41
+ return changes
42
+
43
+
44
def sync(root: str, cfg: dict, graph: Graph,
         staged: bool = False, commit: str | None = None,
         files: list[str] | None = None) -> dict:
    """Reconcile *graph* with a set of changed files, then save it.

    The change set comes from, in priority order: the explicit *files*
    list, git (*staged* or *commit*), or a fresh drift report.

    Added/modified files are rescanned. The existing node's summary is kept
    when it is longer than the scanned one, a ``to-be-deleted`` mark is kept
    while the file still exists, and hand-written edges (any type other than
    IMPORTS/CONTAINS) are carried over. Deleted files have their node and
    symbol sub-nodes removed, together with edges pointing at them.

    Returns:
        ``{"upserted": [...], "removed": [...], "skipped": [...]}`` where
        *skipped* holds paths reported deleted that still exist on disk.
    """
    # IMPORTS/CONTAINS are rebuilt from source on every scan; every other
    # edge type was added by hand and must survive a rescan.
    scanner_edges = ("IMPORTS", "CONTAINS")

    if files:
        changes = [(f, "D" if not os.path.exists(os.path.join(root, f)) else "M")
                   for f in files]
    elif staged or commit:
        changes = _git_changed(root, staged, commit)
    else:
        rep = compute_drift(root, cfg, graph)
        changes = ([(p, "M") for p in rep["missing_node"] + rep["built_not_synced"]]
                   + [(p, "D") for p in rep["ghost_node"] + rep["not_deleted"]])
        # not_deleted files still exist; deleting the node is wrong — the FILE
        # should go. Surface them instead of silently "fixing" the graph.
        changes = [(p, s) for p, s in changes if p not in rep["not_deleted"]]

    upserted, removed, skipped = [], [], []
    for path, st in changes:
        if not trackable(path, cfg):
            continue
        if st == "D":
            if os.path.exists(os.path.join(root, path)):
                skipped.append(path)  # marked deleted in git but still on disk
                continue
            # drop the file node and its symbol sub-nodes
            for name in [n for n in graph.nodes
                         if n == path or n.startswith(path + "::")]:
                graph.delete(name)
            removed.append(path)
        else:
            node, subs = scan_file(root, path, cfg)
            old = graph.nodes.get(path)
            if old is not None:
                if len(old.summary) > len(node.summary):
                    node.summary = old.summary
                # editing a file does not cancel a pending deletion
                if old.status == "to-be-deleted":
                    node.status = old.status
                for e in old.edges:
                    if e["type"] not in scanner_edges:
                        node.add_edge(e["to"], e["type"])
            graph.nodes[path] = node
            for s in subs:
                prev = graph.nodes.get(s.name)
                if prev is not None:
                    if len(prev.summary) > len(s.summary):
                        s.summary = prev.summary
                    for e in prev.edges:
                        if e["type"] not in scanner_edges:
                            s.add_edge(e["to"], e["type"])
                graph.nodes[s.name] = s
            upserted.append(path)
    graph.save()
    return {"upserted": upserted, "removed": removed, "skipped": skipped}