superlocalmemory 3.3.20 → 3.3.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +9 -1
- package/src/superlocalmemory/cli/commands.py +138 -22
- package/src/superlocalmemory/cli/daemon.py +372 -0
- package/src/superlocalmemory/cli/main.py +8 -0
- package/src/superlocalmemory/cli/pending_store.py +158 -0
- package/src/superlocalmemory/cli/setup_wizard.py +39 -6
- package/src/superlocalmemory/code_graph/__init__.py +46 -0
- package/src/superlocalmemory/code_graph/blast_radius.py +177 -0
- package/src/superlocalmemory/code_graph/bridge/__init__.py +36 -0
- package/src/superlocalmemory/code_graph/bridge/entity_resolver.py +464 -0
- package/src/superlocalmemory/code_graph/bridge/event_listeners.py +195 -0
- package/src/superlocalmemory/code_graph/bridge/fact_enricher.py +159 -0
- package/src/superlocalmemory/code_graph/bridge/hebbian_linker.py +170 -0
- package/src/superlocalmemory/code_graph/bridge/temporal_checker.py +152 -0
- package/src/superlocalmemory/code_graph/changes.py +363 -0
- package/src/superlocalmemory/code_graph/communities.py +299 -0
- package/src/superlocalmemory/code_graph/config.py +88 -0
- package/src/superlocalmemory/code_graph/database.py +482 -0
- package/src/superlocalmemory/code_graph/extractors/__init__.py +78 -0
- package/src/superlocalmemory/code_graph/extractors/python.py +413 -0
- package/src/superlocalmemory/code_graph/extractors/typescript.py +556 -0
- package/src/superlocalmemory/code_graph/flows.py +350 -0
- package/src/superlocalmemory/code_graph/git_hooks.py +226 -0
- package/src/superlocalmemory/code_graph/graph_engine.py +295 -0
- package/src/superlocalmemory/code_graph/graph_store.py +158 -0
- package/src/superlocalmemory/code_graph/incremental.py +200 -0
- package/src/superlocalmemory/code_graph/models.py +130 -0
- package/src/superlocalmemory/code_graph/parser.py +507 -0
- package/src/superlocalmemory/code_graph/resolver.py +321 -0
- package/src/superlocalmemory/code_graph/search.py +460 -0
- package/src/superlocalmemory/code_graph/service.py +95 -0
- package/src/superlocalmemory/code_graph/watcher.py +207 -0
- package/src/superlocalmemory/core/embedding_worker.py +4 -2
- package/src/superlocalmemory/core/embeddings.py +8 -2
- package/src/superlocalmemory/core/engine.py +32 -0
- package/src/superlocalmemory/core/engine_wiring.py +5 -0
- package/src/superlocalmemory/core/store_pipeline.py +23 -1
- package/src/superlocalmemory/encoding/fact_extractor.py +68 -7
- package/src/superlocalmemory/infra/event_bus.py +5 -0
- package/src/superlocalmemory/mcp/server.py +23 -0
- package/src/superlocalmemory/mcp/tools_code_graph.py +1592 -0
- package/src/superlocalmemory/retrieval/engine.py +137 -2
- package/src/superlocalmemory/retrieval/semantic_channel.py +6 -2
- package/src/superlocalmemory/retrieval/spreading_activation.py +5 -3
- package/src/superlocalmemory/retrieval/strategy.py +16 -0
- package/src/superlocalmemory/server/api.py +4 -2
- package/src/superlocalmemory/server/ui.py +5 -2
- package/src/superlocalmemory/storage/schema_code_graph.py +239 -0
- package/src/superlocalmemory/ui/index.html +1879 -0
- package/src/superlocalmemory/ui/js/agents.js +192 -0
- package/src/superlocalmemory/ui/js/auto-settings.js +399 -0
- package/src/superlocalmemory/ui/js/behavioral.js +276 -0
- package/src/superlocalmemory/ui/js/clusters.js +206 -0
- package/src/superlocalmemory/ui/js/compliance.js +252 -0
- package/src/superlocalmemory/ui/js/core.js +246 -0
- package/src/superlocalmemory/ui/js/dashboard.js +110 -0
- package/src/superlocalmemory/ui/js/events.js +178 -0
- package/src/superlocalmemory/ui/js/fact-detail.js +92 -0
- package/src/superlocalmemory/ui/js/feedback.js +333 -0
- package/src/superlocalmemory/ui/js/graph-core.js +447 -0
- package/src/superlocalmemory/ui/js/graph-filters.js +220 -0
- package/src/superlocalmemory/ui/js/graph-interactions.js +351 -0
- package/src/superlocalmemory/ui/js/graph-ui.js +214 -0
- package/src/superlocalmemory/ui/js/ide-status.js +102 -0
- package/src/superlocalmemory/ui/js/init.js +45 -0
- package/src/superlocalmemory/ui/js/learning.js +435 -0
- package/src/superlocalmemory/ui/js/lifecycle.js +298 -0
- package/src/superlocalmemory/ui/js/math-health.js +98 -0
- package/src/superlocalmemory/ui/js/memories.js +264 -0
- package/src/superlocalmemory/ui/js/modal.js +357 -0
- package/src/superlocalmemory/ui/js/patterns.js +93 -0
- package/src/superlocalmemory/ui/js/profiles.js +236 -0
- package/src/superlocalmemory/ui/js/recall-lab.js +292 -0
- package/src/superlocalmemory/ui/js/search.js +59 -0
- package/src/superlocalmemory/ui/js/settings.js +224 -0
- package/src/superlocalmemory/ui/js/timeline.js +32 -0
- package/src/superlocalmemory/ui/js/trust-dashboard.js +73 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4 — CodeGraph Module
|
|
4
|
+
|
|
5
|
+
"""Data models for the CodeGraph module.
|
|
6
|
+
|
|
7
|
+
Frozen dataclasses + string enums. All immutable.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import time
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from enum import Enum
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from superlocalmemory.storage.models import _new_id
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Enums
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
class NodeKind(str, Enum):
    """Category of a code entity stored as a node in the graph.

    Members subclass ``str``, so each one compares equal to its
    lowercase string value.
    """

    FILE = "file"
    CLASS = "class"
    FUNCTION = "function"
    METHOD = "method"
    MODULE = "module"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class EdgeKind(str, Enum):
    """Category of a directed relationship between two graph nodes.

    Members subclass ``str``, so each one compares equal to its
    lowercase string value.
    """

    CALLS = "calls"
    IMPORTS = "imports"
    INHERITS = "inherits"
    CONTAINS = "contains"
    TESTED_BY = "tested_by"
    DEPENDS_ON = "depends_on"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class LinkType(str, Enum):
    """Category of a bridge link joining a code node to an SLM memory.

    Members subclass ``str``, so each one compares equal to its
    lowercase string value.
    """

    MENTIONS = "mentions"
    DECISION_ABOUT = "decision_about"
    BUG_FIX = "bug_fix"
    REFACTOR = "refactor"
    DESIGN_RATIONALE = "design_rationale"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
# Frozen Dataclasses
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
@dataclass(frozen=True)
class GraphNode:
    """Immutable record for one code entity (file, class, function, ...).

    Every field has a default. ``node_id`` is produced by ``_new_id`` and
    both timestamps default to ``time.time()`` at construction.
    """

    # --- identity ---
    node_id: str = field(default_factory=_new_id)
    kind: NodeKind = NodeKind.FUNCTION
    name: str = ""
    qualified_name: str = ""

    # --- source location ---
    file_path: str = ""
    line_start: int = 0
    line_end: int = 0
    language: str = ""

    # --- optional descriptive metadata (None when not provided) ---
    parent_name: str | None = None
    signature: str | None = None
    docstring: str | None = None
    is_test: bool = False
    content_hash: str | None = None
    community_id: int | None = None

    # Free-form extras kept as a JSON-encoded string; defaults to "{}".
    extra_json: str = "{}"

    # --- bookkeeping timestamps (epoch seconds from time.time) ---
    created_at: float = field(default_factory=time.time)
    updated_at: float = field(default_factory=time.time)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(frozen=True)
class GraphEdge:
    """Immutable record for a directed relationship between two nodes.

    Every field has a default. ``edge_id`` is produced by ``_new_id`` and
    both timestamps default to ``time.time()`` at construction.
    """

    # --- identity ---
    edge_id: str = field(default_factory=_new_id)
    kind: EdgeKind = EdgeKind.CALLS

    # --- endpoints, referenced by node id ---
    source_node_id: str = ""
    target_node_id: str = ""

    # --- where the relationship was observed ---
    file_path: str = ""
    line: int = 0

    # Defaults to 1.0.
    confidence: float = 1.0

    # Free-form extras kept as a JSON-encoded string; defaults to "{}".
    extra_json: str = "{}"

    # --- bookkeeping timestamps (epoch seconds from time.time) ---
    created_at: float = field(default_factory=time.time)
    updated_at: float = field(default_factory=time.time)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclass(frozen=True)
class FileRecord:
    """Immutable bookkeeping entry for one parsed source file.

    Every field has a default; ``last_indexed`` defaults to
    ``time.time()`` at construction.
    """

    # --- which file, and the state it was in when parsed ---
    file_path: str = ""
    content_hash: str = ""
    mtime: float = 0.0
    language: str = ""

    # --- how much graph content the file produced ---
    node_count: int = 0
    edge_count: int = 0

    # Epoch seconds at which the record was created.
    last_indexed: float = field(default_factory=time.time)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclass(frozen=True)
class CodeMemoryLink:
    """Immutable bridge record tying a code graph node to an SLM memory fact.

    Every field has a default; ``link_id`` is produced by ``_new_id``.
    """

    # --- identity ---
    link_id: str = field(default_factory=_new_id)

    # --- the two sides of the bridge ---
    code_node_id: str = ""
    slm_fact_id: str = ""
    slm_entity_id: str | None = None

    # --- classification of the link ---
    link_type: LinkType = LinkType.MENTIONS
    confidence: float = 0.8

    # --- lifecycle; timestamps here are strings, unlike the float
    # --- timestamps on GraphNode / GraphEdge ---
    created_at: str = ""
    last_verified: str | None = None
    is_stale: bool = False
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# ---------------------------------------------------------------------------
|
|
120
|
+
# Parse result containers (used by parser → database pipeline)
|
|
121
|
+
# ---------------------------------------------------------------------------
|
|
122
|
+
|
|
123
|
+
@dataclass(frozen=True)
class ParseResult:
    """Immutable bundle of everything produced by parsing one file.

    ``file_path``, ``nodes``, ``edges`` and ``file_record`` are required;
    ``errors`` defaults to an empty tuple.
    """

    # --- required ---
    file_path: str
    nodes: tuple[GraphNode, ...]
    edges: tuple[GraphEdge, ...]
    file_record: FileRecord

    # --- optional ---
    errors: tuple[str, ...] = ()
|
|
@@ -0,0 +1,507 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4 — CodeGraph Module
|
|
4
|
+
|
|
5
|
+
"""Multi-language tree-sitter parser with parallel execution.
|
|
6
|
+
|
|
7
|
+
Dispatches to language-specific extractors (Python, TypeScript).
|
|
8
|
+
Uses ProcessPoolExecutor for CPU-bound parallel file parsing.
|
|
9
|
+
|
|
10
|
+
tree-sitter imports are lazy (HR-07): only imported when parse_file
|
|
11
|
+
is called, never at module-level or package import time.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import hashlib
|
|
17
|
+
import logging
|
|
18
|
+
import os
|
|
19
|
+
import time
|
|
20
|
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
21
|
+
from fnmatch import fnmatch
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
from superlocalmemory.code_graph.config import CodeGraphConfig
|
|
26
|
+
from superlocalmemory.code_graph.models import (
|
|
27
|
+
EdgeKind,
|
|
28
|
+
FileRecord,
|
|
29
|
+
GraphEdge,
|
|
30
|
+
GraphNode,
|
|
31
|
+
NodeKind,
|
|
32
|
+
ParseResult,
|
|
33
|
+
)
|
|
34
|
+
from superlocalmemory.storage.models import _new_id
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class UnsupportedLanguageError(Exception):
    """Signals that the requested language has no parser support."""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ParseError(Exception):
    """Signals that tree-sitter could not parse a source file."""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _is_test_file(file_path: str, config: CodeGraphConfig) -> bool:
|
|
48
|
+
"""Check if a file is a test file based on config patterns."""
|
|
49
|
+
name = Path(file_path).name
|
|
50
|
+
parts = Path(file_path).parts
|
|
51
|
+
test_patterns = [
|
|
52
|
+
"test_*.py", "*_test.py",
|
|
53
|
+
"*.test.ts", "*.test.tsx", "*.spec.ts", "*.spec.tsx",
|
|
54
|
+
]
|
|
55
|
+
for pattern in test_patterns:
|
|
56
|
+
if fnmatch(name, pattern):
|
|
57
|
+
return True
|
|
58
|
+
# Check directory patterns
|
|
59
|
+
test_dirs = {"tests", "test", "__tests__", "spec"}
|
|
60
|
+
return bool(test_dirs.intersection(parts))
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _sha256(data: bytes) -> str:
|
|
64
|
+
"""Compute SHA-256 hex digest."""
|
|
65
|
+
return hashlib.sha256(data).hexdigest()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _make_qualified_name(
|
|
69
|
+
file_path: str, name: str, parent_name: str | None
|
|
70
|
+
) -> str:
|
|
71
|
+
if parent_name:
|
|
72
|
+
return f"{file_path}::{parent_name}.{name}"
|
|
73
|
+
return f"{file_path}::{name}"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
# Module-level parse function (picklable for ProcessPoolExecutor)
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
def _parse_file_standalone(
|
|
81
|
+
file_path_str: str,
|
|
82
|
+
source_bytes: bytes,
|
|
83
|
+
language: str,
|
|
84
|
+
config_dict: dict[str, Any],
|
|
85
|
+
) -> dict[str, Any]:
|
|
86
|
+
"""Parse a single file. Module-level function for pickling.
|
|
87
|
+
|
|
88
|
+
Returns a serializable dict with nodes, edges, errors.
|
|
89
|
+
"""
|
|
90
|
+
try:
|
|
91
|
+
# Lazy import (HR-07)
|
|
92
|
+
from tree_sitter_language_pack import get_parser # noqa: F811
|
|
93
|
+
|
|
94
|
+
parser_instance = get_parser(language)
|
|
95
|
+
tree = parser_instance.parse(source_bytes)
|
|
96
|
+
root = tree.root_node
|
|
97
|
+
|
|
98
|
+
# Create a minimal config for the extractor
|
|
99
|
+
config = CodeGraphConfig(**{
|
|
100
|
+
k: v for k, v in config_dict.items()
|
|
101
|
+
if k in CodeGraphConfig.__dataclass_fields__
|
|
102
|
+
})
|
|
103
|
+
|
|
104
|
+
# Select extractor
|
|
105
|
+
if language == "python":
|
|
106
|
+
from superlocalmemory.code_graph.extractors.python import PythonExtractor
|
|
107
|
+
extractor = PythonExtractor(root, source_bytes, file_path_str, config)
|
|
108
|
+
elif language in ("typescript", "tsx", "javascript", "jsx"):
|
|
109
|
+
from superlocalmemory.code_graph.extractors.typescript import TypeScriptExtractor
|
|
110
|
+
extractor = TypeScriptExtractor(root, source_bytes, file_path_str, config)
|
|
111
|
+
else:
|
|
112
|
+
return {"nodes": [], "edges": [], "errors": [f"Unsupported language: {language}"]}
|
|
113
|
+
|
|
114
|
+
nodes, edges = extractor.extract()
|
|
115
|
+
|
|
116
|
+
return {
|
|
117
|
+
"nodes": nodes,
|
|
118
|
+
"edges": edges,
|
|
119
|
+
"errors": [],
|
|
120
|
+
}
|
|
121
|
+
except Exception as exc:
|
|
122
|
+
return {
|
|
123
|
+
"nodes": [],
|
|
124
|
+
"edges": [],
|
|
125
|
+
"errors": [str(exc)],
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class CodeParser:
    """Multi-language tree-sitter parser with parallel execution.

    ``parse_file`` handles a single file in-process; ``parse_all``
    discovers every parseable file under a repository root and parses
    them, using a ``ProcessPoolExecutor`` once there are more than two.
    """

    # Languages for which an extractor exists.
    _SUPPORTED_LANGUAGES = frozenset(
        {"python", "typescript", "tsx", "javascript", "jsx"}
    )

    def __init__(self, config: CodeGraphConfig) -> None:
        """Store config. Does not import tree-sitter yet (lazy)."""
        self._config = config

    def discover_files(self, repo_root: Path) -> list[Path]:
        """Find all parseable files under *repo_root*.

        A file is kept when its suffix is a key of ``extension_map``, its
        size does not exceed ``max_file_size_bytes`` and neither its
        relative path nor its bare name matches any ``exclude_patterns``
        glob. Directories named in (or glob-matched by) ``exclude_dirs``
        are not descended into.

        Returns:
            Paths relative to *repo_root*, sorted.

        Raises:
            FileNotFoundError: If *repo_root* does not exist.
        """
        if not repo_root.exists():
            raise FileNotFoundError(f"Repository root does not exist: {repo_root}")

        config = self._config
        exclude_dirs = config.exclude_dirs
        found: list[Path] = []

        for dirpath, dirnames, filenames in os.walk(repo_root):
            # Slice-assign so os.walk itself skips the excluded subtrees.
            dirnames[:] = [
                d for d in dirnames
                if d not in exclude_dirs
                and not any(fnmatch(d, p) for p in exclude_dirs)
            ]

            for filename in filenames:
                if Path(filename).suffix not in config.extension_map:
                    continue

                full_path = Path(dirpath) / filename
                try:
                    size = full_path.stat().st_size
                except OSError:
                    # Unreadable or vanished entry: silently ignore.
                    continue

                if size > config.max_file_size_bytes:
                    logger.warning(
                        "Skipping large file (%d bytes): %s", size, full_path
                    )
                    continue

                rel = full_path.relative_to(repo_root)
                rel_str = str(rel)
                if any(
                    fnmatch(rel_str, pattern) or fnmatch(filename, pattern)
                    for pattern in config.exclude_patterns
                ):
                    continue

                found.append(rel)

        return sorted(found)

    def parse_file(
        self,
        file_path: Path,
        source_bytes: bytes,
        language: str,
    ) -> tuple[list[GraphNode], list[GraphEdge]]:
        """Parse a single file and return extracted nodes and edges.

        The result starts with a FILE node for the file itself, followed
        by the extractor's nodes. Edges are the extractor's edges followed
        by generated CONTAINS and TESTED_BY edges.

        Raises:
            UnsupportedLanguageError: If *language* is not supported.
            ImportError: If ``tree_sitter_language_pack`` is not installed.
        """
        if language not in self._SUPPORTED_LANGUAGES:
            raise UnsupportedLanguageError(f"Unsupported language: {language}")

        try:
            from tree_sitter_language_pack import get_parser
        except ImportError as exc:
            raise ImportError(
                "tree-sitter required. Install: pip install 'superlocalmemory[code-graph]'"
            ) from exc

        root = get_parser(language).parse(source_bytes).root_node
        file_path_str = str(file_path)

        # Select and run extractor
        if language == "python":
            from superlocalmemory.code_graph.extractors.python import PythonExtractor
            extractor = PythonExtractor(root, source_bytes, file_path_str, self._config)
        else:
            from superlocalmemory.code_graph.extractors.typescript import TypeScriptExtractor
            extractor = TypeScriptExtractor(root, source_bytes, file_path_str, self._config)

        extracted_nodes, extracted_edges = extractor.extract()

        return self._assemble(
            file_path,
            source_bytes,
            language,
            extracted_nodes,
            extracted_edges,
            line_end=root.end_point[0],
        )

    def parse_all(
        self, repo_root: Path
    ) -> tuple[list[GraphNode], list[GraphEdge], list[FileRecord]]:
        """Parse entire project, in parallel when there are enough files.

        Files that cannot be read or parsed are logged and skipped.

        Returns:
            ``(all_nodes, all_edges, all_file_records)``. In the parallel
            path, results are appended in completion order, not file order.
        """
        files = self.discover_files(repo_root)
        if not files:
            return [], [], []

        all_nodes: list[GraphNode] = []
        all_edges: list[GraphEdge] = []
        all_file_records: list[FileRecord] = []

        # Read files and prepare tasks
        tasks: list[tuple[Path, bytes, str]] = []
        for rel_path in files:
            full_path = repo_root / rel_path
            try:
                source_bytes = full_path.read_bytes()
            except OSError as exc:
                logger.warning("Failed to read %s: %s", full_path, exc)
                continue

            language = self._config.extension_map.get(rel_path.suffix)
            if language is None:
                continue

            tasks.append((rel_path, source_bytes, language))

        # For small numbers of files, run sequentially to avoid the
        # process-pool start-up overhead.
        if len(tasks) <= 2:
            for rel_path, source_bytes, language in tasks:
                try:
                    nodes, edges = self.parse_file(rel_path, source_bytes, language)
                except Exception as exc:
                    logger.warning("Failed to parse %s: %s", rel_path, exc)
                    continue
                # Record only after a successful parse, so a file never
                # contributes nodes/edges without a matching FileRecord.
                all_nodes.extend(nodes)
                all_edges.extend(edges)
                all_file_records.append(self._make_file_record(
                    repo_root, rel_path, source_bytes, language,
                    len(nodes), len(edges),
                ))
            return all_nodes, all_edges, all_file_records

        # Parallel execution. Path-valued fields are left out of the dict
        # sent to the workers; only repo_root is added back, as a string.
        config_dict: dict[str, Any] = {}
        for field_name in CodeGraphConfig.__dataclass_fields__:
            value = getattr(self._config, field_name)
            if not isinstance(value, Path):
                config_dict[field_name] = value
        config_dict["repo_root"] = str(self._config.repo_root)

        # ProcessPoolExecutor rejects max_workers <= 0; clamp to one.
        workers = max(1, min(self._config.parallel_workers, len(tasks)))
        with ProcessPoolExecutor(max_workers=workers) as executor:
            future_map = {
                executor.submit(
                    _parse_file_standalone,
                    str(rel_path),
                    source_bytes,
                    language,
                    config_dict,
                ): (rel_path, source_bytes, language)
                for rel_path, source_bytes, language in tasks
            }

            for future in as_completed(future_map):
                rel_path, source_bytes, language = future_map[future]
                try:
                    # NOTE: as_completed only yields finished futures, so
                    # result() returns at once and this timeout does not
                    # bound how long a worker may spend parsing.
                    result = future.result(timeout=self._config.parse_timeout_seconds)
                except Exception as exc:
                    logger.warning("Parse failed for %s: %s", rel_path, exc)
                    continue

                for err in result["errors"]:
                    logger.warning("Parse error in %s: %s", rel_path, err)
                # Skip only when the worker reported errors AND produced
                # nothing; an error-free empty file still gets a FILE node.
                if result["errors"] and not result["nodes"]:
                    continue

                # The worker does not return the syntax tree, so derive the
                # file's last row from the newline count. This is meant to
                # agree with parse_file's root.end_point[0].
                # NOTE(review): confirm against tree-sitter for edge cases.
                nodes, edges = self._assemble(
                    rel_path,
                    source_bytes,
                    language,
                    result["nodes"],
                    result["edges"],
                    line_end=source_bytes.count(b"\n"),
                )

                all_nodes.extend(nodes)
                all_edges.extend(edges)
                all_file_records.append(self._make_file_record(
                    repo_root, rel_path, source_bytes, language,
                    len(nodes), len(edges),
                ))

        return all_nodes, all_edges, all_file_records

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _assemble(
        self,
        file_path: Path,
        source_bytes: bytes,
        language: str,
        extracted_nodes: list[GraphNode],
        extracted_edges: list[GraphEdge],
        line_end: int,
    ) -> tuple[list[GraphNode], list[GraphEdge]]:
        """Wrap extractor output with the FILE node and generated edges."""
        file_path_str = str(file_path)
        file_node = GraphNode(
            node_id=_new_id(),
            kind=NodeKind.FILE,
            name=file_path.name,
            qualified_name=file_path_str,
            file_path=file_path_str,
            line_start=0,
            line_end=line_end,
            language=language,
            content_hash=_sha256(source_bytes),
        )

        nodes = list(extracted_nodes)
        if _is_test_file(file_path_str, self._config):
            nodes = self._mark_test_nodes(nodes)

        contains_edges = self._generate_contains_edges(file_node, nodes)
        tested_by_edges = self._generate_tested_by_edges(nodes, extracted_edges)

        all_nodes = [file_node] + nodes
        all_edges = list(extracted_edges) + contains_edges + tested_by_edges
        return all_nodes, all_edges

    @staticmethod
    def _mark_test_nodes(nodes: list[GraphNode]) -> list[GraphNode]:
        """Return *nodes* with every function/method copy flagged is_test."""
        # Local import keeps this class independent of the module's
        # top-level import list.
        from dataclasses import replace

        testable = (NodeKind.FUNCTION, NodeKind.METHOD)
        # replace() copies every field, so community_id and the
        # timestamps survive; only is_test changes.
        return [
            replace(node, is_test=True) if node.kind in testable else node
            for node in nodes
        ]

    @staticmethod
    def _make_file_record(
        repo_root: Path,
        rel_path: Path,
        source_bytes: bytes,
        language: str,
        node_count: int,
        edge_count: int,
    ) -> FileRecord:
        """Build the FileRecord for one parsed file (mtime 0.0 if stat fails)."""
        try:
            mtime = (repo_root / rel_path).stat().st_mtime
        except OSError:
            mtime = 0.0
        return FileRecord(
            file_path=str(rel_path),
            content_hash=_sha256(source_bytes),
            mtime=mtime,
            language=language,
            node_count=node_count,
            edge_count=edge_count,
            last_indexed=time.time(),
        )

    @staticmethod
    def _generate_contains_edges(
        file_node: GraphNode, extracted_nodes: list[GraphNode]
    ) -> list[GraphEdge]:
        """Generate CONTAINS edges: File -> top-level, parent -> child.

        Each extracted node gets exactly one CONTAINS edge. Its source is
        the node named by ``parent_name`` when that name resolves to a
        different node in this file, otherwise the file node.
        """
        # Name -> node_id lookup for parents. Later nodes overwrite
        # earlier ones with the same name, except that a CLASS node
        # always wins over a same-named non-class node.
        name_to_id: dict[str, str] = {
            node.name: node.node_id for node in extracted_nodes
        }
        name_to_id.update(
            (node.name, node.node_id)
            for node in extracted_nodes
            if node.kind == NodeKind.CLASS
        )

        edges: list[GraphEdge] = []
        for node in extracted_nodes:
            owner_id = file_node.node_id
            if node.parent_name is not None:
                parent_id = name_to_id.get(node.parent_name)
                # Never let a node contain itself; fall back to the file.
                if parent_id and parent_id != node.node_id:
                    owner_id = parent_id
            edges.append(GraphEdge(
                edge_id=_new_id(),
                kind=EdgeKind.CONTAINS,
                source_node_id=owner_id,
                target_node_id=node.node_id,
                file_path=file_node.file_path,
                line=node.line_start,
            ))
        return edges

    @staticmethod
    def _generate_tested_by_edges(
        nodes: list[GraphNode], edges: list[GraphEdge]
    ) -> list[GraphEdge]:
        """Generate TESTED_BY edges: for each CALLS from test to non-test.

        The new edge points the other way round from the CALLS edge: its
        source is the called node and its target is the calling test.
        """
        test_node_ids = {n.node_id for n in nodes if n.is_test}
        if not test_node_ids:
            return []

        tested_by: list[GraphEdge] = []
        for edge in edges:
            if edge.kind != EdgeKind.CALLS:
                continue
            if edge.source_node_id not in test_node_ids:
                continue
            if edge.target_node_id in test_node_ids:
                # Test-to-test calls do not count as coverage.
                continue
            tested_by.append(GraphEdge(
                edge_id=_new_id(),
                kind=EdgeKind.TESTED_BY,
                source_node_id=edge.target_node_id,
                target_node_id=edge.source_node_id,
                file_path=edge.file_path,
                line=edge.line,
            ))
        return tested_by
|