knowcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowcode-0.1.0.dist-info/METADATA +175 -0
- knowcode-0.1.0.dist-info/RECORD +63 -0
- knowcode-0.1.0.dist-info/WHEEL +4 -0
- knowcode-0.1.0.dist-info/entry_points.txt +2 -0
- runtime/__init__.py +4 -0
- runtime/artifact/__init__.py +1 -0
- runtime/artifact/builder.py +179 -0
- runtime/cli/__init__.py +1 -0
- runtime/cli/animation.py +278 -0
- runtime/cli/app.py +309 -0
- runtime/cli/auth.py +171 -0
- runtime/cli/telemetry.py +91 -0
- runtime/exceptions/__init__.py +1 -0
- runtime/exceptions/errors.py +99 -0
- runtime/repository/__init__.py +13 -0
- runtime/repository/discovery.py +64 -0
- runtime/repository/models.py +103 -0
- runtime/repository/paths.py +50 -0
- runtime/repository/validator.py +100 -0
- runtime/services/__init__.py +1 -0
- runtime/services/ingest_service.py +105 -0
- runtime/services/init_service.py +45 -0
- runtime/services/semantic_sync_service.py +55 -0
- runtime/services/status_service.py +40 -0
- runtime/services/sync_service.py +57 -0
- runtime/templates/KNOWCODE_LOADER.md.j2 +24 -0
- runtime/templates/README_KNOWLEDGE.md.j2 +12 -0
- runtime/templates/README_STRUCTURE.md.j2 +19 -0
- runtime/templates/__init__.py +1 -0
- runtime/templates/active_context.md.j2 +3 -0
- runtime/templates/ingest_legacy.md.j2 +15 -0
- runtime/templates/raw_readme.md.j2 +9 -0
- runtime/templates/sync_reconciliation.md.j2 +17 -0
- runtime/templates/synthesize_knowledge.md.j2 +32 -0
- runtime/templates/track_intent.md.j2 +14 -0
- structural_engine/__init__.py +3 -0
- structural_engine/diff/__init__.py +1 -0
- structural_engine/diff/generator.py +92 -0
- structural_engine/diff/models.py +48 -0
- structural_engine/engine.py +192 -0
- structural_engine/logs/__init__.py +1 -0
- structural_engine/logs/generator.py +33 -0
- structural_engine/parser/__init__.py +7 -0
- structural_engine/parser/discovery.py +165 -0
- structural_engine/parser/extractors/base.py +44 -0
- structural_engine/parser/languages/javascript/adapter.py +149 -0
- structural_engine/parser/languages/python/adapter.py +174 -0
- structural_engine/parser/languages/typescript/adapter.py +165 -0
- structural_engine/parser/models.py +186 -0
- structural_engine/parser/parser.py +160 -0
- structural_engine/parser/resolvers/calls.py +105 -0
- structural_engine/parser/tree_sitter/registry.py +61 -0
- structural_engine/reports/__init__.py +1 -0
- structural_engine/reports/generator.py +77 -0
- structural_engine/results.py +54 -0
- structural_engine/revisions/__init__.py +1 -0
- structural_engine/revisions/tracker.py +32 -0
- structural_engine/snapshot/__init__.py +1 -0
- structural_engine/snapshot/generator.py +58 -0
- structural_engine/snapshot/loader.py +59 -0
- structural_engine/state/__init__.py +1 -0
- structural_engine/state/manager.py +169 -0
- structural_engine/state/models.py +34 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Structural Engine Facade.
|
|
2
|
+
|
|
3
|
+
The single public entry point for all structural operations.
|
|
4
|
+
Orchestrates parsers, state, diffs, and generators safely behind
|
|
5
|
+
a strict facade.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import structlog
|
|
11
|
+
from ruamel.yaml import YAML
|
|
12
|
+
|
|
13
|
+
from runtime.exceptions.errors import StructuralEngineFailure
|
|
14
|
+
from runtime.repository.models import RepositoryPaths
|
|
15
|
+
from structural_engine.diff.generator import generate as generate_diff
|
|
16
|
+
from structural_engine.logs.generator import append as append_log
|
|
17
|
+
from structural_engine.parser.parser import parse
|
|
18
|
+
from structural_engine.reports.generator import generate as generate_report
|
|
19
|
+
from structural_engine.results import InitializationResult, StructuralStatusResult, SyncResult
|
|
20
|
+
from structural_engine.revisions.tracker import get_next_revision
|
|
21
|
+
from structural_engine.snapshot.generator import persist as persist_snapshot
|
|
22
|
+
from structural_engine.snapshot.loader import load as load_snapshot
|
|
23
|
+
from structural_engine.state.manager import StateManager
|
|
24
|
+
|
|
25
|
+
logger = structlog.get_logger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class StructuralEngine:
    """Facade for the Structural Engine.

    Exposes ``initialize``, ``status`` and ``sync`` and delegates the work
    to the parser, snapshot, diff, report, log and state modules. Any
    exception raised inside an operation is logged and re-raised as
    ``StructuralEngineFailure`` with the original exception chained.
    """

    def __init__(self) -> None:
        """Create the facade with its own ``StateManager`` instance."""
        self.state_manager = StateManager()

    def initialize(self, paths: RepositoryPaths) -> InitializationResult:
        """Initialize the structural state of the repository.

        Must only be called after the Runtime has scaffolded the
        .knowcode directory via ArtifactBuilder.

        Parameters
        ----------
        paths : RepositoryPaths
            Canonical paths.

        Returns
        -------
        InitializationResult
            Success and initial S-001 markers.

        Raises
        ------
        StructuralEngineFailure
            If parsing, snapshot persistence or state initialization fails.
        """
        try:
            logger.info("engine.initialize.started")

            # 1. Parse current state
            snapshot = parse(paths)

            # 2. Persist initial snapshot (the first revision is always S-001)
            revision_id = "S-001"
            snapshot_file = persist_snapshot(snapshot, revision_id, paths)

            # 3. Initialize state.yaml
            self.state_manager.initialize(paths)

            logger.info("engine.initialize.complete", revision=revision_id)
            return InitializationResult(
                success=True,
                structural_revision=revision_id,
                snapshot_file=snapshot_file,
                message="Structural Engine initialized successfully.",
            )
        except Exception as e:
            logger.exception("engine.initialize.failed")
            raise StructuralEngineFailure(f"Failed to initialize Engine: {e}") from e

    def status(self, paths: RepositoryPaths) -> StructuralStatusResult:
        """Get the current status of the engine.

        Parameters
        ----------
        paths : RepositoryPaths
            Canonical paths.

        Returns
        -------
        StructuralStatusResult
            Combined structural state and unowned passthrough fields.

        Raises
        ------
        StructuralEngineFailure
            If the state cannot be loaded or the state file cannot be read.
        """
        try:
            state = self.state_manager.load(paths)

            # Read semantic_revision manually for the passthrough
            yaml = YAML()
            with open(paths.state_file, "r", encoding="utf-8") as f:
                raw_data = yaml.load(f)
            # An empty YAML document loads as None; treat it like a file
            # without a semantic_revision key instead of failing on ``.get``.
            semantic_rev = (raw_data or {}).get("semantic_revision", "none")

            return StructuralStatusResult(
                initialized=True,
                structural_revision=state.structural_revision,
                semantic_revision=semantic_rev,
                current_snapshot=state.current_snapshot,
                latest_report=state.latest_report,
                last_sync=state.last_sync,
                repository_root=str(paths.repo_root),
            )
        except Exception as e:
            logger.exception("engine.status.failed")
            raise StructuralEngineFailure(f"Failed to read engine status: {e}") from e

    def sync(self, paths: RepositoryPaths) -> SyncResult:
        """Synchronize the engine with the current repository filesystem.

        Follows strict persistence ordering:
        Parse -> Diff -> Persist Snapshot -> Persist Report -> Persist Log -> Update State.

        When the diff reports no changes, nothing is persisted and the
        current revision, snapshot and report are returned unchanged.

        Parameters
        ----------
        paths : RepositoryPaths
            Canonical paths.

        Returns
        -------
        SyncResult
            Outcome of the sync operation.

        Raises
        ------
        StructuralEngineFailure
            If any step fails, including a missing previous snapshot. A
            ``FAILED`` entry is appended to the sync log on a best-effort
            basis before the exception is raised.
        """
        try:
            logger.info("engine.sync.started")

            # 1. Load previous state
            state = self.state_manager.load(paths)
            prev_snapshot = load_snapshot(state.current_snapshot, paths)
            if prev_snapshot is None:
                raise StructuralEngineFailure(f"Missing snapshot: {state.current_snapshot}")

            # 2. Parse current state
            curr_snapshot = parse(paths)

            # 3. Compute Diff
            diff = generate_diff(prev_snapshot, curr_snapshot)

            # 4. No-Change Shortcut
            if not diff.has_changes:
                logger.info("engine.sync.no_changes")
                return SyncResult(
                    success=True,
                    changes_detected=False,
                    structural_revision=state.structural_revision,
                    snapshot_file=state.current_snapshot,
                    report_file=state.latest_report,
                    affected_components=frozenset(),
                    message="No structural changes detected.",
                )

            # 5. Changes detected — execute strict persistence order
            next_rev = get_next_revision(state.structural_revision)

            # 5a. Persist Snapshot
            snapshot_file = persist_snapshot(curr_snapshot, next_rev, paths)

            # 5b. Persist Report
            report_file = generate_report(diff, next_rev, paths)

            # 5c. Persist Log
            append_log(next_rev, report_file, "SUCCESS", paths)

            # 5d. Update State (MUST BE LAST)
            self.state_manager.update(
                paths=paths,
                snapshot_id=snapshot_file,
                revision_id=next_rev,
                report_id=report_file,
            )

            logger.info("engine.sync.complete", next_revision=next_rev)
            return SyncResult(
                success=True,
                changes_detected=True,
                structural_revision=next_rev,
                snapshot_file=snapshot_file,
                report_file=report_file,
                affected_components=diff.affected_components,
                message=f"Sync complete: {next_rev}",
            )

        except Exception as e:
            logger.exception("engine.sync.failed")
            # Attempt to log failure if possible, but safely: recording the
            # failure is best-effort and must never mask the original error.
            try:
                state = self.state_manager.load(paths)
                append_log(state.structural_revision, "none", f"FAILED: {e}", paths)
            except Exception:
                # Do not swallow silently — leave a trace that the sync log
                # entry could not be written, then fall through to the raise.
                logger.warning("engine.sync.failure_log_skipped", exc_info=True)
            raise StructuralEngineFailure(f"Sync failed: {e}") from e
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Logs subsystem.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Log generator.
|
|
2
|
+
|
|
3
|
+
Appends structured timestamps and operations to the sync log.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
|
|
10
|
+
from runtime.repository.models import RepositoryPaths
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def append(revision_id: str, report_id: str, status: str, paths: RepositoryPaths) -> None:
    """Append a sync operation entry to the persistent log.

    Each call writes exactly one Markdown list item (one line) carrying a
    UTC ISO-8601 timestamp, the revision, the report and the status.

    Parameters
    ----------
    revision_id : str
        The ID of the structural revision the entry refers to.
    report_id : str
        The filename of the generated report, or ``none``.
    status : str
        The outcome of the sync (e.g. ``SUCCESS`` or ``FAILED: <reason>``).
        Runs of whitespace, including newlines, are collapsed to single
        spaces so the entry always stays on one line.
    paths : RepositoryPaths
        Canonical paths; appends to ``paths.logs_dir / sync.md``.

    Raises
    ------
    OSError
        If the log file cannot be opened for appending.
    """
    log_file = paths.logs_dir / "sync.md"
    timestamp = datetime.now(timezone.utc).isoformat()

    # Failure statuses embed exception text, which may span several lines;
    # a raw newline would split the list item and corrupt the log layout.
    status_text = " ".join(str(status).split())

    entry = (
        f"- **{timestamp}** | Revision: `{revision_id}` | "
        f"Report: `{report_id}` | Status: `{status_text}`\n"
    )

    with open(log_file, "a", encoding="utf-8") as f:
        f.write(entry)
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""File discovery and language detection.
|
|
2
|
+
|
|
3
|
+
Recursively walks the repository starting from ``repo_root``, discovers
|
|
4
|
+
source files in supported languages, and filters out ignored directories.
|
|
5
|
+
|
|
6
|
+
This module is stateless and side-effect free. It performs no writes,
|
|
7
|
+
no persistence, and no Brain-awareness.
|
|
8
|
+
|
|
9
|
+
Ignored Directories
|
|
10
|
+
-------------------
|
|
11
|
+
::
|
|
12
|
+
|
|
13
|
+
.git, .knowcode, .agent, node_modules, dist, build, target, venv,
|
|
14
|
+
.venv, __pycache__, .mypy_cache, .pytest_cache, .tox,
|
|
15
|
+
.eggs, .idea, .vscode, .vs
|
|
16
|
+
|
|
17
|
+
Supported Extensions
|
|
18
|
+
--------------------
|
|
19
|
+
::
|
|
20
|
+
|
|
21
|
+
.py → PYTHON
|
|
22
|
+
.ts → TYPESCRIPT
|
|
23
|
+
.tsx → TYPESCRIPT
|
|
24
|
+
.js → JAVASCRIPT
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
|
|
31
|
+
from structural_engine.parser.models import (
|
|
32
|
+
EXTENSION_LANGUAGE_MAP,
|
|
33
|
+
FileInfo,
|
|
34
|
+
Language,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Directories to skip during recursive walk.
|
|
38
|
+
# This set is intentionally broad to avoid traversing large vendored
|
|
39
|
+
# or generated trees that have no structural relevance.
|
|
40
|
+
IGNORED_DIRECTORIES: frozenset[str] = frozenset(
    (
        # Dot-prefixed (hidden) directories.
        ".git",
        ".knowcode",
        ".agent",
        ".venv",
        ".mypy_cache",
        ".pytest_cache",
        ".tox",
        ".eggs",
        ".idea",
        ".vscode",
        ".vs",
        # Plain-named directories.
        "node_modules",
        "dist",
        "build",
        "target",
        "venv",
        "__pycache__",
    )
)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def discover_files(repo_root: Path) -> list[FileInfo]:
    """Collect every supported source file below ``repo_root``.

    Parameters
    ----------
    repo_root : Path
        Absolute path to the repository root.

    Returns
    -------
    list[FileInfo]
        One entry per file whose extension maps to a supported language,
        ordered by ``relative_path`` so the result is deterministic.
        Files inside ignored directories never reach this function's
        output because the walker does not descend into them.
    """

    def _supported_files():
        for candidate in _walk_filtered(repo_root):
            if not candidate.is_file():
                continue

            lang = detect_language(candidate)
            if lang is None:
                continue

            # POSIX-style separators keep the relative path (and therefore
            # the entity IDs derived from it) identical across platforms.
            yield FileInfo(
                absolute_path=candidate,
                relative_path=candidate.relative_to(repo_root).as_posix(),
                language=lang,
            )

    # Stable ordering matters: entity ID generation depends on it.
    return sorted(_supported_files(), key=lambda info: info.relative_path)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def detect_language(file_path: Path) -> Language | None:
    """Map a file's extension to its programming language.

    The lookup is case-insensitive: the path's final suffix is lowercased
    before it is looked up in ``EXTENSION_LANGUAGE_MAP``.

    Parameters
    ----------
    file_path : Path
        Path to the source file.

    Returns
    -------
    Language | None
        The language registered for the extension, or *None* when the
        extension has no entry in the map.
    """
    extension = file_path.suffix.lower()
    return EXTENSION_LANGUAGE_MAP.get(extension)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _walk_filtered(root: Path) -> list[Path]:
    """Return every file path under *root*, skipping ignored directories.

    Thin wrapper that owns the result list and hands it to
    ``_walk_recursive``, which does the actual traversal and filtering.

    Parameters
    ----------
    root : Path
        Directory to walk.

    Returns
    -------
    list[Path]
        All non-ignored file paths under *root*, in traversal order.
    """
    collected: list[Path] = []
    _walk_recursive(root, collected)
    return collected
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _walk_recursive(directory: Path, accumulator: list[Path]) -> None:
    """Internal recursive walker.

    Lists *directory* in name order, appends every regular file to
    *accumulator*, and recurses into each sub-directory whose name is not
    in ``IGNORED_DIRECTORIES``.

    Parameters
    ----------
    directory : Path
        Current directory being walked.
    accumulator : list[Path]
        Mutable list collecting discovered file paths.
    """
    try:
        entries = sorted(directory.iterdir(), key=lambda p: p.name)
    except OSError:
        # Skip directories we can't list. Besides missing permissions
        # (PermissionError is an OSError), a directory can disappear or be
        # replaced between being seen by the parent and being listed here;
        # none of these should abort the whole discovery.
        return

    for entry in entries:
        # NOTE(review): Path.is_dir() follows symbolic links, so symlinked
        # directories are descended into — confirm this is intended.
        if entry.is_dir():
            if entry.name not in IGNORED_DIRECTORIES:
                _walk_recursive(entry, accumulator)
        elif entry.is_file():
            accumulator.append(entry)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Base extraction architecture.
|
|
2
|
+
|
|
3
|
+
Defines the interface for language-specific adapters that extract
|
|
4
|
+
entities, relationships, and raw calls from tree-sitter ASTs.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Protocol
|
|
11
|
+
|
|
12
|
+
import tree_sitter
|
|
13
|
+
|
|
14
|
+
from structural_engine.parser.models import Entity, FileInfo, RawCall, Relationship
|
|
15
|
+
|
|
16
|
+
@dataclass
class ExtractionResult:
    """Container for the structural facts gathered from one source file.

    Every field defaults to its own fresh empty list, so an instance can be
    created without arguments and filled incrementally.
    """

    # Entities found in the file.
    entities: list[Entity] = field(default_factory=list)
    # Relationships recorded between entities.
    relationships: list[Relationship] = field(default_factory=list)
    # Call sites that have not been resolved to a target entity yet.
    raw_calls: list[RawCall] = field(default_factory=list)
|
|
23
|
+
|
|
24
|
+
class LanguageAdapter(Protocol):
    """Structural interface every language-specific AST extractor satisfies."""

    def extract(
        self,
        file_info: FileInfo,
        tree: tree_sitter.Tree,
        source_bytes: bytes,
    ) -> ExtractionResult:
        """Turn one parsed source file into structural facts.

        Parameters
        ----------
        file_info : FileInfo
            Information about the source file being parsed.
        tree : tree_sitter.Tree
            The tree-sitter syntax tree of that file.
        source_bytes : bytes
            The raw bytes of the source file.

        Returns
        -------
        ExtractionResult
            The extracted entities, relationships, and unresolved calls.
        """
        ...
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""JavaScript structural extractor.
|
|
2
|
+
|
|
3
|
+
Walks a JavaScript tree-sitter AST to extract entities, relationships,
|
|
4
|
+
and raw calls.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import tree_sitter
|
|
10
|
+
|
|
11
|
+
from structural_engine.parser.extractors.base import ExtractionResult, LanguageAdapter
|
|
12
|
+
from structural_engine.parser.models import (
|
|
13
|
+
Entity,
|
|
14
|
+
EntityType,
|
|
15
|
+
FileInfo,
|
|
16
|
+
RawCall,
|
|
17
|
+
Relationship,
|
|
18
|
+
RelationshipType,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class JavaScriptAdapter(LanguageAdapter):
    """AST extractor for JavaScript.

    Emits one FILE entity per source file, CLASS / FUNCTION / METHOD
    entities for named declarations, CONTAINS / IMPORTS / INHERITS
    relationships, and one ``RawCall`` per call expression.
    """

    def extract(
        self, file_info: FileInfo, tree: tree_sitter.Tree, source_bytes: bytes
    ) -> ExtractionResult:
        """Extract structural facts from a parsed JavaScript file.

        Entity IDs are hierarchical: the file's relative path is the root
        ID and every nested class or function appends ``::<name>`` to the
        ID of its enclosing entity.

        Parameters
        ----------
        file_info : FileInfo
            Information about the source file being parsed.
        tree : tree_sitter.Tree
            The tree-sitter syntax tree of the file.
        source_bytes : bytes
            The raw bytes of the source file (used for the line count).

        Returns
        -------
        ExtractionResult
            The extracted entities, relationships, and unresolved calls.
        """
        result = ExtractionResult()

        file_id = file_info.relative_path
        # Files at the repository root get the synthetic parent "repo".
        if "/" in file_info.relative_path:
            parent_dir = file_info.relative_path.rsplit("/", 1)[0]
        else:
            parent_dir = "repo"
        file_entity = Entity(
            id=file_id,
            type=EntityType.FILE,
            name=file_info.absolute_path.name,
            path=file_info.relative_path,
            parent_id=parent_dir,
            start_line=1,
            end_line=source_bytes.count(b"\n") + 1,
        )
        result.entities.append(file_entity)

        def walk(node: tree_sitter.Node, current_parent_id: str, is_in_class: bool = False) -> None:
            """Visit *node* and its descendants, recording facts in ``result``."""
            # Extract Imports
            if node.type == "import_statement":
                source_node = node.child_by_field_name("source")
                if source_node:
                    # The source is a string literal; drop its quotes.
                    imported_module = source_node.text.decode("utf-8").strip("'\"")
                    result.relationships.append(
                        Relationship(
                            source_id=file_id,
                            target_id=imported_module,
                            type=RelationshipType.IMPORTS,
                        )
                    )

            # Extract Classes
            elif node.type == "class_declaration":
                name_node = node.child_by_field_name("name")
                if name_node:
                    name = name_node.text.decode("utf-8")
                    class_id = f"{current_parent_id}::{name}"

                    result.entities.append(
                        Entity(
                            id=class_id,
                            type=EntityType.CLASS,
                            name=name,
                            path=file_info.relative_path,
                            parent_id=current_parent_id,
                            start_line=node.start_point[0] + 1,
                            end_line=node.end_point[0] + 1,
                        )
                    )
                    result.relationships.append(
                        Relationship(
                            source_id=current_parent_id,
                            target_id=class_id,
                            type=RelationshipType.CONTAINS,
                        )
                    )

                    # Look the ``extends`` clause up by field name first and
                    # fall back to a scan by node type: when the grammar
                    # attaches ``class_heritage`` as a plain child without a
                    # field name, the field lookup alone returns None and no
                    # INHERITS relationship would ever be recorded.
                    heritage = node.child_by_field_name("heritage")
                    if heritage is None:
                        heritage = next(
                            (c for c in node.children if c.type == "class_heritage"),
                            None,
                        )
                    if heritage is not None and heritage.type == "class_heritage":
                        for child in heritage.children:
                            if child.type == "identifier":
                                result.relationships.append(
                                    Relationship(
                                        source_id=class_id,
                                        target_id=child.text.decode("utf-8"),
                                        type=RelationshipType.INHERITS,
                                    )
                                )

                    # Descend with the class as the new parent, then stop so
                    # the generic child walk below does not visit twice.
                    for child in node.children:
                        walk(child, class_id, is_in_class=True)
                    return

            # Extract Functions
            elif node.type in ("function_declaration", "method_definition"):
                name_node = node.child_by_field_name("name")
                if name_node:
                    name = name_node.text.decode("utf-8")
                    func_id = f"{current_parent_id}::{name}"
                    if is_in_class or node.type == "method_definition":
                        func_type = EntityType.METHOD
                    else:
                        func_type = EntityType.FUNCTION

                    result.entities.append(
                        Entity(
                            id=func_id,
                            type=func_type,
                            name=name,
                            path=file_info.relative_path,
                            parent_id=current_parent_id,
                            start_line=node.start_point[0] + 1,
                            end_line=node.end_point[0] + 1,
                        )
                    )
                    result.relationships.append(
                        Relationship(
                            source_id=current_parent_id,
                            target_id=func_id,
                            type=RelationshipType.CONTAINS,
                        )
                    )
                    # Anything declared inside the body belongs to this
                    # function, and is no longer directly inside a class.
                    for child in node.children:
                        walk(child, func_id, is_in_class=False)
                    return

            # Extract Calls
            elif node.type == "call_expression":
                func_node = node.child_by_field_name("function")
                if func_node:
                    target_name = func_node.text.decode("utf-8")
                    # Keep only the last segment of a dotted callee.
                    if "." in target_name:
                        target_name = target_name.split(".")[-1]
                    result.raw_calls.append(
                        RawCall(
                            caller_id=current_parent_id,
                            target_name=target_name,
                            source_file=file_info.relative_path,
                            line=node.start_point[0] + 1,
                        )
                    )

            # Generic descent for every node that did not return above
            # (including imports, calls and unnamed declarations).
            for child in node.children:
                walk(child, current_parent_id, is_in_class)

        walk(tree.root_node, file_id)
        return result
|