knowcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. knowcode-0.1.0.dist-info/METADATA +175 -0
  2. knowcode-0.1.0.dist-info/RECORD +63 -0
  3. knowcode-0.1.0.dist-info/WHEEL +4 -0
  4. knowcode-0.1.0.dist-info/entry_points.txt +2 -0
  5. runtime/__init__.py +4 -0
  6. runtime/artifact/__init__.py +1 -0
  7. runtime/artifact/builder.py +179 -0
  8. runtime/cli/__init__.py +1 -0
  9. runtime/cli/animation.py +278 -0
  10. runtime/cli/app.py +309 -0
  11. runtime/cli/auth.py +171 -0
  12. runtime/cli/telemetry.py +91 -0
  13. runtime/exceptions/__init__.py +1 -0
  14. runtime/exceptions/errors.py +99 -0
  15. runtime/repository/__init__.py +13 -0
  16. runtime/repository/discovery.py +64 -0
  17. runtime/repository/models.py +103 -0
  18. runtime/repository/paths.py +50 -0
  19. runtime/repository/validator.py +100 -0
  20. runtime/services/__init__.py +1 -0
  21. runtime/services/ingest_service.py +105 -0
  22. runtime/services/init_service.py +45 -0
  23. runtime/services/semantic_sync_service.py +55 -0
  24. runtime/services/status_service.py +40 -0
  25. runtime/services/sync_service.py +57 -0
  26. runtime/templates/KNOWCODE_LOADER.md.j2 +24 -0
  27. runtime/templates/README_KNOWLEDGE.md.j2 +12 -0
  28. runtime/templates/README_STRUCTURE.md.j2 +19 -0
  29. runtime/templates/__init__.py +1 -0
  30. runtime/templates/active_context.md.j2 +3 -0
  31. runtime/templates/ingest_legacy.md.j2 +15 -0
  32. runtime/templates/raw_readme.md.j2 +9 -0
  33. runtime/templates/sync_reconciliation.md.j2 +17 -0
  34. runtime/templates/synthesize_knowledge.md.j2 +32 -0
  35. runtime/templates/track_intent.md.j2 +14 -0
  36. structural_engine/__init__.py +3 -0
  37. structural_engine/diff/__init__.py +1 -0
  38. structural_engine/diff/generator.py +92 -0
  39. structural_engine/diff/models.py +48 -0
  40. structural_engine/engine.py +192 -0
  41. structural_engine/logs/__init__.py +1 -0
  42. structural_engine/logs/generator.py +33 -0
  43. structural_engine/parser/__init__.py +7 -0
  44. structural_engine/parser/discovery.py +165 -0
  45. structural_engine/parser/extractors/base.py +44 -0
  46. structural_engine/parser/languages/javascript/adapter.py +149 -0
  47. structural_engine/parser/languages/python/adapter.py +174 -0
  48. structural_engine/parser/languages/typescript/adapter.py +165 -0
  49. structural_engine/parser/models.py +186 -0
  50. structural_engine/parser/parser.py +160 -0
  51. structural_engine/parser/resolvers/calls.py +105 -0
  52. structural_engine/parser/tree_sitter/registry.py +61 -0
  53. structural_engine/reports/__init__.py +1 -0
  54. structural_engine/reports/generator.py +77 -0
  55. structural_engine/results.py +54 -0
  56. structural_engine/revisions/__init__.py +1 -0
  57. structural_engine/revisions/tracker.py +32 -0
  58. structural_engine/snapshot/__init__.py +1 -0
  59. structural_engine/snapshot/generator.py +58 -0
  60. structural_engine/snapshot/loader.py +59 -0
  61. structural_engine/state/__init__.py +1 -0
  62. structural_engine/state/manager.py +169 -0
  63. structural_engine/state/models.py +34 -0
@@ -0,0 +1,174 @@
1
+ """Python structural extractor.
2
+
3
+ Walks a Python tree-sitter AST to extract entities, relationships,
4
+ and raw calls.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import tree_sitter
10
+
11
+ from structural_engine.parser.extractors.base import ExtractionResult, LanguageAdapter
12
+ from structural_engine.parser.models import (
13
+ Entity,
14
+ EntityType,
15
+ FileInfo,
16
+ RawCall,
17
+ Relationship,
18
+ RelationshipType,
19
+ )
20
+
21
class PythonAdapter(LanguageAdapter):
    """AST extractor for Python.

    Walks the tree-sitter syntax tree of a single Python file and records:

    - one ``FILE`` entity for the file itself,
    - ``CLASS`` and ``FUNCTION`` / ``METHOD`` entities with ``CONTAINS`` edges
      from their enclosing entity,
    - ``IMPORTS`` edges from the file to each imported module name,
    - ``INHERITS`` edges from a class to each base-class expression,
    - raw (unresolved) call sites.
    """

    def extract(
        self, file_info: FileInfo, tree: tree_sitter.Tree, source_bytes: bytes
    ) -> ExtractionResult:
        """Extract entities, relationships and raw calls from one parsed file.

        Parameters
        ----------
        file_info : FileInfo
            The file being processed. ``relative_path`` becomes the file
            entity's ID and the prefix of every nested entity ID.
        tree : tree_sitter.Tree
            Parsed syntax tree; traversal starts at ``tree.root_node``.
        source_bytes : bytes
            Raw file contents, used only to derive the file's ``end_line``.

        Returns
        -------
        ExtractionResult
            Result object whose ``entities``, ``relationships`` and
            ``raw_calls`` lists have been appended to.
        """
        result = ExtractionResult()

        file_id = file_info.relative_path
        # The parent of a file is its directory (everything before the last
        # "/"); files directly under the repository root hang off "repo".
        parent_dir = file_id.rsplit("/", 1)[0] if "/" in file_id else "repo"
        result.entities.append(
            Entity(
                id=file_id,
                type=EntityType.FILE,
                name=file_info.absolute_path.name,
                path=file_info.relative_path,
                parent_id=parent_dir,
                start_line=1,
                # Newline count + 1 (same convention as the other adapters).
                end_line=source_bytes.count(b"\n") + 1,
            )
        )

        def text_of(node: tree_sitter.Node) -> str:
            """Decode a node's source text without raising on invalid UTF-8."""
            return node.text.decode("utf-8", errors="replace")

        def add_import(module_name: str) -> None:
            """Record an IMPORTS edge from this file to the raw module name."""
            result.relationships.append(
                Relationship(
                    source_id=file_id,
                    target_id=module_name,
                    type=RelationshipType.IMPORTS,
                )
            )

        def add_contains(parent_id: str, child_id: str) -> None:
            """Record a CONTAINS edge from *parent_id* to *child_id*."""
            result.relationships.append(
                Relationship(
                    source_id=parent_id,
                    target_id=child_id,
                    type=RelationshipType.CONTAINS,
                )
            )

        def walk(node: tree_sitter.Node, current_parent_id: str, is_in_class: bool = False):
            """Visit *node* and its descendants.

            ``current_parent_id`` is the ID of the nearest enclosing entity
            (file, class or function). ``is_in_class`` is true while the
            nearest enclosing definition is a class, so functions found there
            are classified as methods.
            """
            # ── Imports ──────────────────────────────────────────────
            if node.type == "import_statement":
                for child in node.children:
                    name_node = child
                    # `import a.b as c` wraps the dotted name in an
                    # aliased_import node; unwrap it so the import is kept.
                    if child.type == "aliased_import":
                        name_node = child.child_by_field_name("name")
                    if name_node is not None and name_node.type == "dotted_name":
                        add_import(text_of(name_node))

            elif node.type == "import_from_statement":
                module_node = node.child_by_field_name("module_name")
                if module_node is not None:
                    add_import(text_of(module_node))

            # ── Classes ──────────────────────────────────────────────
            elif node.type == "class_definition":
                name_node = node.child_by_field_name("name")
                if name_node is not None:
                    name = text_of(name_node)
                    class_id = f"{current_parent_id}::{name}"

                    result.entities.append(
                        Entity(
                            id=class_id,
                            type=EntityType.CLASS,
                            name=name,
                            path=file_info.relative_path,
                            parent_id=current_parent_id,
                            start_line=node.start_point[0] + 1,
                            end_line=node.end_point[0] + 1,
                        )
                    )
                    add_contains(current_parent_id, class_id)

                    # Base classes are recorded by their raw source text;
                    # other argument kinds (e.g. keyword arguments) are skipped.
                    superclasses = node.child_by_field_name("superclasses")
                    if superclasses is not None:
                        for child in superclasses.children:
                            if child.type in ("identifier", "attribute"):
                                result.relationships.append(
                                    Relationship(
                                        source_id=class_id,
                                        target_id=text_of(child),
                                        type=RelationshipType.INHERITS,
                                    )
                                )

                    for child in node.children:
                        walk(child, class_id, is_in_class=True)
                    return

            # ── Functions / methods ──────────────────────────────────
            elif node.type == "function_definition":
                name_node = node.child_by_field_name("name")
                if name_node is not None:
                    name = text_of(name_node)
                    func_id = f"{current_parent_id}::{name}"
                    func_type = EntityType.METHOD if is_in_class else EntityType.FUNCTION

                    result.entities.append(
                        Entity(
                            id=func_id,
                            type=func_type,
                            name=name,
                            path=file_info.relative_path,
                            parent_id=current_parent_id,
                            start_line=node.start_point[0] + 1,
                            end_line=node.end_point[0] + 1,
                        )
                    )
                    add_contains(current_parent_id, func_id)

                    # Definitions nested inside a function are plain functions.
                    for child in node.children:
                        walk(child, func_id, is_in_class=False)
                    return

            # ── Calls ────────────────────────────────────────────────
            elif node.type == "call":
                func_node = node.child_by_field_name("function")
                if func_node is not None:
                    target_name = text_of(func_node)
                    if func_node.type == "attribute":
                        # For `obj.method(...)` keep only the trailing name.
                        # Reading the grammar field avoids mis-splitting
                        # callees whose text merely contains a dot.
                        attr_node = func_node.child_by_field_name("attribute")
                        if attr_node is not None:
                            target_name = text_of(attr_node)
                    result.raw_calls.append(
                        RawCall(
                            caller_id=current_parent_id,
                            target_name=target_name,
                            source_file=file_info.relative_path,
                            line=node.start_point[0] + 1,
                        )
                    )

            # Every branch that did not return keeps descending with the same
            # enclosing entity (this includes call arguments and import nodes).
            for child in node.children:
                walk(child, current_parent_id, is_in_class)

        walk(tree.root_node, file_id)
        return result
@@ -0,0 +1,165 @@
1
+ """TypeScript structural extractor.
2
+
3
+ Walks a TypeScript tree-sitter AST to extract entities, relationships,
4
+ and raw calls.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import tree_sitter
10
+
11
+ from structural_engine.parser.extractors.base import ExtractionResult, LanguageAdapter
12
+ from structural_engine.parser.models import (
13
+ Entity,
14
+ EntityType,
15
+ FileInfo,
16
+ RawCall,
17
+ Relationship,
18
+ RelationshipType,
19
+ )
20
+
21
+
22
class TypeScriptAdapter(LanguageAdapter):
    """AST extractor for TypeScript.

    Walks the tree-sitter syntax tree of a single TypeScript file and records:

    - one ``FILE`` entity for the file itself,
    - ``CLASS`` / ``INTERFACE`` and ``FUNCTION`` / ``METHOD`` entities with
      ``CONTAINS`` edges from their enclosing entity,
    - ``IMPORTS`` edges from the file to each import source string,
    - ``INHERITS`` / ``IMPLEMENTS`` edges from heritage clauses,
    - raw (unresolved) call sites.
    """

    def extract(
        self, file_info: FileInfo, tree: tree_sitter.Tree, source_bytes: bytes
    ) -> ExtractionResult:
        """Extract entities, relationships and raw calls from one parsed file.

        Parameters
        ----------
        file_info : FileInfo
            The file being processed. ``relative_path`` becomes the file
            entity's ID and the prefix of every nested entity ID.
        tree : tree_sitter.Tree
            Parsed syntax tree; traversal starts at ``tree.root_node``.
        source_bytes : bytes
            Raw file contents, used only to derive the file's ``end_line``.

        Returns
        -------
        ExtractionResult
            Result object whose ``entities``, ``relationships`` and
            ``raw_calls`` lists have been appended to.
        """
        result = ExtractionResult()

        file_id = file_info.relative_path
        # Files directly under the repository root hang off "repo"; all others
        # hang off their directory path.
        parent_dir = file_id.rsplit("/", 1)[0] if "/" in file_id else "repo"
        result.entities.append(
            Entity(
                id=file_id,
                type=EntityType.FILE,
                name=file_info.absolute_path.name,
                path=file_info.relative_path,
                parent_id=parent_dir,
                start_line=1,
                end_line=source_bytes.count(b"\n") + 1,
            )
        )

        class_like_types = (
            "class_declaration",
            # `abstract class Foo {}` has its own node type in the TypeScript
            # grammar; without it the class and its methods would be
            # attributed to the file.
            "abstract_class_declaration",
            "interface_declaration",
        )

        def text_of(node: tree_sitter.Node) -> str:
            """Decode a node's source text without raising on invalid UTF-8."""
            return node.text.decode("utf-8", errors="replace")

        def add_contains(parent_id: str, child_id: str) -> None:
            """Record a CONTAINS edge from *parent_id* to *child_id*."""
            result.relationships.append(
                Relationship(
                    source_id=parent_id,
                    target_id=child_id,
                    type=RelationshipType.CONTAINS,
                )
            )

        def add_heritage(clause: tree_sitter.Node, source_id: str, rel_type: RelationshipType) -> None:
            """Record one edge per plain (type) identifier directly under *clause*."""
            for named in clause.children:
                if named.type in ("identifier", "type_identifier"):
                    result.relationships.append(
                        Relationship(
                            source_id=source_id,
                            target_id=text_of(named),
                            type=rel_type,
                        )
                    )

        def walk(node: tree_sitter.Node, current_parent_id: str, is_in_class: bool = False):
            """Visit *node* and its descendants.

            ``current_parent_id`` is the ID of the nearest enclosing entity.
            ``is_in_class`` is true while the nearest enclosing definition is
            a class, so functions found there are classified as methods.
            """
            # ── Imports ──────────────────────────────────────────────
            if node.type == "import_statement":
                source_node = node.child_by_field_name("source")
                if source_node is not None:
                    # The source is a string literal; drop surrounding quotes.
                    imported_module = text_of(source_node).strip("'\"")
                    result.relationships.append(
                        Relationship(
                            source_id=file_id,
                            target_id=imported_module,
                            type=RelationshipType.IMPORTS,
                        )
                    )

            # ── Classes / interfaces ─────────────────────────────────
            elif node.type in class_like_types:
                name_node = node.child_by_field_name("name")
                if name_node is not None:
                    name = text_of(name_node)
                    entity_id = f"{current_parent_id}::{name}"
                    entity_type = (
                        EntityType.INTERFACE
                        if node.type == "interface_declaration"
                        else EntityType.CLASS
                    )

                    result.entities.append(
                        Entity(
                            id=entity_id,
                            type=entity_type,
                            name=name,
                            path=file_info.relative_path,
                            parent_id=current_parent_id,
                            start_line=node.start_point[0] + 1,
                            end_line=node.end_point[0] + 1,
                        )
                    )
                    add_contains(current_parent_id, entity_id)

                    for child in node.children:
                        if child.type == "class_heritage":
                            # Classes: `extends X` / `implements Y, Z`.
                            for clause in child.children:
                                if clause.type == "extends_clause":
                                    add_heritage(clause, entity_id, RelationshipType.INHERITS)
                                elif clause.type == "implements_clause":
                                    add_heritage(clause, entity_id, RelationshipType.IMPLEMENTS)
                        elif child.type in ("extends_type_clause", "extends_clause"):
                            # Interfaces carry their `extends` list directly
                            # on the declaration, not under class_heritage.
                            add_heritage(child, entity_id, RelationshipType.INHERITS)

                    for child in node.children:
                        walk(child, entity_id, is_in_class=(entity_type == EntityType.CLASS))
                    return

            # ── Functions / methods ──────────────────────────────────
            elif node.type in ("function_declaration", "method_definition", "arrow_function"):
                # Only nodes exposing a `name` field become entities; unnamed
                # ones fall through to the generic descent below.
                name_node = node.child_by_field_name("name")
                if name_node is not None:
                    name = text_of(name_node)
                    func_id = f"{current_parent_id}::{name}"
                    func_type = (
                        EntityType.METHOD
                        if is_in_class or node.type == "method_definition"
                        else EntityType.FUNCTION
                    )

                    result.entities.append(
                        Entity(
                            id=func_id,
                            type=func_type,
                            name=name,
                            path=file_info.relative_path,
                            parent_id=current_parent_id,
                            start_line=node.start_point[0] + 1,
                            end_line=node.end_point[0] + 1,
                        )
                    )
                    add_contains(current_parent_id, func_id)

                    for child in node.children:
                        walk(child, func_id, is_in_class=False)
                    return

            # ── Calls ────────────────────────────────────────────────
            elif node.type == "call_expression":
                func_node = node.child_by_field_name("function")
                if func_node is not None:
                    target_name = text_of(func_node)
                    if func_node.type == "member_expression":
                        # For `obj.method(...)` keep only the trailing name.
                        # Reading the grammar field avoids mis-splitting
                        # callees whose text merely contains a dot.
                        prop_node = func_node.child_by_field_name("property")
                        if prop_node is not None:
                            target_name = text_of(prop_node)
                    result.raw_calls.append(
                        RawCall(
                            caller_id=current_parent_id,
                            target_name=target_name,
                            source_file=file_info.relative_path,
                            line=node.start_point[0] + 1,
                        )
                    )

            # Every branch that did not return keeps descending with the same
            # enclosing entity.
            for child in node.children:
                walk(child, current_parent_id, is_in_class)

        walk(tree.root_node, file_id)
        return result
@@ -0,0 +1,186 @@
1
+ """Parser domain models.
2
+
3
+ All structural data types produced by the parser pipeline.
4
+
5
+ These models are internal to the Structural Engine. The Runtime never
6
+ imports them directly — it only sees the ``StructuralSnapshot`` indirectly
7
+ through result objects returned by the Engine's public interface.
8
+
9
+ Invariants
10
+ ----------
11
+ - All models are frozen (immutable).
12
+ - ``Entity.id`` uses stable, path-based identifiers — never UUIDs.
13
+ - ``StructuralSnapshot`` contains no metadata, revisions, or timestamps.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from dataclasses import dataclass, field
19
+ from enum import Enum, auto
20
+ from pathlib import Path
21
+
22
+
23
+ # ── Enums ─────────────────────────────────────────────────────────────
24
+
25
+
26
class EntityType(Enum):
    """Kinds of structural entity the parser can report for a repository."""

    # Values are spelled out; they match what ``auto()`` assigns (1, 2, 3, ...).
    REPOSITORY = 1
    DIRECTORY = 2
    FILE = 3
    CLASS = 4
    INTERFACE = 5
    FUNCTION = 6
    METHOD = 7
36
+
37
+
38
class RelationshipType(Enum):
    """Kinds of directed structural relationship between two entities."""

    # Values are spelled out; they match what ``auto()`` assigns (1, 2, 3, ...).
    CONTAINS = 1
    IMPORTS = 2
    INHERITS = 3
    IMPLEMENTS = 4
    CALLS = 5
46
+
47
+
48
class Language(Enum):
    """Programming languages the structural parser understands.

    V1 covers Python, TypeScript, and JavaScript.
    """

    # Values are spelled out; they match what ``auto()`` assigns (1, 2, 3).
    PYTHON = 1
    TYPESCRIPT = 2
    JAVASCRIPT = 3
57
+
58
+
59
+ # ── Extension → Language mapping ──────────────────────────────────────
60
+
61
# File extension (including the leading dot) -> language used to parse it.
# Grouped per language; key order is .py, .ts, .tsx, .js.
EXTENSION_LANGUAGE_MAP: dict[str, Language] = {
    **dict.fromkeys((".py",), Language.PYTHON),
    **dict.fromkeys((".ts", ".tsx"), Language.TYPESCRIPT),
    **dict.fromkeys((".js",), Language.JAVASCRIPT),
}
67
+
68
+
69
+ # ── Data models ───────────────────────────────────────────────────────
70
+
71
+
72
@dataclass(frozen=True)
class FileInfo:
    """Source file found during discovery, paired with its language.

    File discovery produces these records; the tree-sitter parsing stage
    consumes them.

    Attributes
    ----------
    absolute_path : Path
        Absolute path to the source file on disk.
    relative_path : str
        Path relative to the repository root, using forward slashes
        (e.g. ``src/auth.py``).
    language : Language
        Detected programming language.
    """

    absolute_path: Path
    relative_path: str
    language: Language
87
+
88
+
89
@dataclass(frozen=True)
class Entity:
    """One structural entity extracted from the repository.

    Each entity carries a stable, path-based string ID that is deterministic
    across machines and runs (never a UUID). Example IDs::

        repo
        src
        src/auth.py
        src/auth.py::verify_token
        src/auth.py::UserService
        src/auth.py::UserService::create_user

    Attributes
    ----------
    id : str
        Stable path-based identifier.
    type : EntityType
        Structural classification of this entity.
    name : str
        Human-readable name (e.g. ``verify_token``, ``UserService``).
    path : str
        Relative file path within the repository (forward slashes).
    parent_id : str | None
        ID of the containing entity, or ``None`` for the repository root.
    start_line : int
        1-indexed start line in the source file; 0 for non-file entities.
    end_line : int
        1-indexed end line in the source file; 0 for non-file entities.
    """

    id: str
    type: EntityType
    name: str
    path: str
    parent_id: str | None
    start_line: int
    end_line: int
128
+
129
+
130
@dataclass(frozen=True)
class Relationship:
    """Directed structural edge from one entity to another.

    For diffing, relationships are matched on the composite key
    ``(source_id, target_id, type)``.

    Attributes
    ----------
    source_id : str
        ID of the originating entity.
    target_id : str
        ID of the target entity.
    type : RelationshipType
        Classification of this relationship.
    """

    source_id: str
    target_id: str
    type: RelationshipType
146
+
147
+
148
@dataclass(frozen=True)
class RawCall:
    """Unresolved function/method call site found in source code.

    This is an intermediate record: call extraction produces it and the call
    resolver consumes it. Raw calls do not appear in the final snapshot.

    Attributes
    ----------
    caller_id : str
        Stable ID of the entity that makes the call.
    target_name : str
        Unresolved name of the called function/method.
    source_file : str
        Relative path to the source file.
    line : int
        1-indexed line number of the call site.
    """

    caller_id: str
    target_name: str
    source_file: str
    line: int
168
+
169
+
170
@dataclass(frozen=True)
class StructuralSnapshot:
    """Full structural picture of a repository at one point in time.

    Holds every discovered entity and relationship and nothing else: no
    metadata, revisions, timestamps, or reports.

    Sorting contract (guarantees deterministic output):

    - ``entities`` sorted by ``id``
    - ``relationships`` sorted by ``(source_id, target_id, type.name)``

    Attributes
    ----------
    entities : tuple[Entity, ...]
        All structural entities, sorted by ``id``.
    relationships : tuple[Relationship, ...]
        All structural relationships, sorted by composite key.
    """

    entities: tuple[Entity, ...] = field(default_factory=tuple)
    relationships: tuple[Relationship, ...] = field(default_factory=tuple)
@@ -0,0 +1,160 @@
1
+ """Parser — public interface.
2
+
3
+ The Parser is the observation subsystem of the Structural Engine.
4
+ It answers one question:
5
+
6
+ *What does the repository look like right now?*
7
+
8
+ Contract
9
+ --------
10
+ ::
11
+
12
+ parse(paths: RepositoryPaths) -> StructuralSnapshot
13
+
14
+ Invariants
15
+ ----------
16
+ - **Deterministic:** Identical repository states always produce identical
17
+ snapshots, regardless of machine, user, or OS.
18
+ - **Stateless:** Never reads ``state.yaml``, previous snapshots, reports,
19
+ logs, or knowledge. Current repository state is the only authority.
20
+ - **Side-effect free:** Performs no writes, no filesystem mutations,
21
+ no revision generation, no persistence.
22
+ - **No Brain awareness:** Accepts ``RepositoryPaths`` but only consumes
23
+ ``paths.repo_root``. All other fields are ignored.
24
+
25
+ Pipeline (Phase 2A stub — discovery only)
26
+ -----------------------------------------
27
+ ::
28
+
29
+ RepositoryPaths
30
+
31
+ repo_root
32
+
33
+ File Discovery
34
+
35
+ Language Detection
36
+
37
+ [Tree-sitter Parse] ← Phase 2B
38
+
39
+ [Entity Extraction] ← Phase 2B
40
+
41
+ [Relationship Extraction] ← Phase 2B
42
+
43
+ [Call Resolution] ← Phase 2B
44
+
45
+ StructuralSnapshot
46
+ """
47
+
48
+ from __future__ import annotations
49
+
50
+ import structlog
51
+
52
+ from runtime.repository.models import RepositoryPaths
53
+ from structural_engine.parser.discovery import discover_files
54
+ from structural_engine.parser.models import StructuralSnapshot, Entity, Relationship, RawCall, Language
55
+
56
+ logger = structlog.get_logger(__name__)
57
+
58
+
59
def _adapter_for(language: Language):
    """Return a new extractor for *language*, or ``None`` when there is none.

    Adapter modules are imported lazily, so only the adapters for languages
    actually encountered are loaded.
    """
    if language == Language.PYTHON:
        from structural_engine.parser.languages.python.adapter import PythonAdapter

        return PythonAdapter()
    if language == Language.JAVASCRIPT:
        from structural_engine.parser.languages.javascript.adapter import JavaScriptAdapter

        return JavaScriptAdapter()
    if language == Language.TYPESCRIPT:
        from structural_engine.parser.languages.typescript.adapter import TypeScriptAdapter

        return TypeScriptAdapter()
    return None


def parse(paths: RepositoryPaths) -> StructuralSnapshot:
    """Parse a repository and produce its structural snapshot.

    Runs the full pipeline: file discovery and language detection,
    tree-sitter parsing, per-language extraction, call resolution, and a
    final deterministic sort.

    Parameters
    ----------
    paths : RepositoryPaths
        Repository path contract. Only ``paths.repo_root`` is read here.

    Returns
    -------
    StructuralSnapshot
        Entities sorted by ``id``; relationships sorted by
        ``(source_id, target_id, type.name)``.

    Notes
    -----
    Per-file problems never abort the run. A file is logged and skipped when
    no tree-sitter parser is registered for its language (``ValueError`` from
    ``get_parser``), when reading or parsing it raises, or when its adapter's
    ``extract`` raises.
    """
    repo_root = paths.repo_root

    logger.info("parser.started", repo_root=str(repo_root))

    # ── Stage 1: File Discovery + Language Detection ──────────────
    files = discover_files(repo_root)

    # Single pass over the files instead of one scan per language.
    language_counts: dict[str, int] = {}
    for discovered in files:
        lang_name = discovered.language.name
        language_counts[lang_name] = language_counts.get(lang_name, 0) + 1

    logger.info(
        "parser.discovery_complete",
        files_found=len(files),
        languages=language_counts,
    )

    # ── Stages 2+: Tree-sitter parsing, extraction, resolution ────
    all_entities: list[Entity] = []
    all_relationships: list[Relationship] = []
    all_raw_calls: list[RawCall] = []

    for file_info in files:
        # 1. Get the language parser
        try:
            from structural_engine.parser.tree_sitter.registry import get_parser

            ts_parser = get_parser(file_info.language)
        except ValueError:
            logger.warning(
                "parser.unsupported_language",
                file=file_info.relative_path,
                language=file_info.language.name,
            )
            continue

        # 2. Parse the source code
        try:
            source_bytes = file_info.absolute_path.read_bytes()
            tree = ts_parser.parse(source_bytes)
        except Exception as e:
            logger.error("parser.parse_failed", file=file_info.relative_path, error=str(e))
            continue

        # 3. Extract structural facts
        adapter = _adapter_for(file_info.language)
        if not adapter:
            continue

        # A failure inside one adapter must not discard the facts already
        # collected from every other file, so it is handled like a parse
        # failure: log and move on. Nothing partial is merged, because the
        # adapter's result is only consumed after extract() returns.
        try:
            result = adapter.extract(file_info, tree, source_bytes)
        except Exception as e:
            logger.error("parser.extract_failed", file=file_info.relative_path, error=str(e))
            continue

        all_entities.extend(result.entities)
        all_relationships.extend(result.relationships)
        all_raw_calls.extend(result.raw_calls)

    # 4. Resolve calls
    from structural_engine.parser.resolvers.calls import resolve_calls

    resolved_calls = resolve_calls(all_entities, all_relationships, all_raw_calls)
    all_relationships.extend(resolved_calls)

    # 5. Sort for deterministic output
    all_entities.sort(key=lambda e: e.id)
    all_relationships.sort(key=lambda r: (r.source_id, r.target_id, r.type.name))

    snapshot = StructuralSnapshot(
        entities=tuple(all_entities),
        relationships=tuple(all_relationships),
    )

    logger.info(
        "parser.complete",
        entities=len(snapshot.entities),
        relationships=len(snapshot.relationships),
        resolved_calls=len(resolved_calls),
    )

    return snapshot