knowcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowcode-0.1.0.dist-info/METADATA +175 -0
- knowcode-0.1.0.dist-info/RECORD +63 -0
- knowcode-0.1.0.dist-info/WHEEL +4 -0
- knowcode-0.1.0.dist-info/entry_points.txt +2 -0
- runtime/__init__.py +4 -0
- runtime/artifact/__init__.py +1 -0
- runtime/artifact/builder.py +179 -0
- runtime/cli/__init__.py +1 -0
- runtime/cli/animation.py +278 -0
- runtime/cli/app.py +309 -0
- runtime/cli/auth.py +171 -0
- runtime/cli/telemetry.py +91 -0
- runtime/exceptions/__init__.py +1 -0
- runtime/exceptions/errors.py +99 -0
- runtime/repository/__init__.py +13 -0
- runtime/repository/discovery.py +64 -0
- runtime/repository/models.py +103 -0
- runtime/repository/paths.py +50 -0
- runtime/repository/validator.py +100 -0
- runtime/services/__init__.py +1 -0
- runtime/services/ingest_service.py +105 -0
- runtime/services/init_service.py +45 -0
- runtime/services/semantic_sync_service.py +55 -0
- runtime/services/status_service.py +40 -0
- runtime/services/sync_service.py +57 -0
- runtime/templates/KNOWCODE_LOADER.md.j2 +24 -0
- runtime/templates/README_KNOWLEDGE.md.j2 +12 -0
- runtime/templates/README_STRUCTURE.md.j2 +19 -0
- runtime/templates/__init__.py +1 -0
- runtime/templates/active_context.md.j2 +3 -0
- runtime/templates/ingest_legacy.md.j2 +15 -0
- runtime/templates/raw_readme.md.j2 +9 -0
- runtime/templates/sync_reconciliation.md.j2 +17 -0
- runtime/templates/synthesize_knowledge.md.j2 +32 -0
- runtime/templates/track_intent.md.j2 +14 -0
- structural_engine/__init__.py +3 -0
- structural_engine/diff/__init__.py +1 -0
- structural_engine/diff/generator.py +92 -0
- structural_engine/diff/models.py +48 -0
- structural_engine/engine.py +192 -0
- structural_engine/logs/__init__.py +1 -0
- structural_engine/logs/generator.py +33 -0
- structural_engine/parser/__init__.py +7 -0
- structural_engine/parser/discovery.py +165 -0
- structural_engine/parser/extractors/base.py +44 -0
- structural_engine/parser/languages/javascript/adapter.py +149 -0
- structural_engine/parser/languages/python/adapter.py +174 -0
- structural_engine/parser/languages/typescript/adapter.py +165 -0
- structural_engine/parser/models.py +186 -0
- structural_engine/parser/parser.py +160 -0
- structural_engine/parser/resolvers/calls.py +105 -0
- structural_engine/parser/tree_sitter/registry.py +61 -0
- structural_engine/reports/__init__.py +1 -0
- structural_engine/reports/generator.py +77 -0
- structural_engine/results.py +54 -0
- structural_engine/revisions/__init__.py +1 -0
- structural_engine/revisions/tracker.py +32 -0
- structural_engine/snapshot/__init__.py +1 -0
- structural_engine/snapshot/generator.py +58 -0
- structural_engine/snapshot/loader.py +59 -0
- structural_engine/state/__init__.py +1 -0
- structural_engine/state/manager.py +169 -0
- structural_engine/state/models.py +34 -0
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""Python structural extractor.
|
|
2
|
+
|
|
3
|
+
Walks a Python tree-sitter AST to extract entities, relationships,
|
|
4
|
+
and raw calls.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import tree_sitter
|
|
10
|
+
|
|
11
|
+
from structural_engine.parser.extractors.base import ExtractionResult, LanguageAdapter
|
|
12
|
+
from structural_engine.parser.models import (
|
|
13
|
+
Entity,
|
|
14
|
+
EntityType,
|
|
15
|
+
FileInfo,
|
|
16
|
+
RawCall,
|
|
17
|
+
Relationship,
|
|
18
|
+
RelationshipType,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
class PythonAdapter(LanguageAdapter):
    """AST extractor for Python.

    Walks a tree-sitter syntax tree and records:

    - one ``FILE`` entity for the file itself,
    - ``CLASS`` / ``FUNCTION`` / ``METHOD`` entities, each with a
      ``CONTAINS`` relationship from its enclosing entity,
    - ``IMPORTS`` relationships for ``import`` and ``from ... import``,
    - ``INHERITS`` relationships for base classes,
    - raw (unresolved) calls, attributed to the enclosing entity.

    Entity IDs are built as ``<parent id>::<name>``, starting from the
    file's relative path.
    """

    def extract(
        self, file_info: FileInfo, tree: tree_sitter.Tree, source_bytes: bytes
    ) -> ExtractionResult:
        """Extract entities, relationships and raw calls from one file.

        Parameters
        ----------
        file_info:
            The discovered file; ``relative_path`` becomes the file
            entity's ID and ``absolute_path.name`` its display name.
        tree:
            Tree-sitter parse tree of the file.
        source_bytes:
            Raw file content; only used to count lines for the file
            entity's ``end_line``.

        Returns
        -------
        ExtractionResult
            Populated ``entities``, ``relationships`` and ``raw_calls``.
            Import and inheritance targets are the names as written in
            the source; they are not resolved to entity IDs here.
        """
        result = ExtractionResult()

        file_id = file_info.relative_path
        # A file at the repository root is parented to the literal "repo"
        # ID; any other file is parented to its directory's relative path.
        parent_dir = file_id.rsplit("/", 1)[0] if "/" in file_id else "repo"
        result.entities.append(
            Entity(
                id=file_id,
                type=EntityType.FILE,
                name=file_info.absolute_path.name,
                path=file_info.relative_path,
                parent_id=parent_dir,
                start_line=1,
                end_line=source_bytes.count(b"\n") + 1,
            )
        )

        def add_relationship(
            source_id: str, target_id: str, rel_type: RelationshipType
        ) -> None:
            """Append one relationship to the result."""
            result.relationships.append(
                Relationship(source_id=source_id, target_id=target_id, type=rel_type)
            )

        def add_entity(
            entity_id: str,
            entity_type: EntityType,
            name: str,
            parent_id: str,
            node: tree_sitter.Node,
        ) -> None:
            """Append an entity for *node* plus the CONTAINS edge from its parent."""
            result.entities.append(
                Entity(
                    id=entity_id,
                    type=entity_type,
                    name=name,
                    path=file_info.relative_path,
                    parent_id=parent_id,
                    # tree-sitter rows are 0-based; entities use 1-based lines.
                    start_line=node.start_point[0] + 1,
                    end_line=node.end_point[0] + 1,
                )
            )
            add_relationship(parent_id, entity_id, RelationshipType.CONTAINS)

        def walk(
            node: tree_sitter.Node, current_parent_id: str, is_in_class: bool = False
        ) -> None:
            """Visit *node* and its descendants in document order."""
            node_type = node.type

            # Extract imports: ``import a.b`` and ``import a.b as c``.
            if node_type == "import_statement":
                for child in node.children:
                    # An aliased import wraps the dotted name in an
                    # ``aliased_import`` node; unwrap it so the import
                    # is not silently dropped.
                    if child.type == "aliased_import":
                        child = child.child_by_field_name("name")
                    if child is not None and child.type == "dotted_name":
                        add_relationship(
                            file_id,
                            child.text.decode("utf-8"),
                            RelationshipType.IMPORTS,
                        )

            # Extract imports: ``from module import ...``.
            elif node_type == "import_from_statement":
                module_node = node.child_by_field_name("module_name")
                if module_node is not None:
                    add_relationship(
                        file_id,
                        module_node.text.decode("utf-8"),
                        RelationshipType.IMPORTS,
                    )

            # Extract classes.
            elif node_type == "class_definition":
                name_node = node.child_by_field_name("name")
                if name_node is not None:
                    name = name_node.text.decode("utf-8")
                    class_id = f"{current_parent_id}::{name}"
                    add_entity(
                        class_id, EntityType.CLASS, name, current_parent_id, node
                    )

                    # Base classes: only plain names and dotted attribute
                    # accesses are recorded, as written in the source.
                    superclasses = node.child_by_field_name("superclasses")
                    if superclasses is not None:
                        for base in superclasses.children:
                            if base.type in ("identifier", "attribute"):
                                add_relationship(
                                    class_id,
                                    base.text.decode("utf-8"),
                                    RelationshipType.INHERITS,
                                )

                    for child in node.children:
                        walk(child, class_id, is_in_class=True)
                    return

            # Extract functions / methods.
            elif node_type == "function_definition":
                name_node = node.child_by_field_name("name")
                if name_node is not None:
                    name = name_node.text.decode("utf-8")
                    func_id = f"{current_parent_id}::{name}"
                    func_type = (
                        EntityType.METHOD if is_in_class else EntityType.FUNCTION
                    )
                    add_entity(func_id, func_type, name, current_parent_id, node)

                    # Anything defined inside a function body is no longer
                    # directly inside a class.
                    for child in node.children:
                        walk(child, func_id, is_in_class=False)
                    return

            # Extract calls. No return here: the generic descent below still
            # visits the call's children, so nested calls are recorded too.
            elif node_type == "call":
                func_node = node.child_by_field_name("function")
                if func_node is not None:
                    target_name = func_node.text.decode("utf-8")
                    # Keep only the last dotted component (``a.b.c`` -> ``c``).
                    if "." in target_name:
                        target_name = target_name.split(".")[-1]
                    result.raw_calls.append(
                        RawCall(
                            caller_id=current_parent_id,
                            target_name=target_name,
                            source_file=file_info.relative_path,
                            line=node.start_point[0] + 1,
                        )
                    )

            for child in node.children:
                walk(child, current_parent_id, is_in_class)

        walk(tree.root_node, file_id)
        return result
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""TypeScript structural extractor.
|
|
2
|
+
|
|
3
|
+
Walks a TypeScript tree-sitter AST to extract entities, relationships,
|
|
4
|
+
and raw calls.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import tree_sitter
|
|
10
|
+
|
|
11
|
+
from structural_engine.parser.extractors.base import ExtractionResult, LanguageAdapter
|
|
12
|
+
from structural_engine.parser.models import (
|
|
13
|
+
Entity,
|
|
14
|
+
EntityType,
|
|
15
|
+
FileInfo,
|
|
16
|
+
RawCall,
|
|
17
|
+
Relationship,
|
|
18
|
+
RelationshipType,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TypeScriptAdapter(LanguageAdapter):
    """Structural extractor for TypeScript sources.

    Traverses a tree-sitter syntax tree and collects the file entity,
    class / interface / function / method entities (each linked to its
    owner with ``CONTAINS``), ``IMPORTS`` edges, ``INHERITS`` and
    ``IMPLEMENTS`` edges from class heritage clauses, and raw calls.
    """

    def extract(
        self, file_info: FileInfo, tree: tree_sitter.Tree, source_bytes: bytes
    ) -> ExtractionResult:
        """Collect the structural facts of a single TypeScript file.

        Parameters
        ----------
        file_info:
            Discovered file; its ``relative_path`` is used as the file
            entity ID and as the ``path`` of every extracted entity.
        tree:
            Tree-sitter parse tree for the file.
        source_bytes:
            File content, used only to derive the file's last line number.

        Returns
        -------
        ExtractionResult
            Entities, relationships and unresolved calls found in the file.
        """
        result = ExtractionResult()

        rel_path = file_info.relative_path
        file_id = rel_path

        # Top-level files belong to the literal "repo" parent.
        if "/" in rel_path:
            parent_dir = rel_path.rsplit("/", 1)[0]
        else:
            parent_dir = "repo"

        result.entities.append(
            Entity(
                id=file_id,
                type=EntityType.FILE,
                name=file_info.absolute_path.name,
                path=rel_path,
                parent_id=parent_dir,
                start_line=1,
                end_line=source_bytes.count(b"\n") + 1,
            )
        )

        # Heritage clause node type -> relationship it produces.
        heritage_relationships = {
            "extends_clause": RelationshipType.INHERITS,
            "implements_clause": RelationshipType.IMPLEMENTS,
        }
        reference_node_types = ("identifier", "type_identifier")

        def record_entity(
            entity_id: str,
            kind: EntityType,
            name: str,
            owner_id: str,
            node: tree_sitter.Node,
        ) -> None:
            """Store the entity for *node* and the CONTAINS edge from its owner."""
            result.entities.append(
                Entity(
                    id=entity_id,
                    type=kind,
                    name=name,
                    path=rel_path,
                    parent_id=owner_id,
                    start_line=node.start_point[0] + 1,
                    end_line=node.end_point[0] + 1,
                )
            )
            result.relationships.append(
                Relationship(
                    source_id=owner_id,
                    target_id=entity_id,
                    type=RelationshipType.CONTAINS,
                )
            )

        def walk(
            node: tree_sitter.Node, current_parent_id: str, is_in_class: bool = False
        ):
            kind = node.type

            # Imports: the quoted module specifier, with its quotes removed.
            if kind == "import_statement":
                source_node = node.child_by_field_name("source")
                if source_node:
                    result.relationships.append(
                        Relationship(
                            source_id=file_id,
                            target_id=source_node.text.decode("utf-8").strip("'\""),
                            type=RelationshipType.IMPORTS,
                        )
                    )

            # Classes and interfaces.
            elif kind in ("class_declaration", "interface_declaration"):
                name_node = node.child_by_field_name("name")
                if name_node:
                    name = name_node.text.decode("utf-8")
                    entity_id = f"{current_parent_id}::{name}"
                    if kind == "interface_declaration":
                        entity_type = EntityType.INTERFACE
                    else:
                        entity_type = EntityType.CLASS

                    record_entity(entity_id, entity_type, name, current_parent_id, node)

                    # extends / implements clauses live under class_heritage.
                    for child in node.children:
                        if child.type != "class_heritage":
                            continue
                        for clause in child.children:
                            relation = heritage_relationships.get(clause.type)
                            if relation is None:
                                continue
                            for ref in clause.children:
                                if ref.type in reference_node_types:
                                    result.relationships.append(
                                        Relationship(
                                            source_id=entity_id,
                                            target_id=ref.text.decode("utf-8"),
                                            type=relation,
                                        )
                                    )

                    inside_class = entity_type == EntityType.CLASS
                    for child in node.children:
                        walk(child, entity_id, is_in_class=inside_class)
                    return

            # Functions and methods. A node without a "name" field is not
            # recorded as an entity and is traversed generically below.
            elif kind in ("function_declaration", "method_definition", "arrow_function"):
                name_node = node.child_by_field_name("name")
                if name_node:
                    name = name_node.text.decode("utf-8")
                    func_id = f"{current_parent_id}::{name}"
                    if is_in_class or kind == "method_definition":
                        func_type = EntityType.METHOD
                    else:
                        func_type = EntityType.FUNCTION

                    record_entity(func_id, func_type, name, current_parent_id, node)

                    for child in node.children:
                        walk(child, func_id, is_in_class=False)
                    return

            # Calls: keep the final dotted segment of the callee text.
            elif kind == "call_expression":
                callee = node.child_by_field_name("function")
                if callee:
                    target_name = callee.text.decode("utf-8")
                    if "." in target_name:
                        target_name = target_name.split(".")[-1]
                    result.raw_calls.append(
                        RawCall(
                            caller_id=current_parent_id,
                            target_name=target_name,
                            source_file=rel_path,
                            line=node.start_point[0] + 1,
                        )
                    )

            # Generic descent for everything that did not return above.
            for child in node.children:
                walk(child, current_parent_id, is_in_class)

        walk(tree.root_node, file_id)
        return result
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Parser domain models.
|
|
2
|
+
|
|
3
|
+
All structural data types produced by the parser pipeline.
|
|
4
|
+
|
|
5
|
+
These models are internal to the Structural Engine. The Runtime never
|
|
6
|
+
imports them directly — it only sees the ``StructuralSnapshot`` indirectly
|
|
7
|
+
through result objects returned by the Engine's public interface.
|
|
8
|
+
|
|
9
|
+
Invariants
|
|
10
|
+
----------
|
|
11
|
+
- All models are frozen (immutable).
|
|
12
|
+
- ``Entity.id`` uses stable, path-based identifiers — never UUIDs.
|
|
13
|
+
- ``StructuralSnapshot`` contains no metadata, revisions, or timestamps.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from enum import Enum, auto
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ── Enums ─────────────────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class EntityType(Enum):
    """Kinds of structural entity that can appear in a snapshot."""

    # Containers.
    REPOSITORY = auto()
    DIRECTORY = auto()
    FILE = auto()

    # Type-like declarations.
    CLASS = auto()
    INTERFACE = auto()

    # Callables.
    FUNCTION = auto()
    METHOD = auto()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class RelationshipType(Enum):
    """Kinds of directed edge that can connect two entities."""

    CONTAINS = auto()  # parent entity -> child entity
    IMPORTS = auto()  # file -> imported module
    INHERITS = auto()  # class -> base class
    IMPLEMENTS = auto()  # class -> implemented interface
    CALLS = auto()  # caller -> callee
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class Language(Enum):
    """Programming languages the structural parser can handle.

    V1 covers Python, TypeScript, and JavaScript.
    """

    PYTHON = auto()
    TYPESCRIPT = auto()
    JAVASCRIPT = auto()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ── Extension → Language mapping ──────────────────────────────────────
|
|
60
|
+
|
|
61
|
+
# File suffix (including the leading dot) -> language used to parse it.
EXTENSION_LANGUAGE_MAP: dict[str, Language] = {
    # Python
    ".py": Language.PYTHON,
    # TypeScript (plain and TSX)
    ".ts": Language.TYPESCRIPT,
    ".tsx": Language.TYPESCRIPT,
    # JavaScript
    ".js": Language.JAVASCRIPT,
}
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ── Data models ───────────────────────────────────────────────────────
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass(frozen=True)
class FileInfo:
    """A source file found during discovery, tagged with its language.

    File discovery produces these; the tree-sitter parsing stage
    consumes them.

    Attributes
    ----------
    absolute_path:
        Absolute path to the source file on disk.
    relative_path:
        Path relative to the repository root, using forward slashes
        (e.g. ``src/auth.py``).
    language:
        Detected programming language.
    """

    # Absolute location on disk.
    absolute_path: Path

    # Repository-relative location, forward slashes only.
    relative_path: str

    # Language detected for this file.
    language: Language
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass(frozen=True)
class Entity:
    """One structural entity extracted from the repository.

    Every entity carries a stable, path-based string ID that is
    deterministic across machines and runs (never a UUID).

    ID Examples
    -----------
    ::

        repo
        src
        src/auth.py
        src/auth.py::verify_token
        src/auth.py::UserService
        src/auth.py::UserService::create_user

    Attributes
    ----------
    id:
        Stable path-based identifier.
    type:
        Structural classification of the entity.
    name:
        Human-readable name (e.g. ``verify_token``, ``UserService``).
    path:
        Relative file path within the repository (forward slashes).
    parent_id:
        ID of the containing entity, or *None* for the repository root.
    start_line:
        1-indexed start line in the source file; 0 for non-file entities.
    end_line:
        1-indexed end line in the source file; 0 for non-file entities.
    """

    # Identity and classification.
    id: str
    type: EntityType
    name: str

    # Location within the repository tree.
    path: str
    parent_id: str | None

    # Source span (1-indexed, inclusive).
    start_line: int
    end_line: int
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@dataclass(frozen=True)
class Relationship:
    """A directed structural edge from one entity to another.

    For diffing, two relationships are matched on the composite key
    ``(source_id, target_id, type)``.

    Attributes
    ----------
    source_id:
        ID of the originating entity.
    target_id:
        ID of the target entity.
    type:
        Classification of the relationship.
    """

    # Edge endpoints.
    source_id: str
    target_id: str

    # Edge kind.
    type: RelationshipType
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@dataclass(frozen=True)
class RawCall:
    """A function/method call seen in source code, not yet resolved.

    Raw calls are an intermediate representation: call extraction
    produces them and the call resolver consumes them. They do not
    appear in the final snapshot.

    Attributes
    ----------
    caller_id:
        Stable ID of the entity that makes the call.
    target_name:
        Unresolved name of the called function/method.
    source_file:
        Relative path to the source file.
    line:
        1-indexed line number of the call site.
    """

    # Who calls, and what name is called.
    caller_id: str
    target_name: str

    # Where the call site is.
    source_file: str
    line: int
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@dataclass(frozen=True)
class StructuralSnapshot:
    """The full structural picture of a repository at one point in time.

    Holds every discovered entity and relationship, and nothing else:
    no metadata, revisions, timestamps, or reports.

    Sorting contract (guarantees deterministic output):
    - ``entities`` sorted by ``id``
    - ``relationships`` sorted by ``(source_id, target_id, type.name)``

    Attributes
    ----------
    entities:
        All structural entities, sorted by ``id``.
    relationships:
        All structural relationships, sorted by composite key.
    """

    # Both fields default to an empty tuple.
    entities: tuple[Entity, ...] = field(default_factory=tuple)
    relationships: tuple[Relationship, ...] = field(default_factory=tuple)
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Parser — public interface.
|
|
2
|
+
|
|
3
|
+
The Parser is the observation subsystem of the Structural Engine.
|
|
4
|
+
It answers one question:
|
|
5
|
+
|
|
6
|
+
*What does the repository look like right now?*
|
|
7
|
+
|
|
8
|
+
Contract
|
|
9
|
+
--------
|
|
10
|
+
::
|
|
11
|
+
|
|
12
|
+
parse(paths: RepositoryPaths) -> StructuralSnapshot
|
|
13
|
+
|
|
14
|
+
Invariants
|
|
15
|
+
----------
|
|
16
|
+
- **Deterministic:** Identical repository states always produce identical
|
|
17
|
+
snapshots, regardless of machine, user, or OS.
|
|
18
|
+
- **Stateless:** Never reads ``state.yaml``, previous snapshots, reports,
|
|
19
|
+
logs, or knowledge. Current repository state is the only authority.
|
|
20
|
+
- **Side-effect free:** Performs no writes, no filesystem mutations,
|
|
21
|
+
no revision generation, no persistence.
|
|
22
|
+
- **No Brain awareness:** Accepts ``RepositoryPaths`` but only consumes
|
|
23
|
+
``paths.repo_root``. All other fields are ignored.
|
|
24
|
+
|
|
25
|
+
Pipeline (Phase 2A stub — discovery only)
|
|
26
|
+
-----------------------------------------
|
|
27
|
+
::
|
|
28
|
+
|
|
29
|
+
RepositoryPaths
|
|
30
|
+
↓
|
|
31
|
+
repo_root
|
|
32
|
+
↓
|
|
33
|
+
File Discovery
|
|
34
|
+
↓
|
|
35
|
+
Language Detection
|
|
36
|
+
↓
|
|
37
|
+
[Tree-sitter Parse] ← Phase 2B
|
|
38
|
+
↓
|
|
39
|
+
[Entity Extraction] ← Phase 2B
|
|
40
|
+
↓
|
|
41
|
+
[Relationship Extraction] ← Phase 2B
|
|
42
|
+
↓
|
|
43
|
+
[Call Resolution] ← Phase 2B
|
|
44
|
+
↓
|
|
45
|
+
StructuralSnapshot
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
from __future__ import annotations
|
|
49
|
+
|
|
50
|
+
import structlog
|
|
51
|
+
|
|
52
|
+
from runtime.repository.models import RepositoryPaths
|
|
53
|
+
from structural_engine.parser.discovery import discover_files
|
|
54
|
+
from structural_engine.parser.models import StructuralSnapshot, Entity, Relationship, RawCall, Language
|
|
55
|
+
|
|
56
|
+
logger = structlog.get_logger(__name__)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _create_adapter(language: Language):
    """Instantiate the extractor for *language*, or return ``None`` if there is none.

    Each adapter module is imported only inside its own branch, so a
    language's adapter is loaded the first time a file of that language
    is processed.
    """
    if language == Language.PYTHON:
        from structural_engine.parser.languages.python.adapter import PythonAdapter
        return PythonAdapter()
    if language == Language.JAVASCRIPT:
        from structural_engine.parser.languages.javascript.adapter import JavaScriptAdapter
        return JavaScriptAdapter()
    if language == Language.TYPESCRIPT:
        from structural_engine.parser.languages.typescript.adapter import TypeScriptAdapter
        return TypeScriptAdapter()
    return None


def parse(paths: RepositoryPaths) -> StructuralSnapshot:
    """Parse a repository and produce its structural snapshot.

    This is the sole public entry point of the Parser subsystem.
    Called by ``StructuralEngine.initialize()`` and
    ``StructuralEngine.sync()``.

    Parameters
    ----------
    paths : RepositoryPaths
        Canonical 9-field path contract. Only ``paths.repo_root`` is
        consumed by the parser.

    Returns
    -------
    StructuralSnapshot
        Complete structural truth of the repository.
        Entities sorted by ``id``, relationships sorted by composite key
        ``(source_id, target_id, type.name)``.

    Notes
    -----
    The pipeline runs in this order: file discovery and language
    detection, tree-sitter parsing, per-language extraction of entities,
    relationships and raw calls, call resolution, and a final sort.

    A file is skipped (and the event logged) when no tree-sitter parser
    is available for its language or when reading/parsing it raises; the
    remaining files are still processed.
    """
    repo_root = paths.repo_root

    logger.info("parser.started", repo_root=str(repo_root))

    # ── Stage 1: File Discovery + Language Detection ──────────────
    files = discover_files(repo_root)

    # Count files per language in one pass. Keys appear in first-seen
    # order, so the logged mapping is stable for a given file order.
    language_counts: dict[str, int] = {}
    for discovered in files:
        lang_name = discovered.language.name
        language_counts[lang_name] = language_counts.get(lang_name, 0) + 1

    logger.info(
        "parser.discovery_complete",
        files_found=len(files),
        languages=language_counts,
    )

    # ── Stages 2–4: Tree-sitter parsing and extraction ────────────
    all_entities: list[Entity] = []
    all_relationships: list[Relationship] = []
    all_raw_calls: list[RawCall] = []

    for file_info in files:
        # 1. Get the language parser. The registry signals an unsupported
        #    language with ValueError; such files are skipped.
        try:
            from structural_engine.parser.tree_sitter.registry import get_parser
            ts_parser = get_parser(file_info.language)
        except ValueError:
            logger.warning(
                "parser.unsupported_language",
                file=file_info.relative_path,
                language=file_info.language.name,
            )
            continue

        # 2. Read and parse the source. This is deliberately best-effort:
        #    one unreadable or unparsable file must not abort the run.
        try:
            source_bytes = file_info.absolute_path.read_bytes()
            tree = ts_parser.parse(source_bytes)
        except Exception as e:
            logger.error("parser.parse_failed", file=file_info.relative_path, error=str(e))
            continue

        # 3. Extract structural facts with the language's adapter.
        adapter = _create_adapter(file_info.language)
        if adapter:
            result = adapter.extract(file_info, tree, source_bytes)
            all_entities.extend(result.entities)
            all_relationships.extend(result.relationships)
            all_raw_calls.extend(result.raw_calls)

    # ── Stage 5: Call resolution ──────────────────────────────────
    from structural_engine.parser.resolvers.calls import resolve_calls
    resolved_calls = resolve_calls(all_entities, all_relationships, all_raw_calls)
    all_relationships.extend(resolved_calls)

    # ── Stage 6: Sort for deterministic output ────────────────────
    all_entities.sort(key=lambda e: e.id)
    all_relationships.sort(key=lambda r: (r.source_id, r.target_id, r.type.name))

    snapshot = StructuralSnapshot(
        entities=tuple(all_entities),
        relationships=tuple(all_relationships),
    )

    logger.info(
        "parser.complete",
        entities=len(snapshot.entities),
        relationships=len(snapshot.relationships),
        resolved_calls=len(resolved_calls),
    )

    return snapshot
|