flurryx-code-memory 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_memory/__init__.py +1 -0
- code_memory/claims/__init__.py +32 -0
- code_memory/claims/extractor.py +325 -0
- code_memory/claims/indexer.py +258 -0
- code_memory/claims/resolver.py +186 -0
- code_memory/claims/store.py +424 -0
- code_memory/cli.py +1192 -0
- code_memory/config.py +268 -0
- code_memory/embed/__init__.py +224 -0
- code_memory/embed/cache.py +204 -0
- code_memory/embed/m3.py +174 -0
- code_memory/embed/ollama.py +92 -0
- code_memory/embed/tei.py +106 -0
- code_memory/episodic/__init__.py +3 -0
- code_memory/episodic/sqlite_store.py +278 -0
- code_memory/extractor/__init__.py +3 -0
- code_memory/extractor/csproj.py +166 -0
- code_memory/extractor/dll.py +385 -0
- code_memory/extractor/gitignore.py +162 -0
- code_memory/extractor/nuget.py +275 -0
- code_memory/extractor/sanity.py +124 -0
- code_memory/extractor/sln.py +108 -0
- code_memory/extractor/treesitter.py +1172 -0
- code_memory/graph/__init__.py +3 -0
- code_memory/graph/falkor_store.py +740 -0
- code_memory/mcp_server.py +1816 -0
- code_memory/metrics.py +260 -0
- code_memory/orchestrator/__init__.py +13 -0
- code_memory/orchestrator/git_delta.py +211 -0
- code_memory/orchestrator/ingest_state.py +71 -0
- code_memory/orchestrator/pipeline.py +1478 -0
- code_memory/orchestrator/reset.py +130 -0
- code_memory/orchestrator/resolver.py +825 -0
- code_memory/orchestrator/retrieve.py +505 -0
- code_memory/resilience.py +73 -0
- code_memory/sync/__init__.py +20 -0
- code_memory/sync/autostart/__init__.py +42 -0
- code_memory/sync/autostart/base.py +106 -0
- code_memory/sync/autostart/launchd.py +115 -0
- code_memory/sync/autostart/schtasks.py +155 -0
- code_memory/sync/autostart/systemd.py +113 -0
- code_memory/sync/hooks.py +164 -0
- code_memory/sync/safety.py +65 -0
- code_memory/sync/snapshot.py +461 -0
- code_memory/sync/store.py +399 -0
- code_memory/sync/sync.py +405 -0
- code_memory/sync/watcher.py +320 -0
- code_memory/vector/__init__.py +3 -0
- code_memory/vector/qdrant_store.py +302 -0
- flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
- flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
- flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
- flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""Minimal `.csproj` parser — enough to populate Project graph nodes.
|
|
2
|
+
|
|
3
|
+
We deliberately don't try to be MSBuild. Real evaluation would need to
|
|
4
|
+
expand properties, follow `<Import>` chains, conditionalise on
|
|
5
|
+
configurations, etc. Almost none of that matters for "which projects
|
|
6
|
+
reference which, and which NuGet packages do they pull in" — which is
|
|
7
|
+
the question the graph needs to answer for cross-project navigation.
|
|
8
|
+
|
|
9
|
+
Anything we can't statically extract (PackageReference Update,
|
|
10
|
+
ProjectReference behind a property, MSBuild-evaluated paths) is
|
|
11
|
+
skipped. The output is a best-effort snapshot, not a build plan.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from xml.etree import ElementTree as ET
|
|
20
|
+
|
|
21
|
+
log = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Modern SDK-style csprojs ship with no XML namespace; legacy
|
|
25
|
+
# (pre-2017) ones use http://schemas.microsoft.com/developer/msbuild/2003.
|
|
26
|
+
# Strip namespaces on parse so both layouts feed the same selectors.
|
|
27
|
+
def _strip_ns(tag: str) -> str:
|
|
28
|
+
if "}" in tag:
|
|
29
|
+
return tag.split("}", 1)[1]
|
|
30
|
+
return tag
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _local_iter(root: ET.Element, name: str):
    """Lazily yield elements whose namespace-stripped tag equals ``name``.

    The walk uses ``root.iter()``, so ``root`` itself is considered
    along with all of its descendants, in document order.
    """
    yield from (
        node for node in root.iter() if _strip_ns(node.tag) == name
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True)
class PackageRef:
    """A NuGet package named by a ``PackageReference`` item (immutable)."""

    # Package id as written in the project file.
    name: str
    # Version string as written in the project file; ``None`` when the
    # item carries no version of its own.
    version: str | None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class CsprojInfo:
    """Statically extracted, externally visible facts about one project file."""

    path: str  # location of the project file, as a string
    name: str  # project name
    assembly_name: str | None = None
    target_framework: str | None = None  # raw text, possibly a multi-target list
    project_references: list[str] = field(default_factory=list)  # absolute paths
    package_references: list[PackageRef] = field(default_factory=list)
    sdk_style: bool = True
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def parse_csproj(csproj_path: str | Path) -> CsprojInfo | None:
    """Parse a single `.csproj` (or `.fsproj` / `.vbproj`) file.

    Only statically visible data is read; nothing is MSBuild-evaluated.

    Args:
        csproj_path: Location of the project file. It is resolved to an
            absolute path before parsing.

    Returns:
        A populated ``CsprojInfo``, or ``None`` when the file can't be
        read or isn't well-formed XML (a warning is logged) — we'd
        rather skip than crash the ingest.
    """
    p = Path(csproj_path).resolve()
    try:
        tree = ET.parse(p)
    except (ET.ParseError, OSError) as e:
        log.warning("csproj: skipping %s — %s", p, e)
        return None

    root = tree.getroot()
    # SDK-style projects either name an SDK on the root element or ship
    # without the legacy MSBuild XML namespace. The *raw* tag must be
    # compared here: once the namespace is stripped every project root
    # reads "Project", which would flag legacy projects as SDK-style too.
    sdk_style = root.attrib.get("Sdk") is not None or root.tag == "Project"

    info = CsprojInfo(
        path=str(p),
        name=p.stem,
        sdk_style=sdk_style,
    )

    # Assembly name — falls back to project filename per MSBuild defaults.
    # Whitespace-only elements are treated as absent so the fallback applies.
    for el in _local_iter(root, "AssemblyName"):
        text = (el.text or "").strip()
        if text:
            info.assembly_name = text
            break
    if info.assembly_name is None:
        info.assembly_name = info.name

    # Target framework — prefer <TargetFramework>, fall back to
    # <TargetFrameworks> (multi-target; keep the raw list).
    for tag in ("TargetFramework", "TargetFrameworks"):
        for el in _local_iter(root, tag):
            text = (el.text or "").strip()
            if text:
                info.target_framework = text
                break
        if info.target_framework is not None:
            break

    base_dir = p.parent

    # ProjectReference Include="..\Foo\Foo.csproj"
    for el in _local_iter(root, "ProjectReference"):
        include = el.attrib.get("Include")
        if not include:
            continue
        resolved = _resolve_project_path(base_dir, include)
        if resolved is None:
            continue
        info.project_references.append(str(resolved))

    # PackageReference Include="Foo.Bar" Version="1.2.3"
    # The first item seen for a given package name wins.
    seen_packages: set[str] = set()
    for el in _local_iter(root, "PackageReference"):
        name = el.attrib.get("Include") or el.attrib.get("Update")
        if not name:
            continue
        if name in seen_packages:
            continue
        seen_packages.add(name)
        version = el.attrib.get("Version")
        if version is None:
            # Some teams pin via a <Version> child element. Match on the
            # local tag name so namespaced (legacy) projects work too, and
            # never rely on Element truthiness: a childless element is
            # falsy even when it was found.
            for child in el:
                if not isinstance(child.tag, str):
                    continue  # comment / processing-instruction node
                if _strip_ns(child.tag) != "Version":
                    continue
                text = (child.text or "").strip()
                if text:
                    version = text
                    break
        info.package_references.append(PackageRef(name=name, version=version))

    return info
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _resolve_project_path(base_dir: Path, include: str) -> Path | None:
|
|
135
|
+
"""Resolve an MSBuild ProjectReference include path.
|
|
136
|
+
|
|
137
|
+
Handles forward + backward slashes and bare relative paths. Skips
|
|
138
|
+
references whose path doesn't exist on disk so we never emit dead
|
|
139
|
+
Project nodes.
|
|
140
|
+
"""
|
|
141
|
+
normalized = include.replace("\\", "/")
|
|
142
|
+
candidate = (base_dir / normalized).resolve()
|
|
143
|
+
if candidate.exists():
|
|
144
|
+
return candidate
|
|
145
|
+
return None
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# Project file extensions worth walking. Includes F# and VB so a
# polyglot solution doesn't lose half its project graph.
_PROJECT_FILE_SUFFIXES = (".csproj", ".fsproj", ".vbproj")


def walk_csprojs(root: str | Path) -> list[CsprojInfo]:
    """Walk ``root`` for project files and return parsed ``CsprojInfo``.

    Args:
        root: Directory to search recursively.

    Returns:
        One entry per project file that parsed successfully. Files
        located under a ``bin``, ``obj`` or ``node_modules`` directory
        *inside* ``root`` are ignored.
    """
    skipped_dirs = {"bin", "obj", "node_modules"}
    out: list[CsprojInfo] = []
    root_path = Path(root).resolve()
    for ext in _PROJECT_FILE_SUFFIXES:
        for p in root_path.rglob(f"*{ext}"):
            # Skip obvious build outputs to keep the project graph
            # tight; these don't reflect source structure. Only the
            # directories below the walk root are inspected — testing
            # the absolute path would drop every project of a repo
            # that happens to be checked out under e.g. ``~/bin``.
            inner_dirs = p.relative_to(root_path).parts[:-1]
            if not skipped_dirs.isdisjoint(inner_dirs):
                continue
            info = parse_csproj(p)
            if info is not None:
                out.append(info)
    return out
|
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
"""Read .NET assembly metadata from PE files.
|
|
2
|
+
|
|
3
|
+
Implementation: pure-Python via ``dnfile`` (read-only PE/ECMA-335
|
|
4
|
+
parser). No .NET runtime required; we ingest binaries even on hosts
|
|
5
|
+
that have never had `dotnet` installed.
|
|
6
|
+
|
|
7
|
+
Scope of this module is deliberately narrow:
|
|
8
|
+
|
|
9
|
+
* one ``AssemblyInfo`` per DLL — identity (name + version) + flat list
|
|
10
|
+
of public ``TypeRef`` entries.
|
|
11
|
+
* private / internal / nested-non-public types are dropped at parse
|
|
12
|
+
time. Indexing implementation types would balloon the graph without
|
|
13
|
+
buying the agent anything; only the public surface is reachable
|
|
14
|
+
from other assemblies anyway.
|
|
15
|
+
* no member-level data (methods, properties, fields). The schema
|
|
16
|
+
decision in this PR is "Assembly + public Type only"; members can
|
|
17
|
+
be added later as a separate layer.
|
|
18
|
+
|
|
19
|
+
The reader is best-effort. Corrupt PE files, native DLLs that happen
|
|
20
|
+
to have a `.dll` extension, and assemblies without a CLR header are
|
|
21
|
+
all skipped quietly so a single bad file in `bin/` doesn't kill an
|
|
22
|
+
ingest.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import logging
|
|
28
|
+
from dataclasses import dataclass, field
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
|
|
31
|
+
log = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
class TypeRef:
    """A single publicly visible type declared in an assembly (immutable)."""

    namespace: str
    name: str
    kind: str  # "class" | "interface" | "struct" | "enum" | "delegate"
    sealed: bool = False
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class MemberRef:
    """A public member of a type; this layer records methods only.

    The record is intentionally small. Properties, events and fields
    can be added once an agent actually needs them; methods are what
    call-site resolution has to disambiguate against, and listing
    every field of every NuGet type would bloat the graph for no
    benefit.
    """

    name: str
    kind: str  # "method" | "constructor"
    static: bool
    params: int  # parameter count (without ``this``)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class AssemblyInfo:
    """Everything recorded about one parsed DLL."""

    path: str
    name: str
    version: str
    public_key_token: str | None = None
    types: list[TypeRef] = field(default_factory=list)

    @property
    def identity(self) -> str:
        """Graph key of the form ``Name, Version=X.Y.Z.W``.

        The version is part of the key, so two versions of the same
        assembly (say `Foo 1.0` and `Foo 2.0` pulled in by different
        projects) stay separate nodes instead of being merged.
        """
        return "{}, Version={}".format(self.name, self.version)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def parse_assembly(dll_path: str | Path) -> AssemblyInfo | None:
    """Parse one DLL into :class:`AssemblyInfo`. Returns ``None`` on failure.

    Failures we silence:

    * Native (non-CLR) DLLs — common in `bin/` for projects pulling in
      C++ helpers.
    * Corrupted / truncated PE files, including metadata that loads
      but can't be read back consistently.
    * Netmodules (no Assembly table row).
    * Permission denied.
    * ``dnfile`` not being installed (a warning is logged).

    Failures we propagate: nothing — DLL parsing must not abort an
    ingest. The caller treats ``None`` as "skip silently".

    Args:
        dll_path: Location of the DLL; resolved before loading.

    Returns:
        The assembly identity plus its public types, or ``None``.
    """
    p = Path(dll_path).resolve()
    try:
        # Import lazily so the rest of the package stays importable
        # without the optional ``[dotnet]`` extra installed.
        import dnfile
    except ImportError:
        log.warning(
            "dll: dnfile not installed; install code-memory[dotnet] "
            "to index .NET assemblies"
        )
        return None

    try:
        pe = dnfile.dnPE(str(p), fast_load=True)
    except Exception as e:  # noqa: BLE001 — dnfile raises many subclasses
        log.debug("dll: failed to parse %s — %s", p, e)
        return None

    try:
        pe.parse_data_directories()

        # Look tables up defensively: a table that is absent from the
        # file may simply not exist as an attribute.
        mdtables = getattr(getattr(pe, "net", None), "mdtables", None)
        if mdtables is None:
            return None  # not a managed assembly

        asm_table = getattr(mdtables, "Assembly", None)
        if asm_table is None or asm_table.num_rows == 0:
            # `.dll` that's a netmodule, not a standalone assembly. Skip.
            return None
        asm_row = asm_table.rows[0]
        name = _row_text(asm_row, "Name")
        if not name:
            return None
        version = (
            f"{asm_row.MajorVersion}.{asm_row.MinorVersion}."
            f"{asm_row.BuildNumber}.{asm_row.RevisionNumber}"
        )

        info = AssemblyInfo(
            path=str(p),
            name=name,
            version=version,
            public_key_token=_pub_key_token(asm_row),
        )

        td_table = getattr(mdtables, "TypeDef", None)
        if td_table is not None:
            for row in td_table.rows:
                tref = _typedef_to_ref(row)
                if tref is not None:
                    info.types.append(tref)
        return info
    except Exception as e:  # noqa: BLE001 — best-effort reader, never abort
        log.debug("dll: failed to parse %s — %s", p, e)
        return None
    finally:
        # Release the underlying file mapping. Everything returned above
        # is plain Python data, so nothing references the PE afterwards.
        close = getattr(pe, "close", None)
        if callable(close):
            close()
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# --------------------------------------------------------------- internals
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _row_text(row: object, attr: str) -> str | None:
|
|
150
|
+
"""Pull a string field off an mdtable row; dnfile returns plain strs already."""
|
|
151
|
+
value = getattr(row, attr, None)
|
|
152
|
+
if value is None:
|
|
153
|
+
return None
|
|
154
|
+
if isinstance(value, bytes):
|
|
155
|
+
return value.decode("utf-8", errors="replace")
|
|
156
|
+
return str(value)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _pub_key_token(asm_row: object) -> str | None:
|
|
160
|
+
"""Return the public-key-token (lowercase hex) if the assembly has one.
|
|
161
|
+
|
|
162
|
+
The token is the last 8 bytes of the public key's SHA-1, byte-
|
|
163
|
+
reversed (the .NET convention). Returns ``None`` for unsigned
|
|
164
|
+
assemblies and any extraction failure — token is metadata, not
|
|
165
|
+
structural data, so silence is fine.
|
|
166
|
+
"""
|
|
167
|
+
pk = getattr(asm_row, "PublicKey", None)
|
|
168
|
+
if pk is None:
|
|
169
|
+
return None
|
|
170
|
+
# dnfile wraps blobs in ``HeapItemBinary``. ``value`` is the
|
|
171
|
+
# straight bytes attribute; ``value_bytes`` is a method on newer
|
|
172
|
+
# releases. Try in order, treating callables as method-getters.
|
|
173
|
+
blob = b""
|
|
174
|
+
for attr in ("value", "value_bytes", "raw_data"):
|
|
175
|
+
v = getattr(pk, attr, None)
|
|
176
|
+
if v is None:
|
|
177
|
+
continue
|
|
178
|
+
if callable(v):
|
|
179
|
+
try:
|
|
180
|
+
v = v()
|
|
181
|
+
except Exception: # noqa: BLE001
|
|
182
|
+
continue
|
|
183
|
+
if isinstance(v, (bytes, bytearray)) and v:
|
|
184
|
+
blob = bytes(v)
|
|
185
|
+
break
|
|
186
|
+
if isinstance(pk, (bytes, bytearray)) and not blob:
|
|
187
|
+
blob = bytes(pk)
|
|
188
|
+
if not blob:
|
|
189
|
+
return None
|
|
190
|
+
import hashlib
|
|
191
|
+
|
|
192
|
+
return hashlib.sha1(blob).digest()[-8:][::-1].hex()
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _typedef_to_ref(row: object) -> TypeRef | None:
    """Convert one TypeDef row into a :class:`TypeRef`, or ``None`` to drop it.

    A row is dropped when:

    * it has no name, or is the compiler-synthesised ``<Module>``
      pseudo-type;
    * it carries no ``Flags`` value;
    * neither ``tdPublic`` nor ``tdNestedPublic`` is set on its flags.

    Nested-public types are kept based on their own flag alone; the
    visibility of the enclosing type is not consulted (that would need
    the NestedClass table, which isn't walked here).
    """
    type_namespace = _row_text(row, "TypeNamespace") or ""
    type_name = _row_text(row, "TypeName") or ""
    if type_name in ("", "<Module>"):
        return None

    attrs = getattr(row, "Flags", None)
    if attrs is None:
        return None

    # Visible from other assemblies: public top-level or public nested.
    visible = getattr(attrs, "tdPublic", False) or getattr(
        attrs, "tdNestedPublic", False
    )
    if not visible:
        return None

    return TypeRef(
        namespace=type_namespace,
        name=type_name,
        kind=_classify_type(attrs),
        sealed=bool(getattr(attrs, "tdSealed", False)),
    )
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _classify_type(flags: object) -> str:
|
|
230
|
+
"""Derive a coarse kind from TypeDef flags + parent (best-effort).
|
|
231
|
+
|
|
232
|
+
Real-precise kind classification needs the BaseType pointer
|
|
233
|
+
(e.g. inherits ``System.Enum`` ⇒ enum, ``System.Delegate`` ⇒
|
|
234
|
+
delegate). We don't walk that here — coarse ``class`` /
|
|
235
|
+
``interface`` / ``struct`` is enough for "what types does this
|
|
236
|
+
assembly expose" answers. ``enum`` and ``delegate`` get folded
|
|
237
|
+
into ``class`` and ``struct`` respectively.
|
|
238
|
+
"""
|
|
239
|
+
if getattr(flags, "tdInterface", False):
|
|
240
|
+
return "interface"
|
|
241
|
+
# Layout flags hint at value types. tdSequentialLayout /
|
|
242
|
+
# tdExplicitLayout typically mean a struct.
|
|
243
|
+
if getattr(flags, "tdSequentialLayout", False) or getattr(
|
|
244
|
+
flags, "tdExplicitLayout", False
|
|
245
|
+
):
|
|
246
|
+
return "struct"
|
|
247
|
+
return "class"
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# --------------------------------------------------------------- batch
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def walk_dlls(paths: list[str | Path]) -> list[AssemblyInfo]:
    """Run :func:`parse_assembly` over ``paths`` and keep the successes.

    The caller decides which DLLs to look at; this helper only applies
    the parser to each path in order and drops every ``None`` result,
    so the output preserves input order minus the failures.
    """
    parsed = (parse_assembly(dll) for dll in paths)
    return [assembly for assembly in parsed if assembly is not None]
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# --------------------------------------------------------------- members (on-demand)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def parse_type_members(
    dll_path: str | Path,
    namespace: str,
    name: str,
) -> list[MemberRef] | None:
    """Return the public methods declared on ``namespace.name`` in ``dll_path``.

    Read-once, no caching — designed to back a tool that queries
    members lazily rather than bulk-indexing every member of every
    referenced assembly.

    Args:
        dll_path: Location of the DLL; resolved before loading.
        namespace: Namespace of the type; use ``""`` for types without one.
        name: Type name as stored in the TypeDef table.

    Returns:
        * a list (possibly empty for a type with no public methods),
        * ``None`` when the assembly can't be parsed, the type isn't
          found, or dnfile isn't installed.
    """
    p = Path(dll_path).resolve()
    try:
        import dnfile
    except ImportError:
        return None

    try:
        pe = dnfile.dnPE(str(p), fast_load=True)
    except Exception as e:  # noqa: BLE001 — dnfile raises many subclasses
        log.debug("dll: failed to parse %s — %s", p, e)
        return None

    try:
        pe.parse_data_directories()

        # Look tables up defensively: a table that is absent from the
        # file may simply not exist as an attribute.
        mdtables = getattr(getattr(pe, "net", None), "mdtables", None)
        if mdtables is None:
            return None
        td_table = getattr(mdtables, "TypeDef", None)
        if td_table is None:
            return None

        # First TypeDef row matching both name and namespace wins.
        for i, row in enumerate(td_table.rows):
            if (
                _row_text(row, "TypeName") == name
                and (_row_text(row, "TypeNamespace") or "") == namespace
            ):
                return _members_for_type(td_table, row, i)
        return None
    except Exception as e:  # noqa: BLE001 — best-effort reader, never abort
        log.debug("dll: failed to read members from %s — %s", p, e)
        return None
    finally:
        # Release the underlying file mapping; the result above is plain
        # Python data and no longer needs the PE object.
        close = getattr(pe, "close", None)
        if callable(close):
            close()
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def _members_for_type(
    td_table: object, row: object, idx: int
) -> list[MemberRef]:
    """Return public methods declared directly on this TypeDef.

    Methods inherited from base types are NOT listed — only the
    entries of the row's ``MethodList`` are inspected. Adding inherited
    members would require walking the BaseType pointer chain, which we
    skip for the same balloon-the-graph reason as bulk members.

    Args:
        td_table: TypeDef table the row came from (currently unused).
        row: The TypeDef row whose ``MethodList`` is read.
        idx: Position of ``row`` in the table (currently unused).

    Returns:
        One ``MemberRef`` per distinct ``(name, param count, static)``
        combination among the public methods; ``[]`` when the row has
        no method list.
    """
    method_list = getattr(row, "MethodList", None)
    if not method_list:
        return []

    # Each entry is expected to be an index object exposing ``table``
    # and a 1-based ``row_index`` into the MethodDef table.
    out: list[MemberRef] = []
    seen: set[tuple[str, int, bool]] = set()
    for idx_ref in method_list:
        try:
            row_index = idx_ref.row_index
            if row_index < 1:
                # 0 is a null reference; ``rows[0 - 1]`` would silently
                # wrap around to the last row of the table.
                continue
            method_row = idx_ref.table.rows[row_index - 1]
        except (AttributeError, IndexError, TypeError):
            continue
        flags = getattr(method_row, "Flags", None)
        if flags is None or not getattr(flags, "mdPublic", False):
            continue
        name = _row_text(method_row, "Name") or ""
        if not name:
            continue
        is_ctor = name in (".ctor", ".cctor")
        param_count = _method_param_count(method_row)
        static = bool(getattr(flags, "mdStatic", False))
        # Overloads that share name, arity and staticness collapse into
        # one entry — MemberRef carries nothing else to tell them apart.
        key = (name, param_count, static)
        if key in seen:
            continue
        seen.add(key)
        out.append(
            MemberRef(
                name=name,
                kind="constructor" if is_ctor else "method",
                static=static,
                params=param_count,
            )
        )
    return out
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def _method_param_count(method_row: object) -> int:
|
|
370
|
+
"""Best-effort param count from the MethodDef's ParamList length.
|
|
371
|
+
|
|
372
|
+
The ParamList includes the return value slot for some signatures
|
|
373
|
+
(when the method has marshalling/attribute metadata on its
|
|
374
|
+
return). We can't disambiguate that without parsing the method
|
|
375
|
+
signature blob — which is out of scope here. Off-by-one on rare
|
|
376
|
+
methods is acceptable; the goal is overload disambiguation, not
|
|
377
|
+
exact reflection.
|
|
378
|
+
"""
|
|
379
|
+
plist = getattr(method_row, "ParamList", None)
|
|
380
|
+
if plist is None:
|
|
381
|
+
return 0
|
|
382
|
+
try:
|
|
383
|
+
return len(plist)
|
|
384
|
+
except TypeError:
|
|
385
|
+
return 0
|