flurryx-code-memory 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. code_memory/__init__.py +1 -0
  2. code_memory/claims/__init__.py +32 -0
  3. code_memory/claims/extractor.py +325 -0
  4. code_memory/claims/indexer.py +258 -0
  5. code_memory/claims/resolver.py +186 -0
  6. code_memory/claims/store.py +424 -0
  7. code_memory/cli.py +1192 -0
  8. code_memory/config.py +268 -0
  9. code_memory/embed/__init__.py +224 -0
  10. code_memory/embed/cache.py +204 -0
  11. code_memory/embed/m3.py +174 -0
  12. code_memory/embed/ollama.py +92 -0
  13. code_memory/embed/tei.py +106 -0
  14. code_memory/episodic/__init__.py +3 -0
  15. code_memory/episodic/sqlite_store.py +278 -0
  16. code_memory/extractor/__init__.py +3 -0
  17. code_memory/extractor/csproj.py +166 -0
  18. code_memory/extractor/dll.py +385 -0
  19. code_memory/extractor/gitignore.py +162 -0
  20. code_memory/extractor/nuget.py +275 -0
  21. code_memory/extractor/sanity.py +124 -0
  22. code_memory/extractor/sln.py +108 -0
  23. code_memory/extractor/treesitter.py +1172 -0
  24. code_memory/graph/__init__.py +3 -0
  25. code_memory/graph/falkor_store.py +740 -0
  26. code_memory/mcp_server.py +1816 -0
  27. code_memory/metrics.py +260 -0
  28. code_memory/orchestrator/__init__.py +13 -0
  29. code_memory/orchestrator/git_delta.py +211 -0
  30. code_memory/orchestrator/ingest_state.py +71 -0
  31. code_memory/orchestrator/pipeline.py +1478 -0
  32. code_memory/orchestrator/reset.py +130 -0
  33. code_memory/orchestrator/resolver.py +825 -0
  34. code_memory/orchestrator/retrieve.py +505 -0
  35. code_memory/resilience.py +73 -0
  36. code_memory/sync/__init__.py +20 -0
  37. code_memory/sync/autostart/__init__.py +42 -0
  38. code_memory/sync/autostart/base.py +106 -0
  39. code_memory/sync/autostart/launchd.py +115 -0
  40. code_memory/sync/autostart/schtasks.py +155 -0
  41. code_memory/sync/autostart/systemd.py +113 -0
  42. code_memory/sync/hooks.py +164 -0
  43. code_memory/sync/safety.py +65 -0
  44. code_memory/sync/snapshot.py +461 -0
  45. code_memory/sync/store.py +399 -0
  46. code_memory/sync/sync.py +405 -0
  47. code_memory/sync/watcher.py +320 -0
  48. code_memory/vector/__init__.py +3 -0
  49. code_memory/vector/qdrant_store.py +302 -0
  50. flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
  51. flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
  52. flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
  53. flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,166 @@
1
+ """Minimal `.csproj` parser — enough to populate Project graph nodes.
2
+
3
+ We deliberately don't try to be MSBuild. Real evaluation would need to
4
+ expand properties, follow `<Import>` chains, conditionalise on
5
+ configurations, etc. Almost none of that matters for "which projects
6
+ reference which, and which NuGet packages do they pull in" — which is
7
+ the question the graph needs to answer for cross-project navigation.
8
+
9
+ Anything we can't statically extract (PackageReference Update,
10
+ ProjectReference behind a property, MSBuild-evaluated paths) is
11
+ skipped. The output is a best-effort snapshot, not a build plan.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import logging
17
+ from dataclasses import dataclass, field
18
+ from pathlib import Path
19
+ from xml.etree import ElementTree as ET
20
+
21
+ log = logging.getLogger(__name__)
22
+
23
+
24
+ # Modern SDK-style csprojs ship with no XML namespace; legacy
25
+ # (pre-2017) ones use http://schemas.microsoft.com/developer/msbuild/2003.
26
+ # Strip namespaces on parse so both layouts feed the same selectors.
27
def _strip_ns(tag: str) -> str:
    """Return ``tag`` without its leading ``{namespace}`` qualifier.

    Everything up to and including the first ``}`` is discarded. A tag
    that contains no ``}`` is handed back unchanged.
    """
    _, closing_brace, local_name = tag.partition("}")
    return local_name if closing_brace else tag
31
+
32
+
33
def _local_iter(root: ET.Element, name: str):
    """Lazily yield elements from ``root.iter()`` whose local tag is ``name``.

    The comparison is made after ``_strip_ns`` has removed any XML
    namespace, so namespaced and plain documents match the same way.
    """
    yield from (node for node in root.iter() if _strip_ns(node.tag) == name)
38
+
39
+
40
@dataclass(frozen=True)
class PackageRef:
    """Immutable record of one ``PackageReference`` found in a project file."""

    # Package id as written in the project file.
    name: str
    # Declared version string; ``None`` when the project file gives none.
    version: str | None
44
+
45
+
46
@dataclass
class CsprojInfo:
    """Statically extracted, externally visible facts about one project file."""

    # Resolved filesystem location of the project file.
    path: str
    # Project file name without its extension.
    name: str
    # Output assembly name, when known.
    assembly_name: str | None = None
    # Target framework moniker(s) as written in the project file.
    target_framework: str | None = None
    project_references: list[str] = field(default_factory=list)  # absolute paths
    package_references: list[PackageRef] = field(default_factory=list)
    # Whether the project uses the modern SDK-style layout.
    sdk_style: bool = True
57
+
58
+
59
def parse_csproj(csproj_path: str | Path) -> CsprojInfo | None:
    """Parse a single `.csproj` (or `.fsproj` / `.vbproj`) file.

    Extracts the assembly name, target framework(s), project references
    that exist on disk, and package references. ``PackageReference``
    items are recorded whether they use ``Include`` or ``Update``; the
    first occurrence of a package name wins.

    Args:
        csproj_path: Location of the project file. It is resolved to an
            absolute path before parsing.

    Returns:
        A populated ``CsprojInfo``, or ``None`` when the file can't be
        read or isn't well-formed XML. A warning is logged in that case;
        we'd rather skip than crash the ingest.
    """
    p = Path(csproj_path).resolve()
    try:
        tree = ET.parse(p)
    except (ET.ParseError, OSError) as e:
        log.warning("csproj: skipping %s — %s", p, e)
        return None

    root = tree.getroot()
    # Legacy project files carry the msbuild/2003 XML namespace, so their
    # root tag looks like ``{uri}Project``. Compare the *raw* tag: going
    # through ``_strip_ns`` would classify every project as SDK-style.
    sdk_style = "Sdk" in root.attrib or root.tag == "Project"

    info = CsprojInfo(
        path=str(p),
        name=p.stem,
        sdk_style=sdk_style,
    )

    # Assembly name — falls back to project filename per MSBuild defaults.
    info.assembly_name = _first_nonblank_text(root, "AssemblyName") or info.name

    # Target framework — prefer <TargetFramework>, fall back to
    # <TargetFrameworks> (multi-target; keep the raw list).
    info.target_framework = _first_nonblank_text(
        root, "TargetFramework"
    ) or _first_nonblank_text(root, "TargetFrameworks")

    base_dir = p.parent

    # ProjectReference Include="..\Foo\Foo.csproj"
    for el in _local_iter(root, "ProjectReference"):
        include = el.attrib.get("Include")
        if not include:
            continue
        resolved = _resolve_project_path(base_dir, include)
        if resolved is not None:
            info.project_references.append(str(resolved))

    # PackageReference Include="Foo.Bar" Version="1.2.3"
    seen_packages: set[str] = set()
    for el in _local_iter(root, "PackageReference"):
        name = el.attrib.get("Include") or el.attrib.get("Update")
        if not name or name in seen_packages:
            continue
        seen_packages.add(name)
        version = el.attrib.get("Version")
        if version is None:
            # Some teams pin via a <Version> child element. Scan direct
            # children by local name instead of ``find(a) or find(b)``:
            # an Element without children is falsy, so that idiom
            # discards a perfectly good leaf <Version> match.
            for child in el:
                if _strip_ns(child.tag) != "Version":
                    continue
                child_text = (child.text or "").strip()
                if child_text:
                    version = child_text
                    break
        info.package_references.append(PackageRef(name=name, version=version))

    return info


def _first_nonblank_text(root: ET.Element, name: str) -> str | None:
    """Return the stripped text of the first ``name`` element that has any.

    Elements whose text is missing or whitespace-only are skipped, so a
    blank placeholder never masks a later real value or the caller's
    fallback.
    """
    for el in _local_iter(root, name):
        text = (el.text or "").strip()
        if text:
            return text
    return None
132
+
133
+
134
def _resolve_project_path(base_dir: Path, include: str) -> Path | None:
    """Turn a ProjectReference ``Include`` value into a resolved path.

    Backslashes are rewritten to forward slashes, the result is joined
    onto ``base_dir`` and resolved. The resolved path is returned only
    when it exists on disk; otherwise ``None`` is returned so callers
    never record a reference to a project that isn't there.
    """
    target = base_dir.joinpath(include.replace("\\", "/")).resolve()
    return target if target.exists() else None
146
+
147
+
148
+ # Project file extensions worth walking. Includes F# and VB so a
149
+ # polyglot solution doesn't lose half its project graph.
150
_PROJECT_FILE_SUFFIXES = (".csproj", ".fsproj", ".vbproj")

# Directory names that hold build output or vendored content rather
# than source; project files found beneath them are ignored.
_SKIPPED_DIR_NAMES = frozenset({"bin", "obj", "node_modules"})


def walk_csprojs(root: str | Path) -> list[CsprojInfo]:
    """Walk ``root`` for project files and return parsed ``CsprojInfo``.

    Args:
        root: Directory to search recursively. It is resolved first.

    Returns:
        One ``CsprojInfo`` per project file that ``parse_csproj`` could
        parse, grouped by suffix in ``_PROJECT_FILE_SUFFIXES`` order.
        Files located under a ``bin``, ``obj`` or ``node_modules``
        directory *inside* ``root`` are skipped.
    """
    out: list[CsprojInfo] = []
    root_path = Path(root).resolve()
    for ext in _PROJECT_FILE_SUFFIXES:
        for p in root_path.rglob(f"*{ext}"):
            # Skip obvious build outputs to keep the project graph tight.
            # Only the components below ``root`` are inspected: checking
            # the absolute path would discard every project whenever the
            # checkout itself lives under a directory called e.g. "obj".
            if not _SKIPPED_DIR_NAMES.isdisjoint(p.relative_to(root_path).parts):
                continue
            info = parse_csproj(p)
            if info is not None:
                out.append(info)
    return out
@@ -0,0 +1,385 @@
1
+ """Read .NET assembly metadata from PE files.
2
+
3
+ Implementation: pure-Python via ``dnfile`` (read-only PE/ECMA-335
4
+ parser). No .NET runtime required; we ingest binaries even on hosts
5
+ that have never had `dotnet` installed.
6
+
7
+ Scope of this module is deliberately narrow:
8
+
9
+ * one ``AssemblyInfo`` per DLL — identity (name + version) + flat list
10
+ of public ``TypeRef`` entries.
11
+ * private / internal / nested-non-public types are dropped at parse
12
+ time. Indexing implementation types would balloon the graph without
13
+ buying the agent anything; only the public surface is reachable
14
+ from other assemblies anyway.
15
+ * no member-level data (methods, properties, fields). The schema
16
+ decision in this PR is "Assembly + public Type only"; members can
17
+ be added later as a separate layer.
18
+
19
+ The reader is best-effort. Corrupt PE files, native DLLs that happen
20
+ to have a `.dll` extension, and assemblies without a CLR header are
21
+ all skipped quietly so a single bad file in `bin/` doesn't kill an
22
+ ingest.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import logging
28
+ from dataclasses import dataclass, field
29
+ from pathlib import Path
30
+
31
+ log = logging.getLogger(__name__)
32
+
33
+
34
@dataclass(frozen=True)
class TypeRef:
    """Immutable description of a single public type found in an assembly."""

    # Namespace the type is declared in.
    namespace: str
    # Type name without the namespace.
    name: str
    kind: str  # "class" | "interface" | "struct" | "enum" | "delegate"
    # Whether the type is flagged as sealed.
    sealed: bool = False
42
+
43
+
44
@dataclass(frozen=True)
class MemberRef:
    """Immutable description of one public method or constructor of a type.

    Deliberately limited to methods: they are what call-site resolution
    has to tell apart. Properties, events and fields are left out so the
    graph isn't flooded with members nobody queries.
    """

    # Method name as stored in metadata.
    name: str
    kind: str  # "method" | "constructor"
    # Whether the method is flagged as static.
    static: bool
    params: int  # parameter count (without ``this``)
59
+
60
+
61
@dataclass
class AssemblyInfo:
    """Everything extracted from one DLL: identity plus its public types."""

    # Resolved filesystem location of the DLL.
    path: str
    # Assembly simple name.
    name: str
    # Version string used verbatim in ``identity``.
    version: str
    # Lowercase hex public-key token, or ``None`` when there is none.
    public_key_token: str | None = None
    # Public types exposed by the assembly.
    types: list[TypeRef] = field(default_factory=list)

    @property
    def identity(self) -> str:
        """Graph key of the form ``Name, Version=X.Y.Z.W``.

        The version is part of the key on purpose: two versions of the
        same assembly in one repo must stay separate nodes rather than
        being merged into one.
        """
        return "{}, Version={}".format(self.name, self.version)
80
+
81
+
82
def parse_assembly(dll_path: str | Path) -> AssemblyInfo | None:
    """Parse one DLL into :class:`AssemblyInfo`. Returns ``None`` on failure.

    Failures we silence:

    * ``dnfile`` not being installed (logged as a warning).
    * Native (non-CLR) DLLs — common in `bin/` for projects pulling in
      C++ helpers.
    * Corrupted / truncated PE files.
    * Permission denied.
    * Any error while reading the metadata tables of a file that did
      parse.

    Failures we propagate: nothing — DLL parsing must not abort an
    ingest. The caller treats ``None`` as "skip silently".

    Args:
        dll_path: Location of the DLL; resolved before being opened.

    Returns:
        The parsed ``AssemblyInfo``, or ``None`` when the file is not a
        readable managed assembly.
    """
    p = Path(dll_path).resolve()
    try:
        # Import lazily so the rest of the package stays importable
        # without the optional ``[dotnet]`` extra installed.
        import dnfile
    except ImportError:
        log.warning(
            "dll: dnfile not installed; install code-memory[dotnet] "
            "to index .NET assemblies"
        )
        return None

    try:
        pe = dnfile.dnPE(str(p), fast_load=True)
    except Exception as e:  # noqa: BLE001 — dnfile raises many subclasses
        log.debug("dll: failed to parse %s — %s", p, e)
        return None

    try:
        pe.parse_data_directories()
        return _assembly_info_from_pe(pe, p)
    except Exception as e:  # noqa: BLE001 — honour the "never raises" contract
        log.debug("dll: failed to parse %s — %s", p, e)
        return None
    finally:
        # Release the file handle / memory map held by the PE object;
        # an ingest opens many DLLs and must not accumulate them.
        pe.close()


def _assembly_info_from_pe(pe, path: Path) -> AssemblyInfo | None:
    """Build an ``AssemblyInfo`` from an already-parsed dnfile PE object.

    Returns ``None`` when the file carries no CLR metadata, has no
    Assembly row, or the assembly row has no name.
    """
    if pe.net is None or pe.net.mdtables is None:
        return None  # not a managed assembly

    asm_table = pe.net.mdtables.Assembly
    if asm_table is None or asm_table.num_rows == 0:
        # `.dll` that's a netmodule, not a standalone assembly. Skip.
        return None
    asm_row = asm_table.rows[0]
    name = _row_text(asm_row, "Name")
    if not name:
        return None
    version = (
        f"{asm_row.MajorVersion}.{asm_row.MinorVersion}."
        f"{asm_row.BuildNumber}.{asm_row.RevisionNumber}"
    )

    info = AssemblyInfo(
        path=str(path),
        name=name,
        version=version,
        public_key_token=_pub_key_token(asm_row),
    )

    td_table = pe.net.mdtables.TypeDef
    if td_table is not None:
        for row in td_table.rows:
            tref = _typedef_to_ref(row)
            if tref is not None:
                info.types.append(tref)
    return info
144
+
145
+
146
+ # --------------------------------------------------------------- internals
147
+
148
+
149
def _row_text(row: object, attr: str) -> str | None:
    """Read attribute ``attr`` from ``row`` and return it as text.

    ``bytes`` values are decoded as UTF-8 with undecodable bytes
    replaced; any other non-``None`` value goes through ``str()``. A
    missing attribute or a ``None`` value yields ``None``.
    """
    raw = getattr(row, attr, None)
    if isinstance(raw, bytes):
        return raw.decode("utf-8", errors="replace")
    return None if raw is None else str(raw)
157
+
158
+
159
def _pub_key_token(asm_row: object) -> str | None:
    """Return the public-key-token (lowercase hex) if the assembly has one.

    The token is the last 8 bytes of the public key's SHA-1, byte-
    reversed (the .NET convention).

    Args:
        asm_row: Assembly table row; its ``PublicKey`` attribute may be
            raw bytes or a wrapper object exposing the key bytes.

    Returns:
        The 16-character hex token, or ``None`` for unsigned assemblies
        (missing or empty public key) and any extraction failure — the
        token is metadata, not structural data, so silence is fine.
    """
    pk = getattr(asm_row, "PublicKey", None)
    if pk is None:
        return None

    blob: bytes | None = None
    if isinstance(pk, (bytes, bytearray)):
        blob = bytes(pk)
    else:
        # dnfile wraps blobs in ``HeapItemBinary``. ``value`` is the
        # straight bytes attribute; ``value_bytes`` is a method on newer
        # releases. Try in order, treating callables as method-getters.
        for attr in ("value", "value_bytes", "raw_data"):
            v = getattr(pk, attr, None)
            if v is None:
                continue
            if callable(v):
                try:
                    v = v()
                except Exception:  # noqa: BLE001
                    continue
            if isinstance(v, (bytes, bytearray)):
                # The first accessor that yields bytes is authoritative,
                # even when empty. An empty key means "unsigned"; falling
                # through to a rawer accessor would hash whatever framing
                # bytes it holds and invent a token.
                blob = bytes(v)
                break

    if not blob:
        return None
    import hashlib

    return hashlib.sha1(blob).digest()[-8:][::-1].hex()
193
+
194
+
195
def _typedef_to_ref(row: object) -> TypeRef | None:
    """Convert one TypeDef row into a :class:`TypeRef`, or ``None`` to drop it.

    A row is dropped when:
      * its name is empty or is the compiler-synthesised ``<Module>``
        pseudo-type,
      * it carries no ``Flags`` attribute,
      * neither ``tdPublic`` nor ``tdNestedPublic`` is set on its flags.

    Nested public types are kept as-is. The NestedClass table isn't
    walked here, so the visibility of an enclosing type is not taken
    into account.
    """
    type_namespace = _row_text(row, "TypeNamespace") or ""
    type_name = _row_text(row, "TypeName") or ""
    if type_name in ("", "<Module>"):
        return None

    flags = getattr(row, "Flags", None)
    if flags is None:
        return None

    # Visibility gate: public top-level or public nested only.
    is_visible = getattr(flags, "tdPublic", False) or getattr(
        flags, "tdNestedPublic", False
    )
    if not is_visible:
        return None

    return TypeRef(
        namespace=type_namespace,
        name=type_name,
        kind=_classify_type(flags),
        sealed=bool(getattr(flags, "tdSealed", False)),
    )
227
+
228
+
229
def _classify_type(flags: object) -> str:
    """Map TypeDef flags onto a coarse kind (best-effort).

    Only three answers are possible: ``"interface"`` when ``tdInterface``
    is set, ``"struct"`` when a sequential or explicit layout flag is
    set, and ``"class"`` otherwise. Telling enums and delegates apart
    would need the BaseType pointer, which isn't inspected here, so
    they are not reported as their own kinds.
    """
    if getattr(flags, "tdInterface", False):
        return "interface"
    # Layout flags hint at value types.
    layout_markers = ("tdSequentialLayout", "tdExplicitLayout")
    if any(getattr(flags, marker, False) for marker in layout_markers):
        return "struct"
    return "class"
248
+
249
+
250
+ # --------------------------------------------------------------- batch
251
+
252
+
253
+ def walk_dlls(paths: list[str | Path]) -> list[AssemblyInfo]:
254
+ """Parse a precomputed list of DLL paths, skipping failures.
255
+
256
+ Caller is responsible for path resolution (the NuGet / output-dir
257
+ walker lives in ``code_memory.extractor.nuget``); this helper just
258
+ fans the parse out so the pipeline stays linear.
259
+ """
260
+ out: list[AssemblyInfo] = []
261
+ for p in paths:
262
+ info = parse_assembly(p)
263
+ if info is not None:
264
+ out.append(info)
265
+ return out
266
+
267
+
268
+ # --------------------------------------------------------------- members (on-demand)
269
+
270
+
271
def parse_type_members(
    dll_path: str | Path,
    namespace: str,
    name: str,
) -> list[MemberRef] | None:
    """Return the public methods declared on ``namespace.name`` in ``dll_path``.

    Read-once, no caching — designed to back a tool that queries
    members lazily rather than bulk-indexing every member of every
    referenced assembly.

    Args:
        dll_path: Location of the DLL; resolved before being opened.
        namespace: Namespace of the type; ``""`` matches rows with no
            namespace.
        name: Type name to look up. The first matching TypeDef row wins.

    Returns:
        * a list (possibly empty for a type with no public methods),
        * ``None`` when the assembly can't be parsed, the type isn't
          found, or dnfile isn't installed.
    """
    p = Path(dll_path).resolve()
    try:
        import dnfile
    except ImportError:
        return None
    try:
        pe = dnfile.dnPE(str(p), fast_load=True)
    except Exception:  # noqa: BLE001
        return None

    try:
        try:
            pe.parse_data_directories()
        except Exception:  # noqa: BLE001
            return None
        return _lookup_type_members(pe, namespace, name)
    finally:
        # The member list is fully materialised by now, so the file
        # handle / memory map behind the PE object can be released.
        pe.close()


def _lookup_type_members(pe, namespace: str, name: str) -> list[MemberRef] | None:
    """Find the TypeDef row for ``namespace.name`` and list its members.

    Returns ``None`` when the PE has no CLR metadata, no TypeDef table,
    or no row matching both the type name and the namespace.
    """
    if pe.net is None or pe.net.mdtables is None:
        return None

    td_table = pe.net.mdtables.TypeDef
    if td_table is None:
        return None

    for i, row in enumerate(td_table.rows):
        if _row_text(row, "TypeName") != name:
            continue
        if (_row_text(row, "TypeNamespace") or "") != namespace:
            continue
        return _members_for_type(td_table, row, i)
    return None
316
+
317
+
318
def _members_for_type(
    td_table: object, row: object, idx: int
) -> list[MemberRef]:
    """List the public methods found in ``row.MethodList``.

    Each ``MethodList`` entry is dereferenced through its ``table`` and
    1-based ``row_index``; entries that can't be dereferenced are
    skipped. Non-public and unnamed methods are dropped, and entries
    that share the same (name, parameter count, static) triple are
    reported once. ``.ctor`` / ``.cctor`` come back with kind
    ``"constructor"``, everything else as ``"method"``.

    ``td_table`` and ``idx`` are accepted but not consulted by the
    current implementation. Inherited members are not included; only
    what ``MethodList`` itself holds is examined.
    """
    method_list = getattr(row, "MethodList", None)
    if not method_list:
        return []

    members: list[MemberRef] = []
    emitted: set[tuple[str, int, bool]] = set()
    for entry in method_list:
        try:
            method_row = entry.table.rows[entry.row_index - 1]
        except (AttributeError, IndexError):
            continue

        flags = getattr(method_row, "Flags", None)
        if flags is None or not getattr(flags, "mdPublic", False):
            continue

        method_name = _row_text(method_row, "Name") or ""
        if not method_name:
            continue

        arity = _method_param_count(method_row)
        is_static = bool(getattr(flags, "mdStatic", False))
        signature = (method_name, arity, is_static)
        if signature in emitted:
            continue
        emitted.add(signature)

        if method_name in (".ctor", ".cctor"):
            member_kind = "constructor"
        else:
            member_kind = "method"
        members.append(
            MemberRef(
                name=method_name,
                kind=member_kind,
                static=is_static,
                params=arity,
            )
        )
    return members
367
+
368
+
369
def _method_param_count(method_row: object) -> int:
    """Return ``len(method_row.ParamList)``, or ``0`` when unavailable.

    ``0`` is returned when the attribute is missing, is ``None``, or
    doesn't support ``len()``. The count is a best-effort figure for
    telling overloads apart: the method signature blob is not parsed,
    so it may not match the true parameter count for every method.
    """
    params = getattr(method_row, "ParamList", None)
    try:
        return 0 if params is None else len(params)
    except TypeError:
        return 0