codemap-aimemory 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codemap_aimemory/__init__.py +5 -0
- codemap_aimemory/cli.py +157 -0
- codemap_aimemory/emitter.py +184 -0
- codemap_aimemory/enrich.py +113 -0
- codemap_aimemory/ids.py +63 -0
- codemap_aimemory/llm.py +205 -0
- codemap_aimemory-0.3.0.dist-info/METADATA +54 -0
- codemap_aimemory-0.3.0.dist-info/RECORD +10 -0
- codemap_aimemory-0.3.0.dist-info/WHEEL +4 -0
- codemap_aimemory-0.3.0.dist-info/entry_points.txt +5 -0
codemap_aimemory/cli.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""``codemap enrich`` subcommand — populate the LLM enrichment overlay.
|
|
2
|
+
|
|
3
|
+
Registered via the ``codemap.cli_commands`` entry-point group (introduced in
|
|
4
|
+
codemap-core 0.3.0). Core CLI discovers and mounts the subcommand at startup;
|
|
5
|
+
without ``codemap-aimemory`` installed it simply isn't there.
|
|
6
|
+
|
|
7
|
+
The deterministic L1 emitter is **not** affected by this command — it
|
|
8
|
+
only writes the optional ``.ai-memory/enrichment/<sha1>.yml`` overlay files.
|
|
9
|
+
The next time the emitter runs (next ``codemap index``), it merges them into
|
|
10
|
+
``entities/functions.yml`` keyed by ``symbol_id``.
|
|
11
|
+
|
|
12
|
+
Configuration sources (in priority order, first wins):
|
|
13
|
+
|
|
14
|
+
1. CLI flags: ``--api-key``, ``--base-url``, ``--model``, ``--backend``
|
|
15
|
+
2. Environment variables:
|
|
16
|
+
* ``CODEMAP_LLM_API_KEY`` (fallbacks: ``ANTHROPIC_API_KEY`` /
|
|
17
|
+
``OPENAI_API_KEY``)
|
|
18
|
+
* ``CODEMAP_LLM_BASE_URL`` (fallback: ``OPENAI_BASE_URL`` /
|
|
19
|
+
``ANTHROPIC_BASE_URL``)
|
|
20
|
+
* ``CODEMAP_LLM_MODEL``
|
|
21
|
+
* ``CODEMAP_LLM_BACKEND``
|
|
22
|
+
3. Built-in defaults (model: ``gpt-4o-mini``; backend: ``openai``).
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
from datetime import UTC, datetime
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Annotated
|
|
30
|
+
|
|
31
|
+
import typer
|
|
32
|
+
|
|
33
|
+
from codemap_aimemory.enrich import enrich
|
|
34
|
+
from codemap_aimemory.llm import build_client, env_default
|
|
35
|
+
|
|
36
|
+
__all__ = ["register"]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def register(app: typer.Typer) -> None:
    """Mount the ``enrich`` subcommand on *app*.

    The command resolves LLM credentials (CLI flag / Typer ``envvar`` first,
    then the broader fallbacks handled by ``env_default``), builds a client
    via ``build_client`` and hands it to ``enrich`` together with a
    read-only ``JsonStore`` opened on ``<path>/.codemap``.

    Exit code 2 is used for every user-correctable error: missing
    ``.codemap/`` directory, missing API key, or an unknown backend name.
    """

    @app.command("enrich")
    def enrich_command(
        path: Annotated[
            Path,
            typer.Argument(
                exists=True,
                file_okay=False,
                dir_okay=True,
                resolve_path=True,
                help="Project root containing ``.codemap/``.",
            ),
        ] = Path("."),
        backend: Annotated[
            str,
            typer.Option(
                "--backend",
                envvar="CODEMAP_LLM_BACKEND",
                help="LLM backend: ``openai`` (default, any OpenAI-compatible "
                "endpoint), ``anthropic`` (native SDK), or ``ollama``.",
            ),
        ] = "openai",
        model: Annotated[
            str,
            typer.Option(
                "--model",
                envvar="CODEMAP_LLM_MODEL",
                help="Model name passed verbatim to the backend.",
            ),
        ] = "gpt-4o-mini",
        api_key: Annotated[
            str,
            typer.Option(
                "--api-key",
                envvar="CODEMAP_LLM_API_KEY",
                help="API key (also picked up from CODEMAP_LLM_API_KEY, "
                "ANTHROPIC_API_KEY, or OPENAI_API_KEY).",
            ),
        ] = "",
        base_url: Annotated[
            str,
            typer.Option(
                "--base-url",
                envvar="CODEMAP_LLM_BASE_URL",
                help="Override the LLM API base URL (e.g. a self-hosted "
                "OpenAI-compatible endpoint).",
            ),
        ] = "",
        changed_only: Annotated[
            bool,
            typer.Option(
                "--changed-only",
                help="Skip symbols whose enrichment file already exists.",
            ),
        ] = False,
        dry_run: Annotated[
            bool,
            typer.Option(
                "--dry-run",
                help="Report what would be enriched and exit without calling the LLM.",
            ),
        ] = False,
    ) -> None:
        """Generate LLM enrichment overlay files under ``.ai-memory/enrichment/``."""
        from codemap.io.json_store import JsonStore  # local import — IO layer

        codemap_dir = path / ".codemap"
        if not codemap_dir.exists():
            typer.echo(
                f"Error: no .codemap/ at {path}. Run `codemap index` first.",
                err=True,
            )
            raise typer.Exit(code=2)

        # Resolve credentials with env fallbacks beyond what envvar= covers.
        resolved_key = api_key or env_default(
            "CODEMAP_LLM_API_KEY", "ANTHROPIC_API_KEY", "OPENAI_API_KEY"
        )
        resolved_base = base_url or env_default(
            "CODEMAP_LLM_BASE_URL", "OPENAI_BASE_URL", "ANTHROPIC_BASE_URL"
        )

        out_dir = path / ".ai-memory"

        if dry_run:
            # Dry run only counts candidates; no client is built, no key needed.
            with JsonStore.open(codemap_dir, mode="r") as store:
                fn_count = sum(1 for s in store.iter_symbols() if s.kind in {"method", "function"})
            typer.echo(
                f"Would enrich {fn_count} function/method symbols using "
                f"{backend}/{model}.\nOutput dir: {out_dir / 'enrichment'}"
            )
            return

        # build_client lowercases the backend name before dispatching, so the
        # key requirement must be decided on the same normalised value;
        # otherwise ``--backend Ollama`` would be rejected for lacking a key.
        if backend.lower() != "ollama" and not resolved_key:
            typer.echo(
                "Error: API key is required for non-Ollama backends. Set "
                "--api-key or CODEMAP_LLM_API_KEY / OPENAI_API_KEY / "
                "ANTHROPIC_API_KEY.",
                err=True,
            )
            raise typer.Exit(code=2)

        try:
            client = build_client(
                backend=backend,
                model=model,
                api_key=resolved_key or "",
                base_url=resolved_base,
            )
        except ValueError as exc:
            # build_client raises ValueError for an unrecognised backend name;
            # report it like the other usage errors instead of a traceback.
            typer.echo(f"Error: {exc}", err=True)
            raise typer.Exit(code=2) from exc

        # Date-only UTC stamp recorded in every enrichment payload.
        generated_at = datetime.now(UTC).strftime("%Y-%m-%d")
        with JsonStore.open(codemap_dir, mode="r") as store:
            written = enrich(
                store,
                client,
                out_dir,
                generated_at=generated_at,
                changed_only=changed_only,
            )
        typer.echo(f"Wrote {len(written)} enrichment file(s) under {out_dir / 'enrichment'}.")
|
codemap_aimemory/emitter.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""AiMemoryEmitter — write the four-layer memory model's L1 layout atomically.
|
|
2
|
+
|
|
3
|
+
Reads a :class:`codemap.core.store.ReadOnlyStore` and emits, under
|
|
4
|
+
``<output_dir>/.ai-memory/`` (or the explicit ``output_dir`` if it already
|
|
5
|
+
ends in ``.ai-memory``):
|
|
6
|
+
|
|
7
|
+
* ``entities/functions.yml`` — fn-/cls- entities with calls/called_by/
|
|
8
|
+
related_tables, signature, line_range, change_count_90d, confidence
|
|
9
|
+
* ``entities/tables.yml`` — tbl- entities
|
|
10
|
+
* ``entities/files.yml`` — file-* entities
|
|
11
|
+
* ``relations/call-graph.yml`` — calls edges
|
|
12
|
+
* ``relations/table-relations.yml`` — accesses_table edges
|
|
13
|
+
* ``relations/rule-constraints.yml`` — empty placeholder (L2 owns)
|
|
14
|
+
|
|
15
|
+
If an ``enrichment/`` folder sits alongside the output, the emitter loads
|
|
16
|
+
``business_meaning`` / ``related_rules`` keyed by ``symbol_id`` and merges
|
|
17
|
+
them in — but never writes back to the core index. Per ADR-0013 / the
|
|
18
|
+
spec's two-layer separation, the deterministic index never carries
|
|
19
|
+
explanation text.
|
|
20
|
+
|
|
21
|
+
All writes are atomic (tmp + ``os.replace``) so an Agent reading
|
|
22
|
+
``.ai-memory/`` mid-rebuild never sees a half-written file.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import contextlib
|
|
28
|
+
import os
|
|
29
|
+
import tempfile
|
|
30
|
+
from collections import defaultdict
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Any, ClassVar
|
|
33
|
+
|
|
34
|
+
import yaml
|
|
35
|
+
|
|
36
|
+
from codemap.core.store import ReadOnlyStore
|
|
37
|
+
from codemap.emitters.base import EmitContext, EmitResult
|
|
38
|
+
from codemap_aimemory.enrich import load_enrichment
|
|
39
|
+
from codemap_aimemory.ids import build_entity_ids
|
|
40
|
+
|
|
41
|
+
_FN_KINDS = frozenset({"method", "function"})
|
|
42
|
+
_CLS_KINDS = frozenset({"class", "interface"})
|
|
43
|
+
|
|
44
|
+
__all__ = ["AiMemoryEmitter"]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class AiMemoryEmitter:
    """Render a CodeMap store as the ``.ai-memory`` entity/relation YAML files."""

    name: ClassVar[str] = "ai-memory"
    version: ClassVar[str] = "0.1.0"

    def emit(self, store: ReadOnlyStore, ctx: EmitContext) -> EmitResult:
        """Build all six YAML documents from *store* and write them out.

        Edges whose source or target is not a symbol of the store are
        ignored. Function/method and class/interface symbols land in
        ``entities/functions.yml``; ``table`` symbols in
        ``entities/tables.yml``; every other kind is skipped. Enrichment
        payloads found under ``<out>/enrichment`` supply
        ``business_meaning`` and ``related_rules``.
        """
        target_root = _resolve_out_dir(ctx.output_dir)

        all_symbols = list(store.iter_symbols())
        all_edges = list(store.iter_edges())
        id_text: dict[Any, str] = {sym.id: str(sym.id) for sym in all_symbols}
        kind_of: dict[str, str] = {id_text[sym.id]: sym.kind for sym in all_symbols}
        entity_id = build_entity_ids(list(kind_of.keys()), kind_of)
        known_ids: set[str] = set(entity_id)
        overlay = load_enrichment(target_root / "enrichment")

        outgoing: defaultdict[str, list[str]] = defaultdict(list)
        incoming: defaultdict[str, list[str]] = defaultdict(list)
        tables_for: defaultdict[str, list[str]] = defaultdict(list)
        # maps_to and accesses_table are gathered separately so that, once all
        # edges are seen, a symbol that maps_to a sql_mapping can inherit the
        # tables that mapping accesses (two-hop expansion).
        mapping_targets: defaultdict[str, list[str]] = defaultdict(list)
        table_hits: defaultdict[str, list[str]] = defaultdict(list)
        call_relations: list[dict[str, Any]] = []
        table_relations: list[dict[str, Any]] = []

        for edge in all_edges:
            source_sid = str(edge.source)
            target_sid = str(edge.target)
            if not (source_sid in known_ids and target_sid in known_ids):
                continue
            if edge.kind == "calls":
                source_eid, target_eid = entity_id[source_sid], entity_id[target_sid]
                outgoing[source_sid].append(target_eid)
                incoming[target_sid].append(source_eid)
                call_relations.append(
                    {
                        "from": source_eid,
                        "to": target_eid,
                        "type": "calls",
                        "confidence": edge.confidence,
                    }
                )
            elif edge.kind == "accesses_table":
                source_eid, target_eid = entity_id[source_sid], entity_id[target_sid]
                table_hits[source_sid].append(target_eid)
                tables_for[source_sid].append(target_eid)
                table_relations.append(
                    {
                        "from": source_eid,
                        "to": target_eid,
                        "type": "accesses_table",
                        "confidence": edge.confidence,
                    }
                )
            elif edge.kind == "maps_to":
                mapping_targets[source_sid].append(target_sid)

        # Second hop: push each mapping's tables onto the symbols mapped to
        # it, skipping tables the symbol already lists.
        for method_sid, mapping_sids in mapping_targets.items():
            inherited = (
                table_eid
                for mapping_sid in mapping_sids
                for table_eid in table_hits.get(mapping_sid, [])
            )
            for table_eid in inherited:
                already_listed = tables_for[method_sid]
                if table_eid not in already_listed:
                    already_listed.append(table_eid)

        function_entries: list[dict[str, Any]] = []
        table_entries: list[dict[str, Any]] = []
        for sym in all_symbols:
            sid = id_text[sym.id]
            extra = overlay.get(sid, {})
            record = {
                "id": entity_id[sid],
                "symbol_id": sid,
                "file": str(sym.file),
                "line_range": [sym.range.start_line, sym.range.end_line],
                "signature": sym.signature,
                "confidence": sym.confidence,
                "change_count_90d": sym.extra.get("change_count_90d"),
                "business_meaning": extra.get("business_meaning"),
            }
            if sym.kind in _FN_KINDS:
                entity_type = "function"
            elif sym.kind in _CLS_KINDS:
                entity_type = "class"
            elif sym.kind == "table":
                record["type"] = "table"
                table_entries.append(record)
                continue
            else:
                continue
            # Functions and classes share one record layout and one file.
            # Fresh ``[]`` defaults keep list objects unshared between records.
            record.update(
                type=entity_type,
                calls=outgoing.get(sid, []),
                called_by=incoming.get(sid, []),
                related_tables=tables_for.get(sid, []),
                related_rules=extra.get("related_rules", []),
            )
            function_entries.append(record)

        distinct_paths = sorted({str(sym.file) for sym in all_symbols})
        file_entries = [
            {"id": f"file-{index}", "path": file_path}
            for index, file_path in enumerate(distinct_paths)
        ]

        documents: dict[str, list[Any]] = {
            "entities/functions.yml": function_entries,
            "entities/tables.yml": table_entries,
            "entities/files.yml": file_entries,
            "relations/call-graph.yml": call_relations,
            "relations/table-relations.yml": table_relations,
            "relations/rule-constraints.yml": [],
        }
        return EmitResult(
            files_written=_atomic_write_tree(target_root, documents),
            diagnostics=[],
        )
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _resolve_out_dir(out_dir: Path) -> Path:
|
|
161
|
+
"""The orchestrator passes ``<project>/.ai-memory`` already; treat any
|
|
162
|
+
other path as the project root and append the standard subdir."""
|
|
163
|
+
if out_dir.name == ".ai-memory":
|
|
164
|
+
return out_dir
|
|
165
|
+
return out_dir / ".ai-memory"
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _atomic_write_tree(out_dir: Path, outputs: dict[str, list[Any]]) -> list[str]:
    """Dump every entry of *outputs* as YAML beneath *out_dir*.

    Keys of *outputs* are paths relative to *out_dir*. Each document is
    first written to a temporary file in the destination folder and then
    moved over the final name with ``os.replace``. If anything fails for a
    file, its temporary file is removed (best effort) and the exception is
    re-raised; files finished earlier stay in place.

    Returns the relative paths written, in the iteration order of *outputs*.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    completed: list[str] = []
    for rel_path, document in outputs.items():
        destination = out_dir / rel_path
        folder = destination.parent
        folder.mkdir(parents=True, exist_ok=True)
        # The scratch file lives in the same folder as its destination.
        handle, scratch = tempfile.mkstemp(dir=folder, suffix=".tmp")
        try:
            with os.fdopen(handle, "w", encoding="utf-8") as stream:
                yaml.safe_dump(document, stream, allow_unicode=True, sort_keys=False)
            os.replace(scratch, destination)
        except Exception:
            with contextlib.suppress(OSError):
                os.unlink(scratch)
            raise
        completed.append(rel_path)
    return completed
|
codemap_aimemory/enrich.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Optional LLM enrichment overlay.
|
|
2
|
+
|
|
3
|
+
Two contract surfaces:
|
|
4
|
+
|
|
5
|
+
* :func:`load_enrichment` — read every ``enrichment/*.yml`` in a folder,
|
|
6
|
+
return a ``symbol_id → payload dict`` map; missing folder → ``{}``. Used
|
|
7
|
+
by :class:`AiMemoryEmitter` to merge ``business_meaning`` /
|
|
8
|
+
``related_rules`` at emit time.
|
|
9
|
+
* :func:`enrich` — for every function/method symbol in the store, call the
|
|
10
|
+
injected ``LlmClient.describe(...)`` to produce the explanation payload
|
|
11
|
+
and write one file per symbol under ``enrichment/``. Files use a sha1
|
|
12
|
+
prefix of the SCIP id as the filename so re-runs are idempotent and
|
|
13
|
+
collision-free. ``changed_only=True`` skips symbols whose enrichment
|
|
14
|
+
file already exists.
|
|
15
|
+
|
|
16
|
+
The core deterministic index never reads these files; only the emitter
|
|
17
|
+
does, and even then only to fill the explanation slots — structural fields
|
|
18
|
+
remain whatever the indexers + bridges produced.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import contextlib
|
|
24
|
+
import hashlib
|
|
25
|
+
import os
|
|
26
|
+
import tempfile
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Any, Protocol
|
|
29
|
+
|
|
30
|
+
import yaml
|
|
31
|
+
|
|
32
|
+
from codemap.core.store import ReadOnlyStore
|
|
33
|
+
|
|
34
|
+
__all__ = ["LlmClient", "enrich", "load_enrichment"]
|
|
35
|
+
|
|
36
|
+
_FN_KINDS = frozenset({"method", "function"})
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class LlmClient(Protocol):
    """Structural interface of the LLM backend driven by :func:`enrich`."""

    # Model identifier; ``enrich`` copies it into each payload as ``source_model``.
    model: str

    # ``enrich`` passes a dict with ``symbol_id``, ``signature`` and ``file`` and
    # reads ``business_meaning`` / ``related_rules`` from the returned dict.
    def describe(self, symbol: dict[str, Any]) -> dict[str, Any]: ...
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def load_enrichment(dir_: Path) -> dict[str, dict[str, Any]]:
    """Load every ``*.yml`` overlay file in *dir_*, keyed by ``symbol_id``.

    Loading is best effort: a file is skipped when it is not valid UTF-8,
    is not valid YAML, does not contain a mapping at the top level, or has
    no string ``symbol_id``. Files are visited in sorted name order, so if
    two files carry the same ``symbol_id`` the later name wins.

    Args:
        dir_: The ``enrichment`` folder. A missing path (or a path that is
            not a directory) yields an empty result.

    Returns:
        ``{symbol_id: payload}`` where ``payload`` is the whole parsed
        mapping of the file.
    """
    if not dir_.is_dir():
        return {}
    out: dict[str, dict[str, Any]] = {}
    for f in sorted(dir_.glob("*.yml")):
        try:
            data = yaml.safe_load(f.read_text(encoding="utf-8"))
        except (UnicodeDecodeError, yaml.YAMLError):
            continue
        # safe_load may return a list, scalar or None for syntactically valid
        # YAML; only a mapping can carry ``symbol_id``, so skip anything else
        # rather than crash on ``.get``.
        if not isinstance(data, dict):
            continue
        sid = data.get("symbol_id")
        if isinstance(sid, str):
            out[sid] = data
    return out
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def enrich(
    store: ReadOnlyStore,
    llm: LlmClient,
    out_dir: Path,
    *,
    generated_at: str,
    changed_only: bool = False,
) -> list[str]:
    """Write one enrichment YAML file per function/method symbol in *store*.

    For each symbol whose kind is ``method`` or ``function`` the client's
    ``describe`` is called with the symbol's id, signature and file, and the
    answer is stored together with ``confidence: llm``, the client's model
    name and *generated_at*. The file name is the first 12 hex digits of the
    SHA-1 of the symbol id plus ``.yml``; each file is written to a
    temporary name and then moved into place with ``os.replace``.

    Args:
        store: Source of symbols.
        llm: Client producing ``business_meaning`` / ``related_rules``.
        out_dir: Project root, its ``.ai-memory`` folder, or the
            ``enrichment`` folder itself.
        generated_at: Stamp recorded verbatim in every payload.
        changed_only: When true, symbols already present in the existing
            enrichment files are skipped.

    Returns:
        The names (not paths) of the files written, in processing order.
    """
    overlay_dir = _resolve_enr_dir(out_dir)
    overlay_dir.mkdir(parents=True, exist_ok=True)
    # Without changed_only this stays empty, so nothing is ever skipped.
    already_done = load_enrichment(overlay_dir) if changed_only else {}
    file_names: list[str] = []
    candidates = (sym for sym in store.iter_symbols() if sym.kind in _FN_KINDS)
    for sym in candidates:
        sid = str(sym.id)
        if sid in already_done:
            continue
        request = {
            "symbol_id": sid,
            "signature": sym.signature,
            "file": str(sym.file),
        }
        answer = llm.describe(request)
        record = {
            "symbol_id": sid,
            "business_meaning": answer.get("business_meaning"),
            "related_rules": answer.get("related_rules", []),
            "confidence": "llm",
            "source_model": llm.model,
            "generated_at": generated_at,
        }
        digest = hashlib.sha1(sid.encode("utf-8")).hexdigest()
        destination = overlay_dir / f"{digest[:12]}.yml"
        handle, scratch = tempfile.mkstemp(dir=overlay_dir, suffix=".tmp")
        try:
            with os.fdopen(handle, "w", encoding="utf-8") as stream:
                yaml.safe_dump(record, stream, allow_unicode=True, sort_keys=False)
            os.replace(scratch, destination)
        except Exception:
            # Best-effort removal of the scratch file, then propagate.
            with contextlib.suppress(OSError):
                os.unlink(scratch)
            raise
        file_names.append(destination.name)
    return file_names
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _resolve_enr_dir(out_dir: Path) -> Path:
|
|
108
|
+
"""Accept either ``<project>/.ai-memory`` or its ``enrichment`` subdir."""
|
|
109
|
+
if out_dir.name == "enrichment":
|
|
110
|
+
return out_dir
|
|
111
|
+
if out_dir.name == ".ai-memory":
|
|
112
|
+
return out_dir / "enrichment"
|
|
113
|
+
return out_dir / ".ai-memory" / "enrichment"
|
codemap_aimemory/ids.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Derive stable, unique ``entity_id`` slugs from SCIP symbol id strings.
|
|
2
|
+
|
|
3
|
+
Rules:
|
|
4
|
+
|
|
5
|
+
* base slug = ``{prefix}-{last_descriptor_name}`` where ``prefix`` is
|
|
6
|
+
decided by the symbol's kind via :func:`prefix_for` (fn / cls / tbl /
|
|
7
|
+
route / field / sql / sym).
|
|
8
|
+
* if multiple symbols collide on the same base slug (different SCIP ids
|
|
9
|
+
but same kind + last name), every entry in the collision group gets an
|
|
10
|
+
``-<sha1[:8]>`` suffix derived from its SCIP id — never just the second
|
|
11
|
+
one, so the resulting ids stay stable when adding / removing symbols.
|
|
12
|
+
* iteration order is sorted(symbol_ids) for determinism.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import hashlib
|
|
18
|
+
from collections import defaultdict
|
|
19
|
+
|
|
20
|
+
from codemap.core.symbol import SymbolID
|
|
21
|
+
|
|
22
|
+
__all__ = ["build_entity_ids", "prefix_for", "slug_base"]
|
|
23
|
+
|
|
24
|
+
# Symbol kind -> entity-id prefix. Kinds not listed here get ``sym``.
_PREFIX_BY_KIND: dict[str, str] = {
    "method": "fn",
    "function": "fn",
    "class": "cls",
    "interface": "cls",
    "table": "tbl",
    "route": "route",
    "field": "field",
    "sql_mapping": "sql",
}


def prefix_for(kind: str) -> str:
    """Return the entity-id prefix for *kind*, or ``sym`` if it is unmapped."""
    try:
        return _PREFIX_BY_KIND[kind]
    except KeyError:
        return "sym"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def slug_base(symbol_id_str: str) -> str:
    """Return the ``name`` of the last descriptor of the parsed symbol id."""
    parsed_id = SymbolID.parse(symbol_id_str)
    last_descriptor = parsed_id.descriptors[-1]
    return last_descriptor.name
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def build_entity_ids(
    symbol_ids: list[str],
    kinds: dict[str, str] | None = None,
) -> dict[str, str]:
    """Map each symbol id to an ``entity_id`` slug.

    The base slug is ``<prefix>-<last descriptor name>``. When two or more
    *distinct* symbol ids share a base slug, every member of that group gets
    a ``-<sha1[:8]>`` suffix derived from its own id. An id that is merely
    repeated in *symbol_ids* is not a collision with itself and keeps the
    bare slug.

    Args:
        symbol_ids: Symbol id strings; duplicates are tolerated.
        kinds: Optional ``symbol id -> kind`` map used to pick the prefix.
            Ids missing from it get the fallback prefix.

    Returns:
        ``{symbol_id: entity_id}``, ordered by first appearance in
        *symbol_ids*.
    """
    kinds = kinds or {}
    base_of: dict[str, str] = {}
    group_size: defaultdict[str, int] = defaultdict(int)
    # Count each distinct id once, so a repeated id does not look like a
    # collision and receive a needless hash suffix.
    for sid in sorted(set(symbol_ids)):
        base = f"{prefix_for(kinds.get(sid, ''))}-{slug_base(sid)}"
        base_of[sid] = base
        group_size[base] += 1
    out: dict[str, str] = {}
    for sid in symbol_ids:
        base = base_of[sid]
        if group_size[base] == 1:
            out[sid] = base
        else:
            h = hashlib.sha1(sid.encode("utf-8")).hexdigest()[:8]
            out[sid] = f"{base}-{h}"
    return out
|
codemap_aimemory/llm.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""Minimal OpenAI-compatible HTTP client for the optional ``codemap enrich``
|
|
2
|
+
CLI command.
|
|
3
|
+
|
|
4
|
+
Talks the OpenAI Chat Completions wire format, which OpenAI proper,
|
|
5
|
+
Anthropic-via-proxy, Ollama, vLLM, and most aggregators speak. The only
|
|
6
|
+
runtime dependency is ``httpx`` (already widely co-installed with pydantic /
|
|
7
|
+
Anthropic SDK families). The plugin's optional ``[llm]`` extra pulls the
|
|
8
|
+
official ``anthropic`` SDK for users who want it instead — that path goes
|
|
9
|
+
through :class:`AnthropicClient` below.
|
|
10
|
+
|
|
11
|
+
The client satisfies the ``LlmClient`` Protocol in :mod:`codemap_aimemory.enrich`:
|
|
12
|
+
|
|
13
|
+
class LlmClient(Protocol):
|
|
14
|
+
model: str
|
|
15
|
+
def describe(self, symbol: dict[str, Any]) -> dict[str, Any]: ...
|
|
16
|
+
|
|
17
|
+
``describe(symbol)`` returns ``{"business_meaning": str, "related_rules":
|
|
18
|
+
list[str]}``. Network / parse failures degrade gracefully — both keys come
|
|
19
|
+
back empty so the enrichment file still lands with the correct shape and
|
|
20
|
+
the orchestrator never crashes a Plan-4 pipeline on a transient LLM hiccup.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
import os
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
__all__ = ["AnthropicClient", "OpenAICompatibleClient", "build_client"]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# System prompt sent with every describe() request; it asks the model for a
# bare JSON object with exactly ``business_meaning`` and ``related_rules``.
_SYSTEM_PROMPT = (
    "You annotate code symbols for an internal knowledge graph. "
    "For each symbol you receive, return STRICT JSON with exactly two keys: "
    "`business_meaning` (a one-sentence Chinese or English description of "
    "what the symbol does in business terms) and `related_rules` (a list of "
    "short rule identifiers it implements; [] if none). Do not wrap the JSON "
    "in markdown, do not add commentary, do not include any other keys."
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class OpenAICompatibleClient:
    """Works against any /v1/chat/completions endpoint (OpenAI, vLLM, Ollama
    + ``--openai-api`` adapter, LM Studio, OneAPI / Higress aggregators).

    ``describe`` never raises for network, HTTP-status or response-shape
    problems; it returns the empty result
    ``{"business_meaning": None, "related_rules": []}`` instead.
    """

    def __init__(
        self,
        *,
        model: str,
        api_key: str,
        base_url: str = "https://api.openai.com/v1",
        timeout: float = 30.0,
    ) -> None:
        """Store connection settings; no network access happens here.

        Args:
            model: Model name sent verbatim in the request body.
            api_key: Sent as ``Authorization: Bearer <api_key>``.
            base_url: API root; trailing slashes are stripped and
                ``/chat/completions`` is appended per request.
            timeout: Timeout handed to ``httpx.Client``.
        """
        self.model = model
        self._api_key = api_key
        self._base_url = base_url.rstrip("/")
        self._timeout = timeout

    @staticmethod
    def _normalize(parsed: Any) -> dict[str, Any]:
        """Coerce a decoded model reply into the ``describe`` result shape.

        Anything that is not a JSON object yields the empty result. A
        ``related_rules`` value given as a single string becomes a
        one-element list (rather than being split into characters); any
        other non-list value becomes ``[]``.
        """
        if not isinstance(parsed, dict):
            return {"business_meaning": None, "related_rules": []}
        rules = parsed.get("related_rules")
        if isinstance(rules, str):
            rules = [rules]
        elif not isinstance(rules, (list, tuple)):
            rules = []
        return {
            "business_meaning": parsed.get("business_meaning"),
            "related_rules": list(rules),
        }

    def describe(self, symbol: dict[str, Any]) -> dict[str, Any]:
        """Ask the endpoint to annotate *symbol* and return the parsed answer.

        Args:
            symbol: Dict with ``symbol_id``, ``signature`` and ``file``
                (all read with ``.get``; a missing signature is sent as
                ``(none)``).

        Returns:
            ``{"business_meaning": ..., "related_rules": [...]}``; the empty
            result on any transport, status or parsing failure.

        Raises:
            RuntimeError: If ``httpx`` cannot be imported.
        """
        try:
            import httpx
        except ImportError as exc:  # pragma: no cover
            raise RuntimeError(
                "codemap enrich requires httpx. Install with: pip install codemap-aimemory[llm]"
            ) from exc

        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": _SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": (
                        f"Symbol: {symbol.get('symbol_id')}\n"
                        f"Signature: {symbol.get('signature') or '(none)'}\n"
                        f"File: {symbol.get('file')}\n"
                        "Respond with the JSON object."
                    ),
                },
            ],
            "temperature": 0.0,
            "response_format": {"type": "json_object"},
        }
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
        }
        try:
            with httpx.Client(timeout=self._timeout) as client:
                resp = client.post(
                    f"{self._base_url}/chat/completions",
                    json=payload,
                    headers=headers,
                )
                resp.raise_for_status()
                data = resp.json()
            content = data["choices"][0]["message"]["content"]
            parsed = json.loads(content)
        except (httpx.HTTPError, KeyError, IndexError, TypeError, ValueError):
            # TypeError covers a null ``content`` and a response body that is
            # not an object; ValueError covers json.JSONDecodeError.
            return self._normalize(None)
        return self._normalize(parsed)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class AnthropicClient:
    """Native Anthropic Messages API (`anthropic` SDK). Use this when the
    project already depends on the official SDK; otherwise the OpenAI-compat
    path covers Claude through the standard Bearer-token aggregators too.

    ``describe`` returns the empty result
    ``{"business_meaning": None, "related_rules": []}`` whenever the SDK call
    fails or the reply is not a JSON object.
    """

    def __init__(
        self,
        *,
        model: str,
        api_key: str,
        base_url: str | None = None,
    ) -> None:
        """Store settings; the SDK client is created per ``describe`` call.

        Args:
            model: Model name passed to ``messages.create``.
            api_key: API key passed to ``anthropic.Anthropic``.
            base_url: Optional endpoint override; only forwarded to the SDK
                when truthy.
        """
        self.model = model
        self._api_key = api_key
        self._base_url = base_url

    @staticmethod
    def _normalize(parsed: Any) -> dict[str, Any]:
        """Coerce a decoded model reply into the ``describe`` result shape.

        Anything that is not a JSON object yields the empty result. A
        ``related_rules`` value given as a single string becomes a
        one-element list (rather than being split into characters); any
        other non-list value becomes ``[]``.
        """
        if not isinstance(parsed, dict):
            return {"business_meaning": None, "related_rules": []}
        rules = parsed.get("related_rules")
        if isinstance(rules, str):
            rules = [rules]
        elif not isinstance(rules, (list, tuple)):
            rules = []
        return {
            "business_meaning": parsed.get("business_meaning"),
            "related_rules": list(rules),
        }

    def describe(self, symbol: dict[str, Any]) -> dict[str, Any]:
        """Ask the Messages API to annotate *symbol*.

        Args:
            symbol: Dict with ``symbol_id``, ``signature`` and ``file``
                (all read with ``.get``; a missing signature is sent as
                ``(none)``).

        Returns:
            ``{"business_meaning": ..., "related_rules": [...]}``; the empty
            result on any SDK or parsing failure.

        Raises:
            RuntimeError: If the ``anthropic`` package cannot be imported.
        """
        try:
            import anthropic  # type: ignore[import-not-found]
        except ImportError as exc:  # pragma: no cover
            raise RuntimeError(
                "AnthropicClient requires the anthropic SDK. "
                "Install with: pip install codemap-aimemory[llm]"
            ) from exc

        client_kwargs: dict[str, Any] = {"api_key": self._api_key}
        if self._base_url:
            client_kwargs["base_url"] = self._base_url
        try:
            client = anthropic.Anthropic(**client_kwargs)
            response = client.messages.create(
                model=self.model,
                max_tokens=1024,
                system=_SYSTEM_PROMPT,
                messages=[
                    {
                        "role": "user",
                        "content": (
                            f"Symbol: {symbol.get('symbol_id')}\n"
                            f"Signature: {symbol.get('signature') or '(none)'}\n"
                            f"File: {symbol.get('file')}\n"
                            "Respond with the JSON object only."
                        ),
                    }
                ],
            )
            # Concatenate only the content blocks that carry text.
            text = "".join(block.text for block in response.content if hasattr(block, "text"))
            parsed = json.loads(text)
        except Exception:  # pragma: no cover - depends on SDK errors
            return self._normalize(None)
        # Shape validation happens outside the try block but cannot raise:
        # _normalize accepts any decoded JSON value.
        return self._normalize(parsed)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def build_client(
    *,
    backend: str,
    model: str,
    api_key: str,
    base_url: str | None = None,
) -> Any:
    """Instantiate the client selected by *backend* (matched case-insensitively).

    * ``"openai"`` / ``"openai-compatible"`` — :class:`OpenAICompatibleClient`;
      an empty *base_url* falls back to ``https://api.openai.com/v1``.
    * ``"anthropic"`` — :class:`AnthropicClient`; *base_url* is passed through
      unchanged.
    * ``"ollama"`` — :class:`OpenAICompatibleClient`; an empty *base_url*
      falls back to ``http://localhost:11434/v1`` and an empty *api_key* is
      replaced by the placeholder ``"ollama"``.

    Raises:
        ValueError: If *backend* is none of the names above.
    """
    selected = backend.lower()
    if selected == "anthropic":
        return AnthropicClient(model=model, api_key=api_key, base_url=base_url)
    if selected == "ollama":
        endpoint = base_url or "http://localhost:11434/v1"
        return OpenAICompatibleClient(
            model=model,
            api_key=api_key or "ollama",
            base_url=endpoint,
        )
    if selected in ("openai", "openai-compatible"):
        endpoint = base_url or "https://api.openai.com/v1"
        return OpenAICompatibleClient(model=model, api_key=api_key, base_url=endpoint)
    raise ValueError(f"unknown LLM backend {backend!r}; expected openai / anthropic / ollama")
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def env_default(*names: str) -> str | None:
    """Look up *names* in ``os.environ`` in order and return the first
    value that is set and non-empty, or ``None`` when there is none."""
    values = (os.environ.get(name) for name in names)
    return next((value for value in values if value), None)
|
codemap_aimemory-0.3.0.dist-info/METADATA
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codemap-aimemory
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Emit the four-layer memory model's L1 (.ai-memory/) from a CodeMap index
|
|
5
|
+
Project-URL: Homepage, https://github.com/qxbyte/codemap
|
|
6
|
+
Author: CodeMap Contributors
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: ai-memory,codemap,emitter
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Software Development
|
|
12
|
+
Requires-Python: >=3.11
|
|
13
|
+
Requires-Dist: codemap-core<0.4,>=0.3.0
|
|
14
|
+
Requires-Dist: httpx>=0.27
|
|
15
|
+
Requires-Dist: pyyaml>=6.0
|
|
16
|
+
Requires-Dist: typer>=0.12
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
19
|
+
Provides-Extra: llm
|
|
20
|
+
Requires-Dist: anthropic>=0.39; extra == 'llm'
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
# codemap-aimemory
|
|
24
|
+
|
|
25
|
+
Emits the [four-layer memory model](https://github.com/qxbyte/codemap)'s
|
|
26
|
+
L1 (`.ai-memory/`) output from a CodeMap index.
|
|
27
|
+
|
|
28
|
+
## What it writes
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
.ai-memory/
|
|
32
|
+
├── entities/
|
|
33
|
+
│ ├── functions.yml fn-* / cls-* entities, with calls / called_by /
|
|
34
|
+
│ │ related_tables / signature / line_range
|
|
35
|
+
│ ├── tables.yml tbl-* entities
|
|
36
|
+
│ └── files.yml file-* entities
|
|
37
|
+
├── relations/
|
|
38
|
+
│ ├── call-graph.yml from / to / type=calls / confidence
|
|
39
|
+
│ ├── table-relations.yml from / to / type=accesses_table / confidence
|
|
40
|
+
│ └── rule-constraints.yml (empty; managed by L2)
|
|
41
|
+
└── enrichment/ optional LLM overlay (one file per enriched
|
|
42
|
+
symbol, loaded by the emitter to fill
|
|
43
|
+
business_meaning / related_rules)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
All writes are atomic per file (tmp + rename), so an Agent reading
|
|
47
|
+
`.ai-memory/` never sees a half-written tree.
|
|
48
|
+
|
|
49
|
+
## Two-layer separation
|
|
50
|
+
|
|
51
|
+
The core L1 output (above) is purely deterministic — no LLM. The optional
|
|
52
|
+
`enrich` CLI fills `business_meaning` / `related_rules` into separate
|
|
53
|
+
`enrichment/*.yml` files. The emitter merges enrichment values when it
|
|
54
|
+
re-emits, but the core JSON-store index never carries explanation text.
|
codemap_aimemory-0.3.0.dist-info/RECORD
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
codemap_aimemory/__init__.py,sha256=4pw3riA5cPNsFbeEBC64VhwKBqFj7RkRiBChAwW-oYE,140
|
|
2
|
+
codemap_aimemory/cli.py,sha256=o9mRy7rqwLLHlmhhbjvNFM6X8jN6JGIlCXGw7OTAsag,5483
|
|
3
|
+
codemap_aimemory/emitter.py,sha256=R7mrNDS6JalfqsXWAFGQMuDOahuIG_6FHWBa821vcyA,7614
|
|
4
|
+
codemap_aimemory/enrich.py,sha256=TLl5kPjZbyE55KIDq95996tTPY8w8wcVsuwvpHLMqL0,3604
|
|
5
|
+
codemap_aimemory/ids.py,sha256=AYNbBno1U47IpLBqY8amIqpUMngqSM4BUIesAOOdUcE,1866
|
|
6
|
+
codemap_aimemory/llm.py,sha256=FVfrZvaFNcQzIAbUquOXw7dY2jV5RnCrJeB13xclXrc,7717
|
|
7
|
+
codemap_aimemory-0.3.0.dist-info/METADATA,sha256=8hQPKznDmyPp4CdhYvazWK-6eNLrdz9MXmJ0T2hIehc,2056
|
|
8
|
+
codemap_aimemory-0.3.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
9
|
+
codemap_aimemory-0.3.0.dist-info/entry_points.txt,sha256=ex8A_irptUKBNzUDA9giikrK8ZQ5umw7xQiJSGCxiLM,135
|
|
10
|
+
codemap_aimemory-0.3.0.dist-info/RECORD,,
|