agentforge-graph 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge_graph/__init__.py +6 -0
- agentforge_graph/chunking/__init__.py +12 -0
- agentforge_graph/chunking/cast.py +159 -0
- agentforge_graph/chunking/chunk.py +19 -0
- agentforge_graph/chunking/tokens.py +15 -0
- agentforge_graph/cli.py +607 -0
- agentforge_graph/config.py +259 -0
- agentforge_graph/core/__init__.py +54 -0
- agentforge_graph/core/conformance.py +270 -0
- agentforge_graph/core/contracts.py +163 -0
- agentforge_graph/core/kinds.py +68 -0
- agentforge_graph/core/models.py +134 -0
- agentforge_graph/core/provenance.py +62 -0
- agentforge_graph/core/symbols.py +116 -0
- agentforge_graph/embed/__init__.py +28 -0
- agentforge_graph/embed/base.py +22 -0
- agentforge_graph/embed/bedrock.py +85 -0
- agentforge_graph/embed/fake.py +34 -0
- agentforge_graph/embed/openai.py +67 -0
- agentforge_graph/embed/pipeline.py +184 -0
- agentforge_graph/embed/registry.py +66 -0
- agentforge_graph/embed/report.py +15 -0
- agentforge_graph/enrich/__init__.py +70 -0
- agentforge_graph/enrich/anthropic.py +38 -0
- agentforge_graph/enrich/anthropic_client.py +109 -0
- agentforge_graph/enrich/bedrock.py +24 -0
- agentforge_graph/enrich/bedrock_client.py +115 -0
- agentforge_graph/enrich/bedrock_summarizer.py +23 -0
- agentforge_graph/enrich/claude.py +172 -0
- agentforge_graph/enrich/enricher.py +108 -0
- agentforge_graph/enrich/governs.py +173 -0
- agentforge_graph/enrich/governs_enricher.py +152 -0
- agentforge_graph/enrich/heuristics.py +224 -0
- agentforge_graph/enrich/judge.py +63 -0
- agentforge_graph/enrich/registry.py +133 -0
- agentforge_graph/enrich/report.py +60 -0
- agentforge_graph/enrich/summarizer.py +62 -0
- agentforge_graph/enrich/summary_enricher.py +211 -0
- agentforge_graph/enrich/taxonomy.py +38 -0
- agentforge_graph/frameworks/__init__.py +29 -0
- agentforge_graph/frameworks/base.py +75 -0
- agentforge_graph/frameworks/detect.py +124 -0
- agentforge_graph/frameworks/extractor.py +63 -0
- agentforge_graph/frameworks/orm.py +93 -0
- agentforge_graph/frameworks/packs/_js_ast.py +56 -0
- agentforge_graph/frameworks/packs/_python_ast.py +157 -0
- agentforge_graph/frameworks/packs/django/__init__.py +240 -0
- agentforge_graph/frameworks/packs/django/models.scm +7 -0
- agentforge_graph/frameworks/packs/express/__init__.py +133 -0
- agentforge_graph/frameworks/packs/express/routes.scm +8 -0
- agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
- agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
- agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
- agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
- agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
- agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
- agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
- agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
- agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
- agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
- agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
- agentforge_graph/frameworks/registry.py +44 -0
- agentforge_graph/ingest/__init__.py +30 -0
- agentforge_graph/ingest/codegraph.py +847 -0
- agentforge_graph/ingest/extractor.py +353 -0
- agentforge_graph/ingest/incremental/__init__.py +25 -0
- agentforge_graph/ingest/incremental/detect.py +118 -0
- agentforge_graph/ingest/incremental/dirty.py +61 -0
- agentforge_graph/ingest/incremental/indexer.py +218 -0
- agentforge_graph/ingest/incremental/meta.py +72 -0
- agentforge_graph/ingest/incremental/ports.py +39 -0
- agentforge_graph/ingest/pack.py +160 -0
- agentforge_graph/ingest/packs/__init__.py +34 -0
- agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
- agentforge_graph/ingest/packs/cpp/references.scm +15 -0
- agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
- agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
- agentforge_graph/ingest/packs/csharp/references.scm +12 -0
- agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
- agentforge_graph/ingest/packs/go/__init__.py +38 -0
- agentforge_graph/ingest/packs/go/references.scm +12 -0
- agentforge_graph/ingest/packs/go/structure.scm +64 -0
- agentforge_graph/ingest/packs/java/__init__.py +35 -0
- agentforge_graph/ingest/packs/java/references.scm +12 -0
- agentforge_graph/ingest/packs/java/structure.scm +38 -0
- agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
- agentforge_graph/ingest/packs/javascript/references.scm +11 -0
- agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
- agentforge_graph/ingest/packs/php/__init__.py +35 -0
- agentforge_graph/ingest/packs/php/references.scm +15 -0
- agentforge_graph/ingest/packs/php/structure.scm +44 -0
- agentforge_graph/ingest/packs/python/__init__.py +25 -0
- agentforge_graph/ingest/packs/python/references.scm +14 -0
- agentforge_graph/ingest/packs/python/structure.scm +57 -0
- agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
- agentforge_graph/ingest/packs/ruby/references.scm +12 -0
- agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
- agentforge_graph/ingest/packs/rust/__init__.py +39 -0
- agentforge_graph/ingest/packs/rust/references.scm +12 -0
- agentforge_graph/ingest/packs/rust/structure.scm +46 -0
- agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
- agentforge_graph/ingest/packs/typescript/references.scm +11 -0
- agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
- agentforge_graph/ingest/pipeline.py +134 -0
- agentforge_graph/ingest/report.py +84 -0
- agentforge_graph/ingest/resolver.py +467 -0
- agentforge_graph/ingest/source.py +79 -0
- agentforge_graph/knowledge/__init__.py +28 -0
- agentforge_graph/knowledge/adr.py +136 -0
- agentforge_graph/knowledge/commits.py +152 -0
- agentforge_graph/knowledge/ingest.py +312 -0
- agentforge_graph/knowledge/mentions.py +71 -0
- agentforge_graph/knowledge/report.py +32 -0
- agentforge_graph/main.py +21 -0
- agentforge_graph/providers.py +36 -0
- agentforge_graph/repomap/__init__.py +14 -0
- agentforge_graph/repomap/rank.py +161 -0
- agentforge_graph/repomap/render.py +55 -0
- agentforge_graph/repomap/repomap.py +66 -0
- agentforge_graph/retrieve/__init__.py +21 -0
- agentforge_graph/retrieve/pack.py +76 -0
- agentforge_graph/retrieve/rerank.py +251 -0
- agentforge_graph/retrieve/retriever.py +286 -0
- agentforge_graph/retrieve/scoring.py +36 -0
- agentforge_graph/serve/__init__.py +19 -0
- agentforge_graph/serve/engine.py +204 -0
- agentforge_graph/serve/http_runner.py +133 -0
- agentforge_graph/serve/server.py +110 -0
- agentforge_graph/serve/tools.py +307 -0
- agentforge_graph/store/__init__.py +32 -0
- agentforge_graph/store/_rowmap.py +102 -0
- agentforge_graph/store/errors.py +22 -0
- agentforge_graph/store/facade.py +89 -0
- agentforge_graph/store/kuzu_store.py +380 -0
- agentforge_graph/store/lance_store.py +146 -0
- agentforge_graph/store/neo4j_store.py +294 -0
- agentforge_graph/store/pgvector_store.py +170 -0
- agentforge_graph/store/registry.py +45 -0
- agentforge_graph/temporal/__init__.py +36 -0
- agentforge_graph/temporal/backfill.py +338 -0
- agentforge_graph/temporal/events.py +82 -0
- agentforge_graph/temporal/index.py +190 -0
- agentforge_graph/temporal/mining.py +190 -0
- agentforge_graph/temporal/recorder.py +114 -0
- agentforge_graph/temporal/store.py +282 -0
- agentforge_graph-0.3.2.dist-info/METADATA +291 -0
- agentforge_graph-0.3.2.dist-info/RECORD +151 -0
- agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
- agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
- agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
- agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""Typed reader for ``ckg.yaml`` — this agent's *own* engine config (NOT
|
|
2
|
+
the framework's ``agentforge.yaml``, which has a strict validator).
|
|
3
|
+
|
|
4
|
+
Unlike the framework file, ours is intentionally lenient: unknown keys are
|
|
5
|
+
ignored (``extra='ignore'``) so a config written for a later feature still
|
|
6
|
+
loads for an earlier one. Every section this module knows about (``store:``,
``ingest:``, ``chunking:``, ``embed:``, ``retrieve:``, ``repomap:``, ``serve:``,
``frameworks:``, ``enrich:``, ``knowledge:``, ``temporal:``) has its own model.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, ClassVar, Self
|
|
15
|
+
|
|
16
|
+
import yaml
|
|
17
|
+
from pydantic import BaseModel, Field, ValidationError, field_validator
|
|
18
|
+
|
|
19
|
+
# Default directories excluded from ingestion (mirrors ckg.yaml's ingest.exclude).
|
|
20
|
+
DEFAULT_EXCLUDES = [
|
|
21
|
+
"**/node_modules/**",
|
|
22
|
+
"**/.venv/**",
|
|
23
|
+
"**/dist/**",
|
|
24
|
+
"**/.git/**",
|
|
25
|
+
"**/.ckg/**",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _read_block[T: _Block](model: type[T], key: str, ckg_yaml: str | Path | None) -> T:
|
|
30
|
+
"""Parse one top-level block of ckg.yaml into ``model``. Missing file or
|
|
31
|
+
``None`` → defaults; malformed YAML / block → ``StoreConfigError``."""
|
|
32
|
+
# Imported lazily to avoid an import cycle (store.facade imports this).
|
|
33
|
+
from agentforge_graph.store.errors import StoreConfigError
|
|
34
|
+
|
|
35
|
+
if ckg_yaml is None:
|
|
36
|
+
return model()
|
|
37
|
+
p = Path(ckg_yaml)
|
|
38
|
+
if not p.exists():
|
|
39
|
+
return model()
|
|
40
|
+
try:
|
|
41
|
+
data = yaml.safe_load(p.read_text()) or {}
|
|
42
|
+
except yaml.YAMLError as exc:
|
|
43
|
+
raise StoreConfigError(f"could not parse {p}: {exc}") from exc
|
|
44
|
+
try:
|
|
45
|
+
return model.model_validate(data.get(key) or {})
|
|
46
|
+
except ValidationError as exc:
|
|
47
|
+
raise StoreConfigError(f"invalid {key} config in {p}: {exc}") from exc
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class _Block(BaseModel):
    """Common parent of the ckg.yaml section models.

    Each subclass sets ``KEY`` to the name of its top-level section; ``load``
    then reads that section through ``_read_block``.
    """

    # Top-level ckg.yaml key for this section (overridden by every subclass).
    KEY: ClassVar[str] = ""

    @classmethod
    def load(cls, ckg_yaml: str | Path | None = None) -> Self:
        """Return this section parsed from ``ckg_yaml`` (see ``_read_block``)."""
        section_key = cls.KEY
        return _read_block(cls, section_key, ckg_yaml)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class GraphCfg(BaseModel):
    # Nested model used for the ``graph`` field of ``StoreConfig``.

    # Name of the graph driver; "kuzu" when unspecified.
    driver: str = "kuzu"
    # Free-form options; presumably handed to the selected driver — confirm
    # against the store registry.
    config: dict[str, Any] = Field(default_factory=dict)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class VectorCfg(BaseModel):
    # Nested model used for the ``vectors`` field of ``StoreConfig``.

    # Name of the vector driver; "lancedb" when unspecified.
    driver: str = "lancedb"
    # Free-form options; presumably handed to the selected driver — confirm
    # against the store registry.
    config: dict[str, Any] = Field(default_factory=dict)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class StoreConfig(_Block):
    """Model of the ``store:`` section of ckg.yaml (ADR-0006)."""

    KEY: ClassVar[str] = "store"

    # presumably the directory that holds the store's data — confirm against
    # the store facade.
    path: str = ".ckg"
    # Graph backend selection; a fresh ``GraphCfg`` per instance.
    graph: GraphCfg = Field(default_factory=GraphCfg)
    # Vector backend selection; a fresh ``VectorCfg`` per instance.
    vectors: VectorCfg = Field(default_factory=VectorCfg)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class IngestConfig(_Block):
    """Model of the ``ingest:`` section of ckg.yaml (feat-002 / ADR-0009)."""

    KEY: ClassVar[str] = "ingest"

    # Either the literal "auto" or an explicit list of pack names.
    languages: str | list[str] = "auto"
    # Exclusion globs; every instance starts from its own copy of
    # DEFAULT_EXCLUDES so edits never leak into the module constant.
    exclude: list[str] = Field(default_factory=lambda: [*DEFAULT_EXCLUDES])
    # presumably a per-file size ceiling in KB — confirm against the ingest
    # pipeline.
    max_file_kb: int = 512
    # Opt-in resolution escalation (Tier B); inert at 0.1.
    lsp_assist: bool = False
    # feat-004: re-index only the diff when a prior index exists.
    incremental: bool = True
    # Import-graph hops to re-resolve around a changed file.
    resolve_scope_hops: int = 1
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class ChunkingConfig(_Block):
    """Model of the ``chunking:`` section of ckg.yaml (feat-005 / ADR-0007)."""

    KEY: ClassVar[str] = "chunking"

    # presumably the upper / lower token bounds for a chunk — confirm against
    # the chunking package; no min <= max check is enforced here.
    max_tokens: int = 512
    min_tokens: int = 64
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class EmbedConfig(_Block):
    """Model of the ``embed:`` section of ckg.yaml (feat-005).

    The default driver is ``bedrock`` (Cohere embed-v4); tests/CI use ``fake``.
    """

    KEY: ClassVar[str] = "embed"

    # ENH-003: bedrock | fake | openai | <entry-point>. `openai` also covers
    # OpenAI-compatible local servers via `base_url` (Ollama/vLLM/LM Studio).
    driver: str = "bedrock"
    model: str = "cohere.embed-v4:0"
    region: str = "us-east-1"
    dim: int = 1024
    batch_size: int = 96
    # Set for CI; empty = default AWS credential chain.
    assume_role_arn: str = ""
    # ENH-003: OpenAI-compatible endpoint (empty = provider default).
    base_url: str = ""
    # ENH-003: env var holding the API key (empty = provider default).
    api_key_env: str = ""
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _default_edge_weights() -> dict[str, float]:
|
|
117
|
+
# By provenance: resolved facts outrank parsed; llm is second-class (ADR-0004).
|
|
118
|
+
return {"resolved": 1.0, "manual": 0.8, "parsed": 0.5, "llm": 0.3}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class RetrieveConfig(_Block):
    """Model of the ``retrieve:`` section of ckg.yaml (feat-006 / ADR-0008)."""

    KEY: ClassVar[str] = "retrieve"

    k: int = 8
    depth: int = 1
    decay: float = 0.6
    # Max neighbors expanded per hop (overflow noted, not silent).
    fanout_cap: int = 25
    # ENH-009: off (default) | lexical | cross_encoder. `lexical` is a
    # deterministic subtoken blend (helps keyword/symbol-naming queries, mixed
    # on prose). `cross_encoder` is a real semantic re-score via
    # sentence-transformers (the `rerank` extra; lazy-loaded). Both are opt-in
    # (measure, don't blind-flip).
    rerank: str = "off"
    # final = (1-w)*base + w*signal (overlap | σ(cross)).
    rerank_weight: float = 0.5
    # cross_encoder model id (empty = a small ms-marco default).
    rerank_model: str = ""
    edge_weights: dict[str, float] = Field(default_factory=_default_edge_weights)
    # feat-010: scale ADR/doc (`source_type: doc`) vector hits so code outranks
    # equally-similar prose by default, mitigating doc-volume dilution.
    # Neutralised (→ 1.0) when the query smells architectural
    # (decision/why/design/…).
    doc_weight: float = 0.7

    @field_validator("rerank", mode="before")
    @classmethod
    def _coerce_rerank(cls, v: Any) -> Any:
        """Turn a YAML boolean back into a rerank mode name.

        YAML 1.1 reads a bare ``off`` / ``on`` as a boolean, so ``rerank: off``
        (as shipped in ckg.yaml) arrives as ``False`` and would fail string
        validation. ``False`` maps to ``"off"`` and ``True`` to ``"lexical"``;
        ``cross_encoder`` can only be chosen by name. Any other value is passed
        through untouched for normal validation.
        """
        if v is True:
            return "lexical"
        if v is False:
            return "off"
        return v
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class RepoMapConfig(_Block):
    """Model of the ``repomap:`` section of ckg.yaml (feat-007)."""

    KEY: ClassVar[str] = "repomap"

    default_budget: int = 2000
    damping: float = 0.85
    # A fresh list per instance so callers can mutate it safely.
    kinds: list[str] = Field(default_factory=lambda: ["Class", "Function", "Method"])
    edge_weights: dict[str, float] = Field(default_factory=_default_edge_weights)
    # ENH-007: down-weight clearly-private symbols (leading-underscore names or
    # `_`-prefixed modules) so the map surfaces the public API first. This is a
    # weight, not a filter: private hubs can still appear when genuinely
    # central. Range [0, 1]; 0.0 = pure centrality, higher demotes private
    # symbols harder. Private symbols are multiplied by (1 - public_bias).
    public_bias: float = 0.5
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class ServeConfig(_Block):
    """Model of the ``serve:`` section of ckg.yaml (feat-008 — MCP server + guardrails)."""

    KEY: ClassVar[str] = "serve"

    # feat-008: stdio | http (streamable-HTTP at /mcp).
    transport: str = "stdio"
    # Bind host for the http transport.
    host: str = "127.0.0.1"
    # Port for the http transport.
    port: int = 8765
    # ENH-005: bearer token for the HTTP transport (empty = no auth, localhost
    # default). Prefer $CKG_HTTP_AUTH_TOKEN over putting the secret in ckg.yaml.
    http_auth_token: str = ""
    # presumably the guardrail limits named in the docstring; how they are
    # enforced lives in the serve package — confirm there.
    max_depth: int = 3
    max_k: int = 50
    response_token_cap: int = 6000
    refresh_on_call: bool = False
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class FrameworksConfig(_Block):
    """Model of the ``frameworks:`` section of ckg.yaml (feat-011)."""

    KEY: ClassVar[str] = "frameworks"

    # "auto" → detect per repo; "off" → none; or an explicit list of pack names.
    enabled: str | list[str] = "auto"
    # Packs to force-enable, e.g. ["fastapi"].
    packs: list[str] = Field(default_factory=list)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class EnrichConfig(_Block):
    """Model of the ``enrich:`` section of ckg.yaml (feat-012 — LLM enrichment).

    Claude runs on **AWS Bedrock** (default) or the **direct Anthropic API**
    (``provider: anthropic`` — ENH-003 phase 2, the non-AWS path). Enrichment
    never runs implicitly — only via ``ckg enrich`` / ``CodeGraph.enrich()``.
    """

    KEY: ClassVar[str] = "enrich"

    enabled: bool = True
    # ENH-003: provider for BOTH judge + summarizer. bedrock | anthropic |
    # scripted | <entry-point>. `anthropic` = direct Anthropic API (needs
    # ANTHROPIC_API_KEY); `scripted` is the credential-free deterministic one.
    provider: str = "bedrock"
    # Default is a Bedrock inference-profile id (the `us.` prefix). The
    # `anthropic` provider normalises it to the bare API id automatically.
    model: str = "us.anthropic.claude-haiku-4-5-20251001-v1:0"
    region: str = "us-east-1"
    # Set for CI; empty = default AWS credential chain.
    assume_role_arn: str = ""
    # ENH-003: Anthropic-compatible endpoint (empty = default).
    base_url: str = ""
    # ENH-003: env var holding the API key (empty = ANTHROPIC_API_KEY).
    api_key_env: str = ""
    # Per-run LLM judge cap (breaker).
    budget_usd: float = 2.0
    # Tags below this confidence are dropped.
    confidence_floor: float = 0.7
    taxonomy: str = "v1"
    # ENH-001: conservative | broad.
    patterns_recall: str = "conservative"
    # ENH-002: in-flight LLM calls per enrich run.
    concurrency: int = 6
    # feat-012 summaries.
    summary_max_words: int = 120
    summary_levels: list[str] = Field(default_factory=lambda: ["file", "repo"])
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _default_adr_globs() -> list[str]:
|
|
225
|
+
return ["docs/adr/**/*.md", "docs/decisions/**/*.md"]
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class KnowledgeConfig(_Block):
    """Model of the ``knowledge:`` section of ckg.yaml (feat-010 — ADR & docs ingestion).

    ``enabled`` and ``adr_globs`` drive the deterministic pass;
    ``infer_budget_usd`` is the USD cap of the ``ckg enrich --decisions`` LLM
    matcher. ``doc_globs`` / ``commit_messages`` are declared for follow-ups,
    and ``infer_governs`` is only the default for the LLM pass (the CLI flag
    runs it on demand regardless).
    """

    KEY: ClassVar[str] = "knowledge"

    enabled: bool = True
    adr_globs: list[str] = Field(default_factory=_default_adr_globs)
    # General docs → DocChunks+DESCRIBES.
    doc_globs: list[str] = Field(default_factory=list)
    # Ingest conventional/issue-ref commit messages.
    commit_messages: bool = False
    # How many recent commits to scan.
    commit_messages_limit: int = 50
    # Default for the LLM matcher (CLI flag overrides).
    infer_governs: bool = False
    # USD cap for the infer_governs pass.
    infer_budget_usd: float = 1.0
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
class TemporalConfig(_Block):
    """Model of the ``temporal:`` section of ckg.yaml (feat-009 — git-evolution layer).

    **Opt-in (default off).** When enabled (and the source is a git repo), the
    feat-004 refresh records symbol lifecycle into a ``.ckg/temporal.db``
    sidecar — the basis for history / changed-since / as-of and churn ranking
    signals. Disabled means delete-on-refresh, exactly as before. See
    ``docs/design/design-009-temporal-evolution-layer.md``.
    """

    KEY: ClassVar[str] = "temporal"

    enabled: bool = False
    # Commits to replay at first index (chunk 4).
    history_backfill: int = 0
    # Prune closed events beyond this horizon (chunk 5).
    retention_commits: int = 1000
    # file (exact git renames) | signature (intra-file, chunk 6).
    rename_detection: str = "file"
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""agentforge_graph.core — the locked schema and contracts.
|
|
2
|
+
|
|
3
|
+
The stable surface every other feature plugs into: typed node/edge
|
|
4
|
+
kinds, value types (with provenance + symbol-ID validation enforced at
|
|
5
|
+
construction), the symbol-ID grammar, and the ABCs. This package imports
|
|
6
|
+
nothing from ``agentforge`` (ADR-0001) — it is the deterministic engine
|
|
7
|
+
core and is usable standalone.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from .contracts import Direction, Enricher, Extractor, GraphStore, VectorStore
|
|
13
|
+
from .kinds import EdgeKind, NodeKind
|
|
14
|
+
from .models import (
|
|
15
|
+
Edge,
|
|
16
|
+
Embedded,
|
|
17
|
+
FileSubgraph,
|
|
18
|
+
GraphQuery,
|
|
19
|
+
Node,
|
|
20
|
+
QueryResult,
|
|
21
|
+
ScoredRef,
|
|
22
|
+
SourceFile,
|
|
23
|
+
)
|
|
24
|
+
from .provenance import Provenance, Source
|
|
25
|
+
from .symbols import Descriptor, ParsedSymbol, SymbolID, normalize_path
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
# kinds
|
|
29
|
+
"NodeKind",
|
|
30
|
+
"EdgeKind",
|
|
31
|
+
# provenance
|
|
32
|
+
"Provenance",
|
|
33
|
+
"Source",
|
|
34
|
+
# symbols
|
|
35
|
+
"SymbolID",
|
|
36
|
+
"ParsedSymbol",
|
|
37
|
+
"Descriptor",
|
|
38
|
+
"normalize_path",
|
|
39
|
+
# models
|
|
40
|
+
"Node",
|
|
41
|
+
"Edge",
|
|
42
|
+
"FileSubgraph",
|
|
43
|
+
"SourceFile",
|
|
44
|
+
"GraphQuery",
|
|
45
|
+
"QueryResult",
|
|
46
|
+
"Embedded",
|
|
47
|
+
"ScoredRef",
|
|
48
|
+
# contracts
|
|
49
|
+
"Extractor",
|
|
50
|
+
"GraphStore",
|
|
51
|
+
"VectorStore",
|
|
52
|
+
"Enricher",
|
|
53
|
+
"Direction",
|
|
54
|
+
]
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""Reusable conformance suites for the core ABCs.
|
|
2
|
+
|
|
3
|
+
A feat-003 storage adapter or a feat-002 extractor proves it honours
|
|
4
|
+
the contract by subclassing the matching ``*Conformance`` base class and
|
|
5
|
+
providing the required fixture. The same suite runs against every
|
|
6
|
+
implementer, so they're interchangeable. Pytest-free at import time (the
|
|
7
|
+
async test methods are collected by pytest-asyncio in the *test* package;
|
|
8
|
+
this module never imports pytest, keeping the core import light).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from .contracts import Extractor, GraphStore, VectorStore
|
|
14
|
+
from .kinds import EdgeKind, NodeKind
|
|
15
|
+
from .models import Edge, Embedded, FileSubgraph, GraphQuery, Node, SourceFile
|
|
16
|
+
from .provenance import Provenance
|
|
17
|
+
from .symbols import Descriptor, SymbolID
|
|
18
|
+
|
|
19
|
+
_LANG = "py"
|
|
20
|
+
_REPO = "sample"
|
|
21
|
+
_PATH = "src/app/auth.py"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def make_sample_subgraph(commit: str = "c0") -> FileSubgraph:
    """Build a minimal valid subgraph: File ▸ Class ▸ Method joined by CONTAINS.

    ``commit`` is passed to the parsed provenance of every node and edge and
    is embedded in the subgraph's ``content_hash`` (``hash-<commit>``).
    """
    parsed = Provenance.parsed("conformance", commit)
    file_sym = SymbolID.for_symbol(_LANG, _REPO, _PATH, "")
    class_sym = SymbolID.for_symbol(_LANG, _REPO, _PATH, Descriptor.type("Auth"))
    method_descriptor = Descriptor.type("Auth") + Descriptor.method("login")
    method_sym = SymbolID.for_symbol(_LANG, _REPO, _PATH, method_descriptor)

    file_node = Node(id=file_sym, kind=NodeKind.FILE, name="auth.py", provenance=parsed)
    class_node = Node(
        id=class_sym, kind=NodeKind.CLASS, name="Auth", span=(1, 20), provenance=parsed
    )
    method_node = Node(
        id=method_sym, kind=NodeKind.METHOD, name="login", span=(2, 10), provenance=parsed
    )

    file_contains_class = Edge(
        src=file_sym, dst=class_sym, kind=EdgeKind.CONTAINS, provenance=parsed
    )
    class_contains_method = Edge(
        src=class_sym, dst=method_sym, kind=EdgeKind.CONTAINS, provenance=parsed
    )

    return FileSubgraph(
        path=_PATH,
        content_hash=f"hash-{commit}",
        nodes=[file_node, class_node, method_node],
        edges=[file_contains_class, class_contains_method],
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class GraphStoreConformance:
|
|
45
|
+
"""Subclass in a feat-003 adapter; provide an async ``store`` fixture
|
|
46
|
+
yielding a fresh, empty ``GraphStore``."""
|
|
47
|
+
|
|
48
|
+
async def test_upsert_then_get(self, store: GraphStore) -> None:
|
|
49
|
+
sg = make_sample_subgraph()
|
|
50
|
+
await store.upsert(sg)
|
|
51
|
+
got = await store.get(sg.nodes[0].id)
|
|
52
|
+
assert got is not None
|
|
53
|
+
assert got.id == sg.nodes[0].id
|
|
54
|
+
|
|
55
|
+
async def test_reupsert_is_idempotent(self, store: GraphStore) -> None:
|
|
56
|
+
sg = make_sample_subgraph()
|
|
57
|
+
await store.upsert(sg)
|
|
58
|
+
await store.upsert(sg)
|
|
59
|
+
res = await store.query(GraphQuery(path_prefix="src/app"))
|
|
60
|
+
assert len(res.nodes) == len(sg.nodes)
|
|
61
|
+
|
|
62
|
+
async def test_delete_file_removes_nodes(self, store: GraphStore) -> None:
|
|
63
|
+
sg = make_sample_subgraph()
|
|
64
|
+
await store.upsert(sg)
|
|
65
|
+
await store.delete_file(sg.path)
|
|
66
|
+
assert await store.get(sg.nodes[0].id) is None
|
|
67
|
+
|
|
68
|
+
async def test_enrichment_survives_file_reupsert(self, store: GraphStore) -> None:
|
|
69
|
+
sg = make_sample_subgraph()
|
|
70
|
+
await store.upsert(sg)
|
|
71
|
+
target = sg.nodes[1].id # the class
|
|
72
|
+
summary_id = SymbolID.for_symbol(_LANG, _REPO, _PATH, "Auth#summary.")
|
|
73
|
+
llm = Provenance.llm("enricher", 0.9)
|
|
74
|
+
await store.add(
|
|
75
|
+
[
|
|
76
|
+
Node(id=summary_id, kind=NodeKind.SUMMARY, name="summary", provenance=llm),
|
|
77
|
+
Edge(src=summary_id, dst=target, kind=EdgeKind.SUMMARIZES, provenance=llm),
|
|
78
|
+
]
|
|
79
|
+
)
|
|
80
|
+
# the file changes and is re-indexed; the enrichment must survive
|
|
81
|
+
await store.upsert(make_sample_subgraph(commit="c1"))
|
|
82
|
+
assert await store.get(summary_id) is not None
|
|
83
|
+
|
|
84
|
+
async def test_set_attrs_merges_and_keeps_file_ownership(self, store: GraphStore) -> None:
|
|
85
|
+
sg = make_sample_subgraph()
|
|
86
|
+
await store.upsert(sg)
|
|
87
|
+
target = sg.nodes[1].id # the class
|
|
88
|
+
await store.set_attrs(target, {"churn_90d": 7})
|
|
89
|
+
await store.set_attrs(target, {"introduced": "abc123"}) # second merge
|
|
90
|
+
got = await store.get(target)
|
|
91
|
+
assert got is not None
|
|
92
|
+
assert got.attrs.get("churn_90d") == 7 # first patch survives the second
|
|
93
|
+
assert got.attrs.get("introduced") == "abc123"
|
|
94
|
+
# the patched node is still file-owned: delete_file removes it (the
|
|
95
|
+
# merge must not have clobbered origin_path, feat-009).
|
|
96
|
+
await store.delete_file(sg.path)
|
|
97
|
+
assert await store.get(target) is None
|
|
98
|
+
# patching an absent node is a no-op, not an error
|
|
99
|
+
await store.set_attrs(target, {"churn_90d": 1})
|
|
100
|
+
assert await store.get(target) is None
|
|
101
|
+
|
|
102
|
+
async def test_reserved_kind_preserved(self, store: GraphStore) -> None:
|
|
103
|
+
route_id = SymbolID.for_symbol(_LANG, _REPO, "src/app/api.py", "route(GET_x).")
|
|
104
|
+
await store.add(
|
|
105
|
+
[
|
|
106
|
+
Node(
|
|
107
|
+
id=route_id,
|
|
108
|
+
kind=NodeKind.ROUTE,
|
|
109
|
+
name="GET /x",
|
|
110
|
+
provenance=Provenance.parsed("conformance"),
|
|
111
|
+
)
|
|
112
|
+
]
|
|
113
|
+
)
|
|
114
|
+
got = await store.get(route_id)
|
|
115
|
+
assert got is not None
|
|
116
|
+
assert got.kind is NodeKind.ROUTE
|
|
117
|
+
|
|
118
|
+
async def test_neighbors_walks_contains(self, store: GraphStore) -> None:
|
|
119
|
+
sg = make_sample_subgraph()
|
|
120
|
+
await store.upsert(sg)
|
|
121
|
+
file_id, class_id, method_id = (n.id for n in sg.nodes)
|
|
122
|
+
one_hop = {n.id for n in await store.neighbors(file_id, [EdgeKind.CONTAINS], depth=1)}
|
|
123
|
+
assert class_id in one_hop
|
|
124
|
+
two_hop = {n.id for n in await store.neighbors(file_id, [EdgeKind.CONTAINS], depth=2)}
|
|
125
|
+
assert method_id in two_hop
|
|
126
|
+
|
|
127
|
+
async def test_clear_resolved_invalidates_and_gcs_packages(self, store: GraphStore) -> None:
|
|
128
|
+
sg = make_sample_subgraph()
|
|
129
|
+
await store.upsert(sg)
|
|
130
|
+
caller = sg.nodes[2].id # the method, in _PATH
|
|
131
|
+
# A resolved CALLS edge to a parsed symbol + a resolved IMPORTS edge to
|
|
132
|
+
# an external package stub, both owned by _PATH (feat-004 tagging).
|
|
133
|
+
pkg_id = SymbolID.for_symbol(_LANG, _REPO, "<external>", "react/namespace.")
|
|
134
|
+
resolved = Provenance.resolved("resolver")
|
|
135
|
+
await store.add(
|
|
136
|
+
[
|
|
137
|
+
Node(id=pkg_id, kind=NodeKind.PACKAGE, name="react", provenance=resolved),
|
|
138
|
+
Edge(
|
|
139
|
+
src=caller,
|
|
140
|
+
dst=sg.nodes[1].id,
|
|
141
|
+
kind=EdgeKind.CALLS,
|
|
142
|
+
provenance=resolved,
|
|
143
|
+
origin_path=_PATH,
|
|
144
|
+
),
|
|
145
|
+
Edge(
|
|
146
|
+
src=SymbolID.for_symbol(_LANG, _REPO, _PATH, ""),
|
|
147
|
+
dst=pkg_id,
|
|
148
|
+
kind=EdgeKind.IMPORTS,
|
|
149
|
+
provenance=resolved,
|
|
150
|
+
origin_path=_PATH,
|
|
151
|
+
),
|
|
152
|
+
]
|
|
153
|
+
)
|
|
154
|
+
assert await store.get(pkg_id) is not None
|
|
155
|
+
await store.clear_resolved([_PATH])
|
|
156
|
+
# resolved edges gone, the now-orphan package GC'd, parsed nodes intact
|
|
157
|
+
assert await store.adjacent(caller, [EdgeKind.CALLS], "out") == []
|
|
158
|
+
assert await store.get(pkg_id) is None
|
|
159
|
+
assert await store.get(sg.nodes[1].id) is not None # the class survives
|
|
160
|
+
|
|
161
|
+
async def test_clear_outgoing_removes_kind_from_sources(self, store: GraphStore) -> None:
|
|
162
|
+
sg = make_sample_subgraph()
|
|
163
|
+
await store.upsert(sg)
|
|
164
|
+
cls = sg.nodes[1].id
|
|
165
|
+
tag_id = SymbolID.for_symbol(_LANG, _REPO, "<taxonomy>", "Repository.")
|
|
166
|
+
llm = Provenance.llm("pattern-tags", 0.9)
|
|
167
|
+
await store.add(
|
|
168
|
+
[
|
|
169
|
+
Node(id=tag_id, kind=NodeKind.PATTERN_TAG, name="Repository", provenance=llm),
|
|
170
|
+
Edge(src=cls, dst=tag_id, kind=EdgeKind.TAGGED, provenance=llm),
|
|
171
|
+
]
|
|
172
|
+
)
|
|
173
|
+
assert await store.adjacent(cls, [EdgeKind.TAGGED], "out")
|
|
174
|
+
await store.clear_outgoing([cls], EdgeKind.TAGGED)
|
|
175
|
+
assert await store.adjacent(cls, [EdgeKind.TAGGED], "out") == []
|
|
176
|
+
# the CONTAINS edge from the same source is untouched (kind-scoped)
|
|
177
|
+
assert await store.adjacent(cls, [EdgeKind.CONTAINS], "out")
|
|
178
|
+
|
|
179
|
+
async def test_adjacent_directed(self, store: GraphStore) -> None:
    """``adjacent`` honours the requested direction ("out", "in", "both")
    and the edge-kind filter."""
    # Sample shape: File -CONTAINS-> Class -CONTAINS-> Method
    sample = make_sample_subgraph()
    await store.upsert(sample)
    file_id, class_id, method_id = [node.id for node in sample.nodes]

    parent_link = (file_id, class_id)
    child_link = (class_id, method_id)

    outgoing = await store.adjacent(class_id, [EdgeKind.CONTAINS], "out")
    outgoing_pairs = [(edge.src, edge.dst) for edge in outgoing]
    assert outgoing_pairs == [child_link]

    inbound = await store.adjacent(class_id, [EdgeKind.CONTAINS], "in")
    inbound_pairs = [(edge.src, edge.dst) for edge in inbound]
    assert inbound_pairs == [parent_link]

    # Order is not asserted for "both", hence the set comparison.
    either_way = await store.adjacent(class_id, [EdgeKind.CONTAINS], "both")
    either_pairs = {(edge.src, edge.dst) for edge in either_way}
    assert either_pairs == {parent_link, child_link}

    # A kind with no matching edges yields nothing, whatever the direction.
    calls = await store.adjacent(class_id, [EdgeKind.CALLS], "both")
    assert calls == []
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# One distinct kind per sample embedding, so a ``kind`` filter can single out
# exactly one item. ``make_sample_embeddings`` reads its kinds from this tuple;
# the refs it builds are valid SymbolIDs.
_SAMPLE_KINDS = (NodeKind.CHUNK, NodeKind.DOC_CHUNK, NodeKind.SUMMARY)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def make_sample_embeddings(dim: int = 8) -> list[Embedded]:
    """Build one tiny one-hot embedding per entry of ``_SAMPLE_KINDS``,
    for exercising a ``VectorStore`` without a real embedder.

    Item ``i`` has ``1.0`` at index ``i`` of its vector and ``0.0`` elsewhere,
    takes ``_SAMPLE_KINDS[i]`` as its kind and records ``i`` under
    ``attrs["ordinal"]``.

    Args:
        dim: Length of every vector. Must be at least ``len(_SAMPLE_KINDS)``
            so that each item gets its own axis.

    Returns:
        The sample embeddings, in ``_SAMPLE_KINDS`` order.

    Raises:
        ValueError: If ``dim`` is smaller than the number of sample kinds;
            some vectors would then be all zeros rather than one-hot.
    """
    if dim < len(_SAMPLE_KINDS):
        raise ValueError(
            f"dim must be >= {len(_SAMPLE_KINDS)} so every sample vector is one-hot, got {dim}"
        )
    base = SymbolID.for_symbol(_LANG, _REPO, _PATH, Descriptor.type("Auth"))
    return [
        Embedded(
            # Each ref extends the shared base symbol with a per-item suffix.
            ref=f"{base}chunk{i}.",
            vector=[1.0 if j == i else 0.0 for j in range(dim)],
            kind=kind,
            attrs={"ordinal": i},
        )
        # Drive the loop from the kinds tuple so the item count can never
        # drift away from the number of available kinds.
        for i, kind in enumerate(_SAMPLE_KINDS)
    ]
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
class VectorStoreConformance:
    """Subclass in a feat-003 vector adapter; provide an async ``vectors``
    fixture yielding a fresh, empty ``VectorStore``.

    The ``filter`` contract targets first-class columns (``ref``, ``kind``,
    ``path``) — the portable subset every backend can honour — not nested
    ``attrs`` keys."""

    async def test_upsert_then_search_finds_nearest(self, vectors: VectorStore) -> None:
        """Searching with a stored vector returns that item as the top hit."""
        items = make_sample_embeddings()
        await vectors.upsert(items)
        hits = await vectors.search(items[1].vector, k=1)
        assert hits
        assert hits[0].ref == items[1].ref

    async def test_reupsert_is_idempotent(self, vectors: VectorStore) -> None:
        """Upserting the same items twice must not create duplicate rows."""
        items = make_sample_embeddings()
        await vectors.upsert(items)
        await vectors.upsert(items)
        hits = await vectors.search(items[0].vector, k=10)
        # Count the raw hits, not just the distinct refs: collapsing into a
        # set first would hide exactly the duplicates this test exists to catch.
        assert len(hits) == len(items)
        assert {h.ref for h in hits} == {item.ref for item in items}

    async def test_search_respects_k(self, vectors: VectorStore) -> None:
        """``search`` never returns more than ``k`` hits."""
        items = make_sample_embeddings()
        await vectors.upsert(items)
        hits = await vectors.search(items[0].vector, k=2)
        assert len(hits) <= 2

    async def test_filter_constrains_results(self, vectors: VectorStore) -> None:
        """A ``kind`` filter restricts hits to items of that kind."""
        items = make_sample_embeddings()
        await vectors.upsert(items)
        hits = await vectors.search(items[0].vector, k=10, filter={"kind": NodeKind.CHUNK.value})
        assert hits
        # Only items[0] carries the CHUNK kind in the sample set.
        assert all(h.ref == items[0].ref for h in hits)

    async def test_delete_where_removes(self, vectors: VectorStore) -> None:
        """``delete_where`` removes the matching items and only those."""
        items = make_sample_embeddings()
        await vectors.upsert(items)
        await vectors.delete_where({"kind": NodeKind.DOC_CHUNK.value})
        hits = await vectors.search(items[1].vector, k=10)
        remaining = {h.ref for h in hits}
        assert items[1].ref not in remaining
        # Deletion is filter-scoped: items of the other kinds must survive,
        # otherwise a backend that wipes everything would pass this test.
        assert {items[0].ref, items[2].ref} <= remaining
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
class ExtractorConformance:
    """Conformance checks for an ``Extractor``.

    Subclass in a feat-002/011 pack and supply the ``extractor`` and
    ``sample_file`` fixtures."""

    def test_output_is_valid_subgraph(self, extractor: Extractor, sample_file: SourceFile) -> None:
        """``extract`` returns a ``FileSubgraph`` with a truthy path and
        content hash."""
        subgraph = extractor.extract(sample_file)
        assert isinstance(subgraph, FileSubgraph)
        assert subgraph.path
        assert subgraph.content_hash

    def test_extraction_is_deterministic(
        self, extractor: Extractor, sample_file: SourceFile
    ) -> None:
        """Extracting the same file twice yields equal model dumps."""
        runs = [extractor.extract(sample_file) for _ in range(2)]
        first_dump, second_dump = (run.model_dump() for run in runs)
        assert first_dump == second_dump
|