agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,259 @@
1
+ """Typed reader for ``ckg.yaml`` — this agent's *own* engine config (NOT
2
+ the framework's ``agentforge.yaml``, which has a strict validator).
3
+
4
+ Unlike the framework file, ours is intentionally lenient: unknown keys are
5
+ ignored (``extra='ignore'``) so a config written for a later feature still
6
+ loads for an earlier one. The ``store:`` (feat-003) and ``ingest:``
7
+ (feat-002) blocks are modelled today; chunking/retrieve/… sections gain
8
+ their own models as those features land.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from pathlib import Path
14
+ from typing import Any, ClassVar, Self
15
+
16
+ import yaml
17
+ from pydantic import BaseModel, Field, ValidationError, field_validator
18
+
19
# Glob patterns skipped during ingestion unless ckg.yaml's ``ingest.exclude``
# says otherwise (this list mirrors the one shipped in ckg.yaml). The last
# entry matches the default ``store.path`` (".ckg").
DEFAULT_EXCLUDES: list[str] = [
    "**/node_modules/**",
    "**/.venv/**",
    "**/dist/**",
    "**/.git/**",
    "**/.ckg/**",
]
27
+
28
+
29
+ def _read_block[T: _Block](model: type[T], key: str, ckg_yaml: str | Path | None) -> T:
30
+ """Parse one top-level block of ckg.yaml into ``model``. Missing file or
31
+ ``None`` → defaults; malformed YAML / block → ``StoreConfigError``."""
32
+ # Imported lazily to avoid an import cycle (store.facade imports this).
33
+ from agentforge_graph.store.errors import StoreConfigError
34
+
35
+ if ckg_yaml is None:
36
+ return model()
37
+ p = Path(ckg_yaml)
38
+ if not p.exists():
39
+ return model()
40
+ try:
41
+ data = yaml.safe_load(p.read_text()) or {}
42
+ except yaml.YAMLError as exc:
43
+ raise StoreConfigError(f"could not parse {p}: {exc}") from exc
44
+ try:
45
+ return model.model_validate(data.get(key) or {})
46
+ except ValidationError as exc:
47
+ raise StoreConfigError(f"invalid {key} config in {p}: {exc}") from exc
48
+
49
+
50
class _Block(BaseModel):
    """Common base for ckg.yaml sections; each subclass names its own key."""

    # Top-level YAML key the section is read from; subclasses override it.
    KEY: ClassVar[str] = ""

    @classmethod
    def load(cls, ckg_yaml: str | Path | None = None) -> Self:
        """Build this section from ``ckg_yaml`` via ``_read_block``.

        ``ckg_yaml`` may be a path or ``None``; the lookup key is ``cls.KEY``.
        """
        section_key = cls.KEY
        return _read_block(cls, section_key, ckg_yaml)
58
+
59
+
60
class GraphCfg(BaseModel):
    """Graph backend selection nested under ``store.graph``."""

    # Name of the graph driver; "kuzu" unless configured otherwise.
    driver: str = "kuzu"
    # Free-form mapping, empty by default.
    # NOTE(review): presumably handed to the driver as its options — confirm.
    config: dict[str, Any] = Field(default_factory=dict)
63
+
64
+
65
class VectorCfg(BaseModel):
    """Vector backend selection nested under ``store.vectors``."""

    # Name of the vector driver; "lancedb" unless configured otherwise.
    driver: str = "lancedb"
    # Free-form mapping, empty by default.
    # NOTE(review): presumably handed to the driver as its options — confirm.
    config: dict[str, Any] = Field(default_factory=dict)
68
+
69
+
70
class StoreConfig(_Block):
    """Settings read from the ``store:`` block of ckg.yaml (ADR-0006)."""

    KEY: ClassVar[str] = "store"
    # Store location; defaults to ".ckg".
    path: str = ".ckg"
    # Graph and vector backends, each defaulting to its own model defaults.
    graph: GraphCfg = Field(default_factory=GraphCfg)
    vectors: VectorCfg = Field(default_factory=VectorCfg)
77
+
78
+
79
class IngestConfig(_Block):
    """Settings read from the ``ingest:`` block of ckg.yaml (feat-002 / ADR-0009)."""

    KEY: ClassVar[str] = "ingest"
    # Either the literal "auto" or an explicit list of pack names.
    languages: str | list[str] = "auto"
    # Each instance gets its own copy of DEFAULT_EXCLUDES as the default.
    exclude: list[str] = Field(default_factory=lambda: list(DEFAULT_EXCLUDES))
    max_file_kb: int = 512
    # Opt-in resolution escalation (Tier B); inert at 0.1.
    lsp_assist: bool = False
    # feat-004: re-index only the diff when a prior index exists.
    incremental: bool = True
    # Import-graph hops to re-resolve around a changed file.
    resolve_scope_hops: int = 1
89
+
90
+
91
class ChunkingConfig(_Block):
    """Settings read from the ``chunking:`` block of ckg.yaml (feat-005 / ADR-0007)."""

    KEY: ClassVar[str] = "chunking"
    # Token bounds for a chunk: upper limit first, then lower limit.
    max_tokens: int = 512
    min_tokens: int = 64
97
+
98
+
99
class EmbedConfig(_Block):
    """Settings read from the ``embed:`` block of ckg.yaml (feat-005).

    The default driver is ``bedrock`` (Cohere embed-v4); tests/CI use ``fake``.
    """

    KEY: ClassVar[str] = "embed"
    # ENH-003: bedrock | fake | openai | <entry-point>. `openai` also covers
    # OpenAI-compatible local servers via `base_url` (Ollama/vLLM/LM Studio).
    driver: str = "bedrock"
    model: str = "cohere.embed-v4:0"
    region: str = "us-east-1"
    dim: int = 1024
    batch_size: int = 96
    # Set for CI; empty = default AWS credential chain.
    assume_role_arn: str = ""
    # ENH-003: OpenAI-compatible endpoint (empty = provider default).
    base_url: str = ""
    # ENH-003: env var holding the API key (empty = provider default).
    api_key_env: str = ""
114
+
115
+
116
def _default_edge_weights() -> dict[str, float]:
    """Return a fresh provenance → weight mapping.

    Resolved facts outrank parsed ones; llm is second-class (ADR-0004).
    """
    return dict(resolved=1.0, manual=0.8, parsed=0.5, llm=0.3)
119
+
120
+
121
class RetrieveConfig(_Block):
    """Settings read from the ``retrieve:`` block of ckg.yaml (feat-006 / ADR-0008)."""

    KEY: ClassVar[str] = "retrieve"
    k: int = 8
    depth: int = 1
    decay: float = 0.6
    # Max neighbors expanded per hop (overflow noted, not silent).
    fanout_cap: int = 25
    # ENH-009: off (default) | lexical | cross_encoder. `lexical` is a
    # deterministic subtoken blend (helps keyword/symbol-naming queries, mixed
    # on prose). `cross_encoder` is a real semantic re-score via
    # sentence-transformers (the `rerank` extra; lazy-loaded). Both are opt-in
    # (measure, don't blind-flip).
    rerank: str = "off"
    # final = (1-w)*base + w*signal (overlap | σ(cross)).
    rerank_weight: float = 0.5
    # cross_encoder model id (empty = a small ms-marco default).
    rerank_model: str = ""
    edge_weights: dict[str, float] = Field(default_factory=_default_edge_weights)
    # feat-010: scale ADR/doc (`source_type: doc`) vector hits so code outranks
    # equally-similar prose by default, mitigating doc-volume dilution.
    # Neutralised (→ 1.0) when the query smells architectural
    # (decision/why/design/…).
    doc_weight: float = 0.7

    @field_validator("rerank", mode="before")
    @classmethod
    def _coerce_rerank(cls, v: Any) -> Any:
        """Translate a YAML boolean back into a rerank mode name.

        YAML 1.1 reads a bare ``off``/``on`` as a boolean, so ``rerank: off``
        (as shipped in ckg.yaml) arrives as ``False`` and would fail string
        validation. ``False`` becomes ``"off"`` and ``True`` becomes
        ``"lexical"``; every other value passes through unchanged.
        """
        if v is True:
            return "lexical"
        if v is False:
            return "off"
        return v
152
+
153
+
154
class RepoMapConfig(_Block):
    """Settings read from the ``repomap:`` block of ckg.yaml (feat-007)."""

    KEY: ClassVar[str] = "repomap"
    default_budget: int = 2000
    damping: float = 0.85
    # Node kinds included by default; a new list per instance.
    kinds: list[str] = Field(default_factory=lambda: ["Class", "Function", "Method"])
    edge_weights: dict[str, float] = Field(default_factory=_default_edge_weights)
    # ENH-007: down-weight clearly-private symbols (leading-underscore names
    # or `_`-prefixed modules) so the map surfaces the public API first. It is
    # a weight, not a filter: private hubs can still appear when genuinely
    # central. Range [0, 1]; 0.0 = pure centrality, higher demotes private
    # symbols harder. Private symbols are multiplied by (1 - public_bias).
    public_bias: float = 0.5
168
+
169
+
170
class ServeConfig(_Block):
    """Settings read from the ``serve:`` block of ckg.yaml (feat-008 — MCP
    server + guardrails)."""

    KEY: ClassVar[str] = "serve"
    # feat-008: stdio | http (streamable-HTTP at /mcp).
    transport: str = "stdio"
    # Bind host and port for the http transport.
    host: str = "127.0.0.1"
    port: int = 8765
    # ENH-005: bearer token for the HTTP transport (empty = no auth, the
    # localhost default). Prefer $CKG_HTTP_AUTH_TOKEN over putting the secret
    # in ckg.yaml.
    http_auth_token: str = ""
    max_depth: int = 3
    max_k: int = 50
    response_token_cap: int = 6000
    refresh_on_call: bool = False
184
+
185
+
186
class FrameworksConfig(_Block):
    """Settings read from the ``frameworks:`` block of ckg.yaml (feat-011)."""

    KEY: ClassVar[str] = "frameworks"
    # "auto" → detect per repo; "off" → none; or an explicit list of pack names.
    enabled: str | list[str] = "auto"
    # Packs to force-enable, e.g. ["fastapi"].
    packs: list[str] = Field(default_factory=list)
193
+
194
+
195
class EnrichConfig(_Block):
    """Settings read from the ``enrich:`` block of ckg.yaml (feat-012 — LLM
    enrichment).

    Claude runs on **AWS Bedrock** (default) or the **direct Anthropic API**
    (``provider: anthropic`` — ENH-003 phase 2, the non-AWS path). Enrichment
    never runs implicitly — only via ``ckg enrich`` / ``CodeGraph.enrich()``.
    """

    KEY: ClassVar[str] = "enrich"
    enabled: bool = True
    # ENH-003: provider for BOTH judge + summarizer. bedrock | anthropic |
    # scripted | <entry-point>. `anthropic` = direct Anthropic API (needs
    # ANTHROPIC_API_KEY); `scripted` is the credential-free deterministic one.
    provider: str = "bedrock"
    # The default is a Bedrock inference-profile id (the `us.` prefix). The
    # `anthropic` provider normalises it to the bare API id automatically.
    model: str = "us.anthropic.claude-haiku-4-5-20251001-v1:0"
    region: str = "us-east-1"
    # Set for CI; empty = default AWS credential chain.
    assume_role_arn: str = ""
    # ENH-003: Anthropic-compatible endpoint (empty = default).
    base_url: str = ""
    # ENH-003: env var holding the API key (empty = ANTHROPIC_API_KEY).
    api_key_env: str = ""
    # Per-run LLM judge cap (breaker).
    budget_usd: float = 2.0
    # Drop tags below this.
    confidence_floor: float = 0.7
    taxonomy: str = "v1"
    # ENH-001: conservative | broad.
    patterns_recall: str = "conservative"
    # ENH-002: in-flight LLM calls per enrich run.
    concurrency: int = 6
    # feat-012 summaries.
    summary_max_words: int = 120
    summary_levels: list[str] = Field(default_factory=lambda: ["file", "repo"])
222
+
223
+
224
def _default_adr_globs() -> list[str]:
    """Return a fresh list of the default ADR glob patterns."""
    patterns = ("docs/adr/**/*.md", "docs/decisions/**/*.md")
    return list(patterns)
226
+
227
+
228
class KnowledgeConfig(_Block):
    """Settings read from the ``knowledge:`` block of ckg.yaml (feat-010 — ADR
    & docs ingestion).

    ``enabled`` and ``adr_globs`` drive the deterministic pass;
    ``infer_budget_usd`` is the USD cap of the ``ckg enrich --decisions`` LLM
    matcher. ``doc_globs``/``commit_messages`` are declared for follow-ups.
    ``infer_governs`` is only the default for the LLM pass — the CLI flag runs
    it on demand regardless.
    """

    KEY: ClassVar[str] = "knowledge"
    enabled: bool = True
    adr_globs: list[str] = Field(default_factory=_default_adr_globs)
    # General docs → DocChunks+DESCRIBES.
    doc_globs: list[str] = Field(default_factory=list)
    # Ingest conventional/issue-ref commit messages.
    commit_messages: bool = False
    # How many recent commits to scan.
    commit_messages_limit: int = 50
    # Default for the LLM matcher (CLI flag overrides).
    infer_governs: bool = False
    # USD cap for the infer_governs pass.
    infer_budget_usd: float = 1.0
244
+
245
+
246
class TemporalConfig(_Block):
    """Settings read from the ``temporal:`` block of ckg.yaml (feat-009 —
    git-evolution layer).

    **Opt-in (default off).** When enabled (and the source is a git repo), the
    feat-004 refresh records symbol lifecycle into a ``.ckg/temporal.db``
    sidecar — the basis for history / changed-since / as-of and churn ranking
    signals. Disabled means delete-on-refresh, exactly as before. See
    ``docs/design/design-009-temporal-evolution-layer.md``.
    """

    KEY: ClassVar[str] = "temporal"
    enabled: bool = False
    # Commits to replay at first index (chunk 4).
    history_backfill: int = 0
    # Prune closed events beyond this horizon (chunk 5).
    retention_commits: int = 1000
    # file (exact git renames) | signature (intra-file, chunk 6).
    rename_detection: str = "file"
@@ -0,0 +1,54 @@
1
+ """agentforge_graph.core — the locked schema and contracts.
2
+
3
+ The stable surface every other feature plugs into: typed node/edge
4
+ kinds, value types (with provenance + symbol-ID validation enforced at
5
+ construction), the symbol-ID grammar, and the ABCs. This package imports
6
+ nothing from ``agentforge`` (ADR-0001) — it is the deterministic engine
7
+ core and is usable standalone.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from .contracts import Direction, Enricher, Extractor, GraphStore, VectorStore
13
+ from .kinds import EdgeKind, NodeKind
14
+ from .models import (
15
+ Edge,
16
+ Embedded,
17
+ FileSubgraph,
18
+ GraphQuery,
19
+ Node,
20
+ QueryResult,
21
+ ScoredRef,
22
+ SourceFile,
23
+ )
24
+ from .provenance import Provenance, Source
25
+ from .symbols import Descriptor, ParsedSymbol, SymbolID, normalize_path
26
+
27
# Public surface of ``agentforge_graph.core``, grouped by originating module.
__all__ = [
    # .kinds
    "NodeKind",
    "EdgeKind",
    # .provenance
    "Provenance",
    "Source",
    # .symbols
    "SymbolID",
    "ParsedSymbol",
    "Descriptor",
    "normalize_path",
    # .models
    "Node",
    "Edge",
    "FileSubgraph",
    "SourceFile",
    "GraphQuery",
    "QueryResult",
    "Embedded",
    "ScoredRef",
    # .contracts
    "Extractor",
    "GraphStore",
    "VectorStore",
    "Enricher",
    "Direction",
]
@@ -0,0 +1,270 @@
1
+ """Reusable conformance suites for the core ABCs.
2
+
3
+ A feat-003 storage adapter or a feat-002 extractor proves it honours
4
+ the contract by subclassing the matching ``*Conformance`` base class and
5
+ providing the required fixture. The same suite runs against every
6
+ implementer, so they're interchangeable. Pytest-free at import time (the
7
+ async test methods are collected by pytest-asyncio in the *test* package;
8
+ this module never imports pytest, keeping the core import light).
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from .contracts import Extractor, GraphStore, VectorStore
14
+ from .kinds import EdgeKind, NodeKind
15
+ from .models import Edge, Embedded, FileSubgraph, GraphQuery, Node, SourceFile
16
+ from .provenance import Provenance
17
+ from .symbols import Descriptor, SymbolID
18
+
19
# Language, repo and file path shared by every sample symbol id built below.
_LANG, _REPO, _PATH = "py", "sample", "src/app/auth.py"
22
+
23
+
24
def make_sample_subgraph(commit: str = "c0") -> FileSubgraph:
    """Build a tiny but valid subgraph: File ▸ Class ▸ Method, linked by CONTAINS.

    ``commit`` feeds both the parsed provenance and the ``content_hash``
    (``hash-<commit>``), so a different commit yields a different hash for the
    same path.
    """

    def symbol(descriptor: str) -> SymbolID:
        # Every sample symbol shares the same language, repo and file path.
        return SymbolID.for_symbol(_LANG, _REPO, _PATH, descriptor)

    prov = Provenance.parsed("conformance", commit)
    file_id = symbol("")
    class_id = symbol(Descriptor.type("Auth"))
    method_id = symbol(Descriptor.type("Auth") + Descriptor.method("login"))

    file_node = Node(id=file_id, kind=NodeKind.FILE, name="auth.py", provenance=prov)
    class_node = Node(
        id=class_id, kind=NodeKind.CLASS, name="Auth", span=(1, 20), provenance=prov
    )
    method_node = Node(
        id=method_id, kind=NodeKind.METHOD, name="login", span=(2, 10), provenance=prov
    )
    file_contains_class = Edge(
        src=file_id, dst=class_id, kind=EdgeKind.CONTAINS, provenance=prov
    )
    class_contains_method = Edge(
        src=class_id, dst=method_id, kind=EdgeKind.CONTAINS, provenance=prov
    )
    return FileSubgraph(
        path=_PATH,
        content_hash=f"hash-{commit}",
        nodes=[file_node, class_node, method_node],
        edges=[file_contains_class, class_contains_method],
    )
42
+
43
+
44
class GraphStoreConformance:
    """Contract suite for ``GraphStore`` adapters (feat-003).

    Subclass it in the adapter's tests and supply an async ``store`` fixture
    that yields a fresh, empty ``GraphStore``.
    """

    async def test_upsert_then_get(self, store: GraphStore) -> None:
        """A node written by ``upsert`` can be read back by id."""
        subgraph = make_sample_subgraph()
        await store.upsert(subgraph)
        first_id = subgraph.nodes[0].id
        fetched = await store.get(first_id)
        assert fetched is not None
        assert fetched.id == first_id

    async def test_reupsert_is_idempotent(self, store: GraphStore) -> None:
        """Upserting the same subgraph twice leaves one copy of each node."""
        subgraph = make_sample_subgraph()
        for _ in range(2):
            await store.upsert(subgraph)
        result = await store.query(GraphQuery(path_prefix="src/app"))
        assert len(result.nodes) == len(subgraph.nodes)

    async def test_delete_file_removes_nodes(self, store: GraphStore) -> None:
        """``delete_file`` removes the nodes upserted for that path."""
        subgraph = make_sample_subgraph()
        await store.upsert(subgraph)
        await store.delete_file(subgraph.path)
        leftover = await store.get(subgraph.nodes[0].id)
        assert leftover is None

    async def test_enrichment_survives_file_reupsert(self, store: GraphStore) -> None:
        """Nodes added via ``add`` outlive a re-upsert of the file they describe."""
        subgraph = make_sample_subgraph()
        await store.upsert(subgraph)
        class_id = subgraph.nodes[1].id  # the class
        summary_id = SymbolID.for_symbol(_LANG, _REPO, _PATH, "Auth#summary.")
        llm_prov = Provenance.llm("enricher", 0.9)
        summary_node = Node(
            id=summary_id, kind=NodeKind.SUMMARY, name="summary", provenance=llm_prov
        )
        summarizes = Edge(
            src=summary_id, dst=class_id, kind=EdgeKind.SUMMARIZES, provenance=llm_prov
        )
        await store.add([summary_node, summarizes])
        # The file changes and is re-indexed; the enrichment must survive.
        await store.upsert(make_sample_subgraph(commit="c1"))
        assert await store.get(summary_id) is not None

    async def test_set_attrs_merges_and_keeps_file_ownership(self, store: GraphStore) -> None:
        """``set_attrs`` merges patches and leaves the node file-owned."""
        subgraph = make_sample_subgraph()
        await store.upsert(subgraph)
        class_id = subgraph.nodes[1].id  # the class
        await store.set_attrs(class_id, {"churn_90d": 7})
        await store.set_attrs(class_id, {"introduced": "abc123"})  # second merge
        patched = await store.get(class_id)
        assert patched is not None
        assert patched.attrs.get("churn_90d") == 7  # first patch survives the second
        assert patched.attrs.get("introduced") == "abc123"
        # The patched node is still file-owned: delete_file removes it (the
        # merge must not have clobbered origin_path, feat-009).
        await store.delete_file(subgraph.path)
        assert await store.get(class_id) is None
        # Patching an absent node is a no-op, not an error.
        await store.set_attrs(class_id, {"churn_90d": 1})
        assert await store.get(class_id) is None

    async def test_reserved_kind_preserved(self, store: GraphStore) -> None:
        """A ROUTE node round-trips with its kind intact."""
        route_id = SymbolID.for_symbol(_LANG, _REPO, "src/app/api.py", "route(GET_x).")
        route_node = Node(
            id=route_id,
            kind=NodeKind.ROUTE,
            name="GET /x",
            provenance=Provenance.parsed("conformance"),
        )
        await store.add([route_node])
        fetched = await store.get(route_id)
        assert fetched is not None
        assert fetched.kind is NodeKind.ROUTE

    async def test_neighbors_walks_contains(self, store: GraphStore) -> None:
        """``neighbors`` reaches the class at depth 1 and the method at depth 2."""
        subgraph = make_sample_subgraph()
        await store.upsert(subgraph)
        file_id, class_id, method_id = [node.id for node in subgraph.nodes]
        depth_one = await store.neighbors(file_id, [EdgeKind.CONTAINS], depth=1)
        assert class_id in {node.id for node in depth_one}
        depth_two = await store.neighbors(file_id, [EdgeKind.CONTAINS], depth=2)
        assert method_id in {node.id for node in depth_two}

    async def test_clear_resolved_invalidates_and_gcs_packages(self, store: GraphStore) -> None:
        """``clear_resolved`` drops resolved edges and GCs the orphaned package."""
        subgraph = make_sample_subgraph()
        await store.upsert(subgraph)
        caller = subgraph.nodes[2].id  # the method, in _PATH
        # A resolved CALLS edge to a parsed symbol + a resolved IMPORTS edge to
        # an external package stub, both owned by _PATH (feat-004 tagging).
        pkg_id = SymbolID.for_symbol(_LANG, _REPO, "<external>", "react/namespace.")
        resolved = Provenance.resolved("resolver")
        package_node = Node(
            id=pkg_id, kind=NodeKind.PACKAGE, name="react", provenance=resolved
        )
        calls_edge = Edge(
            src=caller,
            dst=subgraph.nodes[1].id,
            kind=EdgeKind.CALLS,
            provenance=resolved,
            origin_path=_PATH,
        )
        imports_edge = Edge(
            src=SymbolID.for_symbol(_LANG, _REPO, _PATH, ""),
            dst=pkg_id,
            kind=EdgeKind.IMPORTS,
            provenance=resolved,
            origin_path=_PATH,
        )
        await store.add([package_node, calls_edge, imports_edge])
        assert await store.get(pkg_id) is not None
        await store.clear_resolved([_PATH])
        # Resolved edges gone, the now-orphan package GC'd, parsed nodes intact.
        outgoing_calls = await store.adjacent(caller, [EdgeKind.CALLS], "out")
        assert outgoing_calls == []
        assert await store.get(pkg_id) is None
        assert await store.get(subgraph.nodes[1].id) is not None  # the class survives

    async def test_clear_outgoing_removes_kind_from_sources(self, store: GraphStore) -> None:
        """``clear_outgoing`` removes only the requested edge kind from a source."""
        subgraph = make_sample_subgraph()
        await store.upsert(subgraph)
        class_id = subgraph.nodes[1].id
        tag_id = SymbolID.for_symbol(_LANG, _REPO, "<taxonomy>", "Repository.")
        llm_prov = Provenance.llm("pattern-tags", 0.9)
        tag_node = Node(
            id=tag_id, kind=NodeKind.PATTERN_TAG, name="Repository", provenance=llm_prov
        )
        tagged_edge = Edge(src=class_id, dst=tag_id, kind=EdgeKind.TAGGED, provenance=llm_prov)
        await store.add([tag_node, tagged_edge])
        assert await store.adjacent(class_id, [EdgeKind.TAGGED], "out")
        await store.clear_outgoing([class_id], EdgeKind.TAGGED)
        tagged_after = await store.adjacent(class_id, [EdgeKind.TAGGED], "out")
        assert tagged_after == []
        # The CONTAINS edge from the same source is untouched (kind-scoped).
        assert await store.adjacent(class_id, [EdgeKind.CONTAINS], "out")

    async def test_adjacent_directed(self, store: GraphStore) -> None:
        """``adjacent`` honours the direction argument and the kind filter."""
        subgraph = make_sample_subgraph()  # File -CONTAINS-> Class -CONTAINS-> Method
        await store.upsert(subgraph)
        file_id, class_id, method_id = [node.id for node in subgraph.nodes]
        contains = [EdgeKind.CONTAINS]

        outgoing = await store.adjacent(class_id, contains, "out")
        assert [(e.src, e.dst) for e in outgoing] == [(class_id, method_id)]

        incoming = await store.adjacent(class_id, contains, "in")
        assert [(e.src, e.dst) for e in incoming] == [(file_id, class_id)]

        either = await store.adjacent(class_id, contains, "both")
        expected_pairs = {(file_id, class_id), (class_id, method_id)}
        assert {(e.src, e.dst) for e in either} == expected_pairs

        # The kind filter excludes non-matching edges.
        calls_only = await store.adjacent(class_id, [EdgeKind.CALLS], "both")
        assert calls_only == []
191
+
192
+
193
# Distinct kinds so a kind-filter discriminates; refs are valid SymbolIDs.
_SAMPLE_KINDS = (NodeKind.CHUNK, NodeKind.DOC_CHUNK, NodeKind.SUMMARY)


def make_sample_embeddings(dim: int = 8) -> list[Embedded]:
    """Build one tiny one-hot vector per entry of ``_SAMPLE_KINDS``.

    Meant for exercising a ``VectorStore`` without a real embedder. Item ``i``
    has a 1.0 at index ``i`` and 0.0 elsewhere, carries the kind
    ``_SAMPLE_KINDS[i]`` and ``attrs={"ordinal": i}``.

    Args:
        dim: Length of every vector. It must be at least the number of sample
            kinds, otherwise the later vectors would have no slot for their
            1.0 and come out all-zero.

    Raises:
        ValueError: ``dim`` is smaller than ``len(_SAMPLE_KINDS)``.
    """
    if dim < len(_SAMPLE_KINDS):
        raise ValueError(
            f"dim must be >= {len(_SAMPLE_KINDS)} to hold distinct one-hot vectors, got {dim}"
        )
    base = SymbolID.for_symbol(_LANG, _REPO, _PATH, Descriptor.type("Auth"))
    return [
        Embedded(
            ref=f"{base}chunk{i}.",
            vector=[1.0 if j == i else 0.0 for j in range(dim)],
            kind=kind,
            attrs={"ordinal": i},
        )
        # Driven by the kinds tuple so the item count cannot drift from it.
        for i, kind in enumerate(_SAMPLE_KINDS)
    ]
210
+
211
+
212
class VectorStoreConformance:
    """Contract suite for ``VectorStore`` adapters (feat-003).

    Subclass it in the adapter's tests and supply an async ``vectors`` fixture
    that yields a fresh, empty ``VectorStore``.

    The ``filter`` contract targets first-class columns (``ref``, ``kind``,
    ``path``) — the portable subset every backend can honour — not nested
    ``attrs`` keys.
    """

    async def test_upsert_then_search_finds_nearest(self, vectors: VectorStore) -> None:
        """Searching with an item's own vector returns that item first."""
        items = make_sample_embeddings()
        await vectors.upsert(items)
        hits = await vectors.search(items[1].vector, k=1)
        assert hits
        assert hits[0].ref == items[1].ref

    async def test_reupsert_is_idempotent(self, vectors: VectorStore) -> None:
        """Upserting the same items twice must not create duplicate rows."""
        items = make_sample_embeddings()
        await vectors.upsert(items)
        await vectors.upsert(items)
        hits = await vectors.search(items[0].vector, k=10)
        refs = [h.ref for h in hits]
        # Count the raw hits: collapsing them into a set first would hide the
        # very duplicates a non-idempotent upsert produces.
        assert len(refs) == len(items)
        assert set(refs) == {item.ref for item in items}

    async def test_search_respects_k(self, vectors: VectorStore) -> None:
        """``search`` never returns more than ``k`` hits."""
        items = make_sample_embeddings()
        await vectors.upsert(items)
        hits = await vectors.search(items[0].vector, k=2)
        assert len(hits) <= 2

    async def test_filter_constrains_results(self, vectors: VectorStore) -> None:
        """A ``kind`` filter returns only the item of that kind."""
        items = make_sample_embeddings()
        await vectors.upsert(items)
        hits = await vectors.search(items[0].vector, k=10, filter={"kind": NodeKind.CHUNK.value})
        assert hits
        assert all(h.ref == items[0].ref for h in hits)

    async def test_delete_where_removes(self, vectors: VectorStore) -> None:
        """``delete_where`` on a kind removes the matching item from results."""
        items = make_sample_embeddings()
        await vectors.upsert(items)
        await vectors.delete_where({"kind": NodeKind.DOC_CHUNK.value})
        hits = await vectors.search(items[1].vector, k=10)
        assert items[1].ref not in {h.ref for h in hits}
253
+
254
+
255
class ExtractorConformance:
    """Contract suite for ``Extractor`` implementations (feat-002/011 packs).

    Subclass it in the pack's tests and supply ``extractor`` and
    ``sample_file`` fixtures.
    """

    def test_output_is_valid_subgraph(self, extractor: Extractor, sample_file: SourceFile) -> None:
        """``extract`` returns a ``FileSubgraph`` with a path and a content hash."""
        result = extractor.extract(sample_file)
        assert isinstance(result, FileSubgraph)
        assert result.path
        assert result.content_hash

    def test_extraction_is_deterministic(
        self, extractor: Extractor, sample_file: SourceFile
    ) -> None:
        """Two extractions of the same file dump to equal data."""
        first_dump = extractor.extract(sample_file).model_dump()
        second_dump = extractor.extract(sample_file).model_dump()
        assert first_dump == second_dump