agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,6 @@
1
+ # AGENTFORGE-MANAGED: template:minimal@0.2.4 hash:b6f9eb202d19
2
+ """agentforge-graph: a Code Knowledge Graph (CKG) engine + agent toolset."""
3
+
4
+ from agentforge_graph.cli import main
5
+
6
+ __all__ = ["main"]
@@ -0,0 +1,12 @@
1
+ """agentforge_graph.chunking — AST-aware (cAST) chunking of code symbols
2
+ into retrieval units linked back to the graph (feat-005). Deterministic;
3
+ imports nothing from ``agentforge`` (ADR-0001).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from .cast import CASTChunker, Chunker
9
+ from .chunk import Chunk
10
+ from .tokens import estimate_tokens
11
+
12
+ __all__ = ["Chunk", "Chunker", "CASTChunker", "estimate_tokens"]
@@ -0,0 +1,159 @@
1
+ """``CASTChunker`` — AST-aware chunking via split-then-merge over the symbol
2
+ spans feat-002 already extracted (no re-parse). Partitions a file's lines
3
+ into contiguous chunks that honour symbol boundaries: a symbol that fits the
4
+ budget is never split and never fused with another; oversized symbols recurse
5
+ into nested children (a class → per-method chunks) and finally line windows;
6
+ small inter-symbol gaps (imports, module code) merge up to the budget.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ from abc import ABC, abstractmethod
13
+
14
+ from agentforge_graph.core import Node, SourceFile, SymbolID
15
+
16
+ from .chunk import Chunk
17
+ from .tokens import estimate_tokens
18
+
19
+ _Range = tuple[int, int]
20
+
21
+
22
class Chunker(ABC):
    """Interface for anything that turns one source file into chunks."""

    @abstractmethod
    def chunk(self, file: SourceFile, symbols: list[Node]) -> list[Chunk]:
        """Return the chunks of ``file``, derived from its symbol nodes and their spans."""
26
+
27
+
28
class CASTChunker(Chunker):
    """AST-aware chunker: split on symbol spans, then merge small gaps.

    A file's lines are partitioned into line ranges. A symbol whose source
    fits ``max_tokens`` becomes one range; a larger symbol is split along its
    directly nested symbols and, failing that, into greedy line windows.
    Adjacent ranges that overlap no symbol are merged while they still fit
    the budget.
    """

    def __init__(self, max_tokens: int = 512, min_tokens: int = 64) -> None:
        """Store the token budget.

        Args:
            max_tokens: Upper bound (per ``estimate_tokens``) used whenever the
                chunker decides where to cut or whether to merge.
            min_tokens: Not used for cutting in this class; it is only folded
                into each chunk's content hash.
        """
        self.max_tokens = max_tokens
        self.min_tokens = min_tokens

    def chunk(self, file: SourceFile, symbols: list[Node]) -> list[Chunk]:
        """Partition ``file`` into chunks that honour symbol boundaries.

        Args:
            file: Source file; its ``text``, ``path`` and ``language`` are read.
            symbols: Symbol nodes of the file. Nodes without a span are ignored.

        Returns:
            Chunks in file order, ``seq`` numbered from 0. Empty when the file
            has no lines or no symbol carries a span.
        """
        lines = file.text.splitlines()
        n = len(lines)
        spanned = [s for s in symbols if s.span is not None]
        if not spanned or n == 0:
            return []
        # Every chunk id reuses the repo of the first spanned symbol.
        repo = SymbolID.parse(spanned[0].id).repo
        lang = file.language

        # Top-level symbols are those not strictly contained in another one.
        toplevel = [s for s in spanned if not any(_contains(o, s) for o in spanned)]
        toplevel.sort(key=lambda s: _span(s)[0])

        ranges: list[_Range] = []
        cursor = 1  # next 1-based line not yet assigned to a range
        for sym in toplevel:
            start, end = _span(sym)
            if end < cursor:
                # Span already fully emitted. Symbols sharing an identical span
                # do not "contain" each other, so both reach this loop; emitting
                # the range twice would create duplicate chunks (and duplicate
                # content hashes).
                continue
            # Partially overlapping spans: never re-emit lines already covered.
            start = max(start, cursor)
            if start > cursor:
                self._window(cursor, start - 1, lines, ranges)
            self._emit_symbol(start, end, lines, spanned, ranges)
            cursor = end + 1
        if cursor <= n:
            self._window(cursor, n, lines, ranges)

        # Drop whitespace-only ranges before merging the remaining gaps.
        ranges = [(a, b) for (a, b) in ranges if self._slice(lines, a, b).strip()]
        ranges = self._merge_gaps(ranges, lines, spanned)

        chunks: list[Chunk] = []
        for seq, (a, b) in enumerate(ranges):
            code = self._slice(lines, a, b)
            sym_ids = [s.id for s in spanned if _overlaps((a, b), _span(s))]
            text = f"{file.path} | {self._qualify(sym_ids)}\n{code}"
            # The chunker parameters are part of the hash, so changing the
            # budget changes the key even when the text is unchanged.
            content_hash = hashlib.sha256(
                f"{text}|{self.max_tokens}|{self.min_tokens}".encode()
            ).hexdigest()
            chunks.append(
                Chunk(
                    id=SymbolID.for_symbol(lang, repo, file.path, f"chunk({seq})."),
                    text=text,
                    code=code,
                    token_count=estimate_tokens(code),
                    path=file.path,
                    span=(a, b),
                    content_hash=content_hash,
                    symbol_ids=sym_ids,
                    seq=seq,
                )
            )
        return chunks

    # --- range production -----------------------------------------------

    def _emit_symbol(
        self, start: int, end: int, lines: list[str], symbols: list[Node], out: list[_Range]
    ) -> None:
        """Append ranges covering lines ``start..end`` of one symbol to ``out``.

        A span that fits the budget is emitted whole. Otherwise the method
        recurses into the directly nested symbols, windowing the lines between
        them; a span with no nested symbols is cut into line windows.
        """
        if estimate_tokens(self._slice(lines, start, end)) <= self.max_tokens:
            out.append((start, end))
            return
        # Symbols inside this span, excluding any with exactly this span
        # (which would otherwise recurse forever).
        within = [
            s
            for s in symbols
            if start <= _span(s)[0] and _span(s)[1] <= end and _span(s) != (start, end)
        ]
        direct = [c for c in within if not any(o is not c and _contains(o, c) for o in within)]
        direct.sort(key=lambda s: _span(s)[0])
        if not direct:  # leaf symbol still too big -> line windows
            self._window(start, end, lines, out)
            return
        cursor = start
        for child in direct:
            cs, ce = _span(child)
            if ce < cursor:
                # Sibling with a span that was already emitted (e.g. identical
                # span to the previous child): skip instead of duplicating.
                continue
            cs = max(cs, cursor)  # keep the ranges a partition on overlap
            if cs > cursor:
                self._window(cursor, cs - 1, lines, out)  # header / gap before child
            self._emit_symbol(cs, ce, lines, symbols, out)
            cursor = ce + 1
        if cursor <= end:
            self._window(cursor, end, lines, out)

    def _window(self, start: int, end: int, lines: list[str], out: list[_Range]) -> None:
        """Append greedy line windows covering ``start..end`` to ``out``.

        A window is closed just before the first line that would push its
        estimate over ``max_tokens``; a single line that exceeds the budget on
        its own is emitted as a one-line window.
        """
        acc = start
        for ln in range(start, end + 1):
            if ln > acc and estimate_tokens(self._slice(lines, acc, ln)) > self.max_tokens:
                out.append((acc, ln - 1))
                acc = ln
        out.append((acc, end))

    def _merge_gaps(
        self, ranges: list[_Range], lines: list[str], symbols: list[Node]
    ) -> list[_Range]:
        """Fuse neighbouring gap ranges while the result fits ``max_tokens``.

        Only ranges that overlap no symbol are merged, so symbol ranges and
        windows cut out of a symbol body are returned untouched.
        """

        def is_gap(r: _Range) -> bool:
            # a gap overlaps no symbol — so function-body windows are NOT gaps
            return not any(_overlaps(r, _span(s)) for s in symbols)

        out: list[_Range] = []
        for r in ranges:
            if out and is_gap(out[-1]) and is_gap(r):
                merged = (out[-1][0], r[1])
                if estimate_tokens(self._slice(lines, *merged)) <= self.max_tokens:
                    out[-1] = merged
                    continue
            out.append(r)
        return out

    # --- helpers --------------------------------------------------------

    @staticmethod
    def _slice(lines: list[str], a: int, b: int) -> str:
        """Return lines ``a..b`` (1-based, inclusive) joined with newlines."""
        return "\n".join(lines[a - 1 : b])

    @staticmethod
    def _qualify(symbol_ids: list[str]) -> str:
        """Return the label for a chunk header: first symbol's descriptor, else ``"module"``."""
        if not symbol_ids:
            return "module"
        return SymbolID.parse(symbol_ids[0]).descriptor or "module"
146
+
147
+
148
def _span(node: Node) -> tuple[int, int]:
    """Return ``node.span``, asserting that the node actually carries one."""
    span = node.span
    assert span is not None
    return span
151
+
152
+
153
def _contains(outer: Node, inner: Node) -> bool:
    """Tell whether ``outer``'s span strictly encloses ``inner``'s span.

    Two nodes with an identical span do not contain each other.
    """
    outer_span = _span(outer)
    inner_span = _span(inner)
    if outer_span == inner_span:
        return False
    return outer_span[0] <= inner_span[0] and inner_span[1] <= outer_span[1]
156
+
157
+
158
def _overlaps(a: tuple[int, int], b: tuple[int, int]) -> bool:
    """Tell whether two inclusive line ranges share at least one line."""
    a_start, a_end = a[0], a[1]
    b_start, b_end = b[0], b[1]
    return a_end >= b_start and b_end >= a_start
@@ -0,0 +1,19 @@
1
+ """The ``Chunk`` value — a retrieval artifact distinct from the symbol nodes
2
+ it covers (the chunk↔symbol separation that lets a vector hit expand into the
3
+ graph; feat-006). Converts to a ``CHUNK`` node + ``CHUNK_OF`` edges."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
class Chunk(BaseModel):
    """One retrieval unit cut from a source file."""

    # SymbolID with a chunk(<seq>). descriptor on the file path
    id: str
    # embedding text: "<path> | <symbol>\n<code>"
    text: str
    # raw source slice (for display)
    code: str
    token_count: int
    path: str
    # 1-based inclusive line range
    span: tuple[int, int]
    # sha256(text + chunker params) — the vector key
    content_hash: str
    # CHUNK_OF targets
    symbol_ids: list[str] = Field(default_factory=list)
    # order within the file
    seq: int
@@ -0,0 +1,15 @@
1
+ """Token budgeting for the chunker.
2
+
3
+ A fast, model-independent heuristic — exactness doesn't matter, only that
4
+ budgeting and the boundary tests use the *same* estimate. A real tokenizer
5
+ is a drop-in replacement behind this function (ADR-0007 risk note).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+
11
def estimate_tokens(text: str) -> int:
    """Estimate the token count of ``text``.

    Uses roughly four characters per token, but never less than the number of
    whitespace-separated words. Blank or whitespace-only text counts as 0.
    """
    words = text.split()
    if not words:
        return 0
    by_chars = len(text) // 4
    return max(1, len(words), by_chars)