bearking-0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. bearking-0.1.0/LICENSE +7 -0
  2. bearking-0.1.0/PKG-INFO +30 -0
  3. bearking-0.1.0/README.md +1 -0
  4. bearking-0.1.0/bearking/__init__.py +0 -0
  5. bearking-0.1.0/bearking/backboneagent/__init__.py +14 -0
  6. bearking-0.1.0/bearking/backboneagent/__main__.py +52 -0
  7. bearking-0.1.0/bearking/backboneagent/runner.py +488 -0
  8. bearking-0.1.0/bearking/fileatlas/__init__.py +16 -0
  9. bearking-0.1.0/bearking/fileatlas/__main__.py +20 -0
  10. bearking-0.1.0/bearking/fileatlas/llm.py +190 -0
  11. bearking-0.1.0/bearking/fileatlas/runner.py +487 -0
  12. bearking-0.1.0/bearking/fileatlas/schema.py +89 -0
  13. bearking-0.1.0/bearking/fileatlas/tools/__init__.py +8 -0
  14. bearking-0.1.0/bearking/fileatlas/tools/ast_parser.py +245 -0
  15. bearking-0.1.0/bearking/fileatlas/tools/chunk_slicer.py +117 -0
  16. bearking-0.1.0/bearking/fileatlas/tools/file_walker.py +194 -0
  17. bearking-0.1.0/bearking/fileatlas/tools/regex_skeleton.py +138 -0
  18. bearking-0.1.0/bearking/fileatlas/tools/simple_indexer.py +52 -0
  19. bearking-0.1.0/bearking/fileatlas/utils.py +127 -0
  20. bearking-0.1.0/bearking/locateagent/__init__.py +16 -0
  21. bearking-0.1.0/bearking/locateagent/__main__.py +74 -0
  22. bearking-0.1.0/bearking/locateagent/runner.py +1620 -0
  23. bearking-0.1.0/bearking/moduleatlas/__init__.py +17 -0
  24. bearking-0.1.0/bearking/moduleatlas/__main__.py +56 -0
  25. bearking-0.1.0/bearking/moduleatlas/runner.py +187 -0
  26. bearking-0.1.0/bearking/moduleatlas/schema.py +101 -0
  27. bearking-0.1.0/bearking/moduleatlas/tools/__init__.py +30 -0
  28. bearking-0.1.0/bearking/moduleatlas/tools/graph_builder.py +123 -0
  29. bearking-0.1.0/bearking/moduleatlas/tools/graph_metrics.py +172 -0
  30. bearking-0.1.0/bearking/moduleatlas/tools/import_resolver.py +123 -0
  31. bearking-0.1.0/bearking/moduleatlas/tools/module_clusterer.py +400 -0
  32. bearking-0.1.0/bearking/moduleatlas/tools/project_map_loader.py +117 -0
  33. bearking-0.1.0/bearking/moduleatlas/tools/public_surface.py +98 -0
  34. bearking-0.1.0/bearking/moduleatlas/tools/renderer.py +18 -0
  35. bearking-0.1.0/bearking/moduleatlas/tools/suggested_flows.py +120 -0
  36. bearking-0.1.0/bearking/moduleatlas/tools/summarizer.py +703 -0
  37. bearking-0.1.0/bearking.egg-info/PKG-INFO +30 -0
  38. bearking-0.1.0/bearking.egg-info/SOURCES.txt +42 -0
  39. bearking-0.1.0/bearking.egg-info/dependency_links.txt +1 -0
  40. bearking-0.1.0/bearking.egg-info/entry_points.txt +6 -0
  41. bearking-0.1.0/bearking.egg-info/requires.txt +2 -0
  42. bearking-0.1.0/bearking.egg-info/top_level.txt +1 -0
  43. bearking-0.1.0/setup.cfg +4 -0
  44. bearking-0.1.0/setup.py +69 -0
bearking-0.1.0/LICENSE ADDED
@@ -0,0 +1,7 @@
+ Copyright © 2021 <copyright holders>
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
bearking-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,30 @@
+ Metadata-Version: 2.1
+ Name: bearking
+ Version: 0.1.0
+ Summary: Code First, AI Second - a lightweight toolkit for analyzing code repository structure, integrating FileAtlas / ModuleAtlas / LocateAgent / SpineAgent / BackboneAgent
+ Home-page: https://github.com/bearking-home/bearking
+ Author: bearking
+ Author-email:
+ License: MIT
+ Project-URL: Bug Tracker, https://github.com/bearking-home/bearking/issues
+ Project-URL: Source, https://github.com/bearking-home/bearking
+ Keywords: code-analysis ast project-map architecture agent llm
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Topic :: Software Development :: Quality Assurance
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: pathspec>=0.12.1
+ Requires-Dist: tqdm>=4.66.0
+
+ bearking.home
bearking-0.1.0/README.md ADDED
@@ -0,0 +1 @@
+ bearking.home
bearking-0.1.0/bearking/__init__.py ADDED
File without changes
bearking-0.1.0/bearking/backboneagent/__init__.py ADDED
@@ -0,0 +1,14 @@
+ from __future__ import annotations
+
+ import os
+
+ __all__ = ["__version__", "INTERNAL_NAME", "DISPLAY_NAME"]
+
+ __version__ = "0.1.0"
+
+ INTERNAL_NAME = "BackboneAgent"
+
+ # Override per-run if desired:
+ #   export BEARKING_BACKBONEAGENT_NAME="YourName"
+ DISPLAY_NAME = os.getenv("BEARKING_BACKBONEAGENT_NAME", "BackboneAgent")
+
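Since `DISPLAY_NAME` is resolved once from `BEARKING_BACKBONEAGENT_NAME` at import time, an override has to be exported before the first import. A minimal sketch of the mechanism; the top-level `backboneagent` import path is an assumption about the installed layout, matching the `python -m backboneagent` invocation used in the modules below:

```python
import os

# Must be set before the first import: DISPLAY_NAME is read once, at import time.
os.environ["BEARKING_BACKBONEAGENT_NAME"] = "MyBackbone"

from backboneagent import DISPLAY_NAME, INTERNAL_NAME

print(INTERNAL_NAME)  # "BackboneAgent" -- stable identifier used in JSON/meta
print(DISPLAY_NAME)   # "MyBackbone"    -- human-facing name, overridden via env
```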
bearking-0.1.0/bearking/backboneagent/__main__.py ADDED
@@ -0,0 +1,52 @@
+ """
+ Module entry point:
+
+     python -m backboneagent
+
+ Reads from the current directory by default:
+ - project_report.txt
+
+ and writes a markdown file (default: backbone_from_report.md).
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ import json
+
+ from .runner import BackboneAgentConfig, generate_backbone_md
+
+
+ def _parse_args() -> BackboneAgentConfig:
+     p = argparse.ArgumentParser(prog="backboneagent", add_help=True)
+     p.add_argument("--report", default="project_report.txt", help="Path to project_report.txt")
+     p.add_argument("--out", default="backbone_from_report.md", help="Output markdown path")
+     p.add_argument("--architecture-json", default="architecture.json", help="Path to architecture.json (from ModuleAtlas)")
+     p.add_argument("--architecture-md", default="architecture.md", help="Path to architecture.md (from ModuleAtlas)")
+     p.add_argument("--max-input-chars", type=int, default=220000, help="Cap filtered report chars sent to LLM")
+     p.add_argument("--format", choices=["json", "quiet"], default="quiet", help="Print JSON meta or stay quiet")
+     args = p.parse_args()
+     cfg = BackboneAgentConfig(
+         report_path=str(args.report),
+         out_md_path=str(args.out),
+         architecture_json_path=str(args.architecture_json),
+         architecture_md_path=str(args.architecture_md),
+         max_input_chars=int(args.max_input_chars),
+     )
+     # stash for main printing
+     cfg._print_format = str(args.format)  # type: ignore[attr-defined]
+     return cfg
+
+
+ def main() -> int:
+     cfg = _parse_args()
+     res = generate_backbone_md(cfg)
+     fmt = getattr(cfg, "_print_format", "quiet")
+     if fmt == "json":
+         print(json.dumps(res, ensure_ascii=False, indent=2))
+     return 0
+
+
+ if __name__ == "__main__":
+     raise SystemExit(main())
+
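The CLI above is a thin wrapper: it parses flags into a `BackboneAgentConfig` and hands it to `generate_backbone_md` (both defined in runner.py, next). A hedged sketch of the equivalent programmatic call, assuming the same top-level import path and an LLM provider configured through the environment variables documented in runner.py:

```python
import json

from backboneagent.runner import BackboneAgentConfig, generate_backbone_md

cfg = BackboneAgentConfig(
    report_path="project_report.txt",
    out_md_path="backbone_from_report.md",
    architecture_json_path="architecture.json",
    architecture_md_path="architecture.md",
    max_input_chars=220_000,
)
res = generate_backbone_md(cfg)  # writes the markdown file as a side effect
print(json.dumps(res["meta"], ensure_ascii=False, indent=2))
```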
bearking-0.1.0/bearking/backboneagent/runner.py ADDED
@@ -0,0 +1,488 @@
+ from __future__ import annotations
+
+ import json
+ import os
+ import re
+ import sys
+ import time
+ import urllib.error
+ import urllib.request
+ from dataclasses import dataclass, field
+ from typing import Any, Iterable
+
+ from fileatlas.utils import read_text_file
+
+ from . import DISPLAY_NAME, INTERNAL_NAME
+
+ try:
+     from tqdm import tqdm  # type: ignore
+ except Exception:  # pragma: no cover
+     tqdm = None  # type: ignore[assignment]
+
+
+ # =========================
+ # Prompt
+ # =========================
+
+ PROMPT = (
+     "Role:\n"
+     "You are a senior systems architect with deep code-reading skills. "
+     "You excel at quickly reverse-engineering a project's core architecture from file listings, file summaries, and code snippets.\n\n"
+     "Context:\n"
+     "You will receive two inputs:\n"
+     "1. **architecture.md**: a module-level architecture overview generated automatically by ModuleAtlas, "
+     "covering module boundaries, responsibility summaries, the dependency graph, and fan-in/fan-out metrics. This is your global map.\n"
+     "2. **project_report.txt (pre-filtered)**: a scan report of the files in the project's core modules, "
+     "covering file paths, summaries, main interfaces, and dependencies. This is your file-level evidence.\n\n"
+     "Analyze the two together: architecture.md helps you quickly see which modules are core and which are auxiliary; "
+     "project_report.txt supplies the concrete per-file details.\n\n"
+     "Task:\n"
+     "Analyze these materials, strip out the noise (utility scripts, test files, unrelated configuration), "
+     "and extract only the project's Backbone and its Golden Thread.\n"
+     "Structure your output as follows:\n\n"
+     "Part 1: Mainline code files (The Backbone)\n"
+     "Identify the core code path that keeps the project running. Do not list every file; list only the key nodes that decide the execution flow. "
+     "Order them by logical execution order (e.g. entrypoint -> orchestrator -> algorithm core -> low-level worker), "
+     "and annotate each file with:\n"
+     "- File path\n"
+     "- Architectural role (e.g. Entrypoint, Orchestrator, Algorithm Core, Worker/Engine)\n"
+     "- Core function (one sentence on how it bridges the stages before and after it in the chain)\n\n"
+     "Part 2: The Golden Thread Example\n"
+     "In the project's examples or scripts directory, pick the single most representative launch script.\n"
+     "- Selection criterion: the script must tie together the mainline code above and include complete parameter configuration, model loading, and launch commands.\n"
+     "- Interpretation: explain why you chose it, and analyze how it activates the mainline code above "
+     "(e.g. which entrypoint does it call? which core engine does it configure?).\n\n"
+     "Part 3: Logical flow diagram\n"
+     "Using an ASCII flowchart or a clear hierarchical list, show how data and control flow from the Golden Thread script "
+     "through the mainline code above.\n"
+ )
+
+
+ @dataclass
+ class BackboneAgentConfig:
+     report_path: str = "project_report.txt"
+     out_md_path: str = "backbone_from_report.md"
+     # architecture.json / architecture.md produced by ModuleAtlas
+     architecture_json_path: str = "architecture.json"
+     architecture_md_path: str = "architecture.md"
+     # Hard cap LLM input size (chars) to avoid huge prompts.
+     max_input_chars: int = 220_000
+     # Progress
+     show_progress: bool = True
+     progress_to_stderr: bool = True
+
+
+ def _p(cfg: BackboneAgentConfig, msg: str) -> None:
+     if not bool(getattr(cfg, "show_progress", True)):
+         return
+     stream = sys.stderr if bool(getattr(cfg, "progress_to_stderr", True)) else sys.stdout
+     print(f"[{DISPLAY_NAME}] {msg}", file=stream)
+
+
+ def _wrap_iter(cfg: BackboneAgentConfig, it: Iterable[Any], *, total: int | None, desc: str) -> Iterable[Any]:
+     if not bool(getattr(cfg, "show_progress", True)):
+         return it
+     if tqdm is None:
+         return it
+     try:
+         return tqdm(it, total=total, desc=f"{DISPLAY_NAME}: {desc}", unit="item")  # type: ignore[return-value]
+     except Exception:
+         return it
+
+
+ def _norm_path(p: str) -> str:
+     p = (p or "").replace("\\", "/")
+     while p.startswith("./"):
+         p = p[2:]
+     return p.strip("/")
+
+
+ def _load_llm_from_env_plaintext() -> tuple[str, dict[str, str], str]:
+     """
+     Returns (provider, settings, model).
+     provider:
+       - "ollama" (uses /api/chat)
+       - "openai_compat" (uses /chat/completions)
+     settings includes base_url/api_key when relevant.
+     """
+     provider = os.environ.get("LLM_PROVIDER", "openai_compat").strip().lower()
+     if provider == "ollama":
+         return (
+             provider,
+             {
+                 "base_url": os.environ.get("OLLAMA_BASE_URL", "http://127.0.0.1:11434").strip(),
+             },
+             os.environ.get("OLLAMA_MODEL", "qwen3:4b").strip(),
+         )
+     api_key = os.environ.get("OPENAI_API_KEY", "").strip()
+     if not api_key:
+         raise RuntimeError("Missing OPENAI_API_KEY (set LLM_PROVIDER=ollama to use Ollama without a key)")
+     return (
+         "openai_compat",
+         {
+             "base_url": os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1").strip(),
+             "api_key": api_key,
+         },
+         os.environ.get("OPENAI_MODEL", "gpt-4.1-mini").strip(),
+     )
+
+
+ def _http_post_json(url: str, payload: dict[str, Any], *, headers: dict[str, str], timeout_s: int) -> dict[str, Any]:
+     req = urllib.request.Request(
+         url=url,
+         method="POST",
+         data=json.dumps(payload).encode("utf-8"),
+         headers={"Content-Type": "application/json", **(headers or {})},
+     )
+     try:
+         with urllib.request.urlopen(req, timeout=timeout_s) as resp:
+             data = resp.read().decode("utf-8", errors="replace")
+             obj = json.loads(data)
+             return obj if isinstance(obj, dict) else {"raw": obj}
+     except urllib.error.HTTPError as e:
+         body = ""
+         try:
+             body = e.read().decode("utf-8", errors="replace")
+         except Exception:
+             body = ""
+         raise RuntimeError(f"LLM HTTPError {e.code}: {body}") from e
+     except Exception as e:
+         raise RuntimeError(f"LLM request failed: {e}") from e
+
+
+ def call_llm_markdown(*, prompt: str) -> str:
+     """
+     Call provider with a single user prompt, expecting markdown output.
+     (We intentionally do NOT force JSON response_format / format=json.)
+     """
+     provider, settings, model = _load_llm_from_env_plaintext()
+     messages = [{"role": "user", "content": prompt}]
+
+     if provider == "ollama":
+         base = settings["base_url"].rstrip("/")
+         url = f"{base}/api/chat"
+         payload: dict[str, Any] = {
+             "model": model,
+             "messages": messages,
+             "stream": False,
+         }
+         resp = _http_post_json(url, payload, headers={}, timeout_s=240)
+         try:
+             return str(resp["message"]["content"])
+         except Exception:
+             return json.dumps(resp, ensure_ascii=False, indent=2)
+
+     base = settings["base_url"].rstrip("/")
+     url = f"{base}/chat/completions"
+     payload2: dict[str, Any] = {"model": model, "messages": messages}
+     resp2 = _http_post_json(url, payload2, headers={"Authorization": f"Bearer {settings['api_key']}"}, timeout_s=240)
+     try:
+         return str(resp2["choices"][0]["message"]["content"])
+     except Exception:
+         return json.dumps(resp2, ensure_ascii=False, indent=2)
+
+
+ _SECTION_RE = re.compile(r"(?m)^##\s+(.+?)\s*$")
+
+
+ def _iter_sections(text: str) -> Iterable[tuple[str, str]]:
+     """
+     Yield (path, section_text) for each "## path" block.
+     """
+     if not text:
+         return
+     matches = list(_SECTION_RE.finditer(text))
+     for i, m in enumerate(matches):
+         path = _norm_path(m.group(1))
+         start = m.start()
+         end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
+         yield path, text[start:end].rstrip()
+
+
+ # ---------------------------------------------------------------------------
+ # Architecture-driven noise filtering (replaces hardcoded include/exclude)
+ # ---------------------------------------------------------------------------
+
+ _MODULE_FILTER_PROMPT = (
+     "You are a software architecture analyst. Below are summaries of all modules in a project (generated automatically by ModuleAtlas).\n"
+     "Decide which modules belong to the project's core execution backbone, i.e. the modules that keep the project's core functionality running.\n\n"
+     "Exclude modules of the following kinds:\n"
+     "- Pure documentation / CI / build-configuration modules (e.g. docs, .github, root-level config files)\n"
+     "- Pure test modules (e.g. tests)\n"
+     "- Isolated modules that take part in no code dependencies at all (fan_in=0 and fan_out=0)\n\n"
+     "Keep modules of the following kinds:\n"
+     "- Core business logic and mainline execution modules (training / inference / serving, etc.)\n"
+     "- Entrypoint / orchestration / scheduling modules (even with fan_in=0, fan_out>0 means they drive other modules)\n"
+     "- Key infrastructure modules that the mainline modules depend on\n"
+     "- Example / script modules (they may contain launch entrypoints and Golden Thread examples)\n\n"
+     "Note: a module with high fan_in may just be a generic utility library, not necessarily mainline. Judge by the semantics of purpose and summary, "
+     "not by naively ranking fan_in/fan_out numbers.\n\n"
+     "Return only a JSON array of the module_ids you consider part of the core backbone. Do not include any other text.\n"
+     "For example: [\"module_a\", \"module_b/sub\"]\n\n"
+     "=== Module list ===\n\n"
+ )
+
+
+ def _load_arch_data(architecture_json_path: str) -> dict[str, Any]:
+     """Load and return the parsed architecture.json, or empty dict on failure."""
+     try:
+         raw = read_text_file(architecture_json_path, max_bytes=50_000_000)
+         return json.loads(raw)
+     except Exception:
+         return {}
+
+
+ def _build_module_summary_table(arch: dict[str, Any]) -> str:
+     """
+     Build a compact text table of all modules from architecture.json,
+     suitable for a lightweight LLM filtering call.
+     """
+     modules: list[dict[str, Any]] = arch.get("modules") or []
+     mg = arch.get("module_graph") or {}
+     fan_map: dict[str, dict[str, int]] = {}
+     for entry in mg.get("fan_in_out") or []:
+         mid = str(entry.get("module_id", ""))
+         fan_map[mid] = {
+             "fan_in": int(entry.get("fan_in", 0)),
+             "fan_out": int(entry.get("fan_out", 0)),
+         }
+
+     # Also extract dependency edges for context
+     edges: list[dict[str, Any]] = mg.get("edges") or []
+     dep_in: dict[str, list[str]] = {}  # module_id -> list of modules that depend on it
+     dep_out: dict[str, list[str]] = {}  # module_id -> list of modules it depends on
+     for e in edges:
+         src = str(e.get("source", ""))
+         tgt = str(e.get("target", ""))
+         dep_out.setdefault(src, []).append(tgt)
+         dep_in.setdefault(tgt, []).append(src)
+
+     lines: list[str] = []
+     for mod in modules:
+         mid = str(mod.get("module_id", ""))
+         purpose = str(mod.get("purpose", ""))
+         summary = str(mod.get("summary", ""))
+         # Truncate summary to keep the table compact
+         if len(summary) > 200:
+             summary = summary[:200] + "..."
+         fan = fan_map.get(mid, {"fan_in": 0, "fan_out": 0})
+         lines.append(f"module_id: {mid}")
+         lines.append(f" purpose: {purpose}")
+         lines.append(f" summary: {summary}")
+         lines.append(f" fan_in: {fan['fan_in']}, fan_out: {fan['fan_out']}")
+         dins = dep_in.get(mid, [])
+         douts = dep_out.get(mid, [])
+         if dins:
+             lines.append(f" depended_by: {dins}")
+         if douts:
+             lines.append(f" depends_on: {douts}")
+         lines.append("")
+
+     return "\n".join(lines)
+
+
+ def _extract_json_array(text: str) -> list[str]:
+     """Extract a JSON array from LLM response text, tolerating markdown fences."""
+     # Strip markdown code fences if present
+     cleaned = text.strip()
+     if cleaned.startswith("```"):
+         # Remove opening fence (```json or ```)
+         first_newline = cleaned.index("\n") if "\n" in cleaned else len(cleaned)
+         cleaned = cleaned[first_newline + 1:]
+         if cleaned.endswith("```"):
+             cleaned = cleaned[: cleaned.rfind("```")]
+         cleaned = cleaned.strip()
+     try:
+         result = json.loads(cleaned)
+         if isinstance(result, list):
+             return [str(x) for x in result]
+     except json.JSONDecodeError:
+         pass
+     # Try to find a JSON array anywhere in the text
+     match = re.search(r'\[.*?\]', cleaned, re.DOTALL)
+     if match:
+         try:
+             result = json.loads(match.group(0))
+             if isinstance(result, list):
+                 return [str(x) for x in result]
+         except json.JSONDecodeError:
+             pass
+     return []
+
+
+ def _llm_filter_modules(cfg: BackboneAgentConfig, arch: dict[str, Any]) -> set[str] | None:
+     """
+     Phase 1: Send compact module summaries to LLM, ask it to pick backbone
+     candidate modules. Returns a set of module_id strings, or None on failure.
+     """
+     table = _build_module_summary_table(arch)
+     if not table.strip():
+         return None
+
+     prompt = _MODULE_FILTER_PROMPT + table
+     _p(cfg, f"Phase 1: LLM module filtering (prompt size: {len(prompt)} chars)")
+
+     try:
+         response = call_llm_markdown(prompt=prompt)
+     except Exception as e:
+         _p(cfg, f"Phase 1 LLM call failed: {e}, falling back to fan_in/fan_out rule")
+         return None
+
+     selected = _extract_json_array(response)
+     if not selected:
+         _p(cfg, "Phase 1: could not parse module list from LLM response, falling back")
+         return None
+
+     return {_norm_path(m) for m in selected}
+
+
+ def _fallback_core_modules(arch: dict[str, Any]) -> set[str]:
+     """
+     Fallback: return modules with fan_in>0 or fan_out>0 (rule-based).
+     Used when LLM filtering is unavailable.
+     """
+     mg = arch.get("module_graph") or {}
+     core: set[str] = set()
+     for entry in mg.get("fan_in_out") or []:
+         mid = _norm_path(str(entry.get("module_id", "")))
+         fan_in = int(entry.get("fan_in", 0))
+         fan_out = int(entry.get("fan_out", 0))
+         if fan_in > 0 or fan_out > 0:
+             core.add(mid)
+     return core
+
+
+ def _keep_section_by_modules(path: str, core_prefixes: set[str]) -> bool:
+     """
+     Return True if `path` belongs to any core module prefix.
+     If core_prefixes is empty (fallback), keep everything.
+     """
+     p = _norm_path(path)
+     if not p:
+         return False
+     if not core_prefixes:
+         return True
+     for prefix in core_prefixes:
+         if not prefix:
+             continue
+         if p == prefix or p.startswith(prefix + "/"):
+             return True
+     return False
+
+
+ def _select_excerpts(cfg: BackboneAgentConfig, report_text: str, core_prefixes: set[str]) -> str:
+     """
+     Filter report down to sections belonging to core modules.
+     No artificial scoring or re-ordering — let the LLM decide importance.
+     Sections are kept in their original order, truncated at max_input_chars.
+     """
+     out_parts: list[str] = []
+     total = 0
+     for path, sec in _iter_sections(report_text):
+         if not _keep_section_by_modules(path, core_prefixes):
+             continue
+         chunk = sec + "\n\n"
+         if total + len(chunk) > int(cfg.max_input_chars):
+             break
+         out_parts.append(chunk)
+         total += len(chunk)
+
+     return "".join(out_parts).strip()
+
+
+ def generate_backbone_md(cfg: BackboneAgentConfig) -> dict[str, Any]:
+     # --- Load architecture data ---
+     _p(cfg, f"reading architecture: {cfg.architecture_json_path}")
+     arch = _load_arch_data(cfg.architecture_json_path)
+
+     # --- Phase 1: LLM-based module filtering ---
+     filter_method: str
+     if arch:
+         core_prefixes = _llm_filter_modules(cfg, arch)
+         if core_prefixes is not None:
+             filter_method = "llm"
+             _p(cfg, f"Phase 1 result — LLM selected {len(core_prefixes)} modules: {sorted(core_prefixes)}")
+         else:
+             core_prefixes = _fallback_core_modules(arch)
+             filter_method = "fan_in_out_rule"
+             _p(cfg, f"Fallback to fan_in/fan_out rule — {len(core_prefixes)} modules: {sorted(core_prefixes)}")
+     else:
+         core_prefixes = set()
+         filter_method = "none"
+         _p(cfg, "WARNING: no architecture.json found, keeping all report sections")
+
+     # --- Load architecture.md for context ---
+     arch_md = ""
+     try:
+         arch_md = read_text_file(cfg.architecture_md_path, max_bytes=5_000_000)
+         _p(cfg, f"loaded architecture.md: {len(arch_md)} chars")
+     except Exception:
+         _p(cfg, f"WARNING: could not read {cfg.architecture_md_path}, proceeding without it")
+
+     # --- Phase 2: Filter report and call main LLM ---
+     _p(cfg, f"reading report: {cfg.report_path}")
+     txt = read_text_file(cfg.report_path, max_bytes=25_000_000)
+     _p(cfg, "filtering report (using module selection from Phase 1)")
+     excerpts = _select_excerpts(cfg, txt, core_prefixes)
+     _p(cfg, f"excerpt size: {len(excerpts)} chars (cap={cfg.max_input_chars})")
+
+     # --- Assemble prompt ---
+     parts = [PROMPT]
+     if arch_md:
+         parts.append("\n\n=== architecture.md ===\n\n")
+         parts.append(arch_md)
+     parts.append("\n\n=== project_report.txt (filtered excerpts) ===\n\n")
+     parts.append(excerpts)
+     parts.append("\n")
+     full_prompt = "".join(parts)
+
+     _p(cfg, f"Phase 2: total prompt size: {len(full_prompt)} chars")
+     _p(cfg, "Phase 2: calling LLM (markdown output)")
+     md: str
+     llm_error: str | None = None
+     try:
+         md = call_llm_markdown(prompt=full_prompt)
+     except Exception as e:
+         llm_error = str(e)
+         md = (
+             "## BackboneAgent\n\n"
+             "LLM call failed, so no analysis was generated.\n\n"
+             f"- **Error**: `{llm_error}`\n\n"
+             "### How to fix\n\n"
+             "- **Ollama (no key required)**:\n\n"
+             "```bash\n"
+             "export LLM_PROVIDER=ollama\n"
+             "export OLLAMA_BASE_URL=http://127.0.0.1:11434\n"
+             "export OLLAMA_MODEL=qwen3:4b\n"
+             "python -m backboneagent --report project_report.txt --out backbone_from_report.md\n"
+             "```\n\n"
+             "- **OpenAI-compatible**:\n\n"
+             "```bash\n"
+             "export OPENAI_API_KEY=...\n"
+             "export OPENAI_MODEL=...\n"
+             "export OPENAI_BASE_URL=...\n"
+             "python -m backboneagent --report project_report.txt --out backbone_from_report.md\n"
+             "```\n"
+         )
+
+     # Write output
+     out_path = str(cfg.out_md_path)
+     with open(out_path, "w", encoding="utf-8") as f:
+         f.write(md if md.endswith("\n") else (md + "\n"))
+
+     meta = {
+         "generated_at": time.strftime("%Y-%m-%d %H:%M:%S"),
+         "agent": INTERNAL_NAME,
+         "agent_display_name": DISPLAY_NAME,
+         "report_path": os.path.abspath(cfg.report_path),
+         "out_md_path": os.path.abspath(out_path),
+         "architecture_json_path": os.path.abspath(cfg.architecture_json_path),
+         "architecture_md_path": os.path.abspath(cfg.architecture_md_path),
+         "filter_method": filter_method,
+         "core_modules": sorted(core_prefixes),
+         "excerpt_chars": int(len(excerpts)),
+         "max_input_chars": int(cfg.max_input_chars),
+         "llm_error": llm_error or "",
+     }
+     _p(cfg, "done")
+     return {"meta": meta, "out_md_path": out_path, "markdown": md}
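The Phase 2 filter keys entirely off the `## path` section convention in project_report.txt, so a quick way to preview what will survive filtering is to run `_iter_sections` and `_keep_section_by_modules` on a toy report. A minimal sketch (import path assumed as above; the paths and prefixes are made up for illustration):

```python
from backboneagent.runner import _iter_sections, _keep_section_by_modules

report = (
    "## src/core/engine.py\nSummary: main inference engine.\n\n"
    "## tests/test_engine.py\nSummary: unit tests for the engine.\n"
)
core_prefixes = {"src/core"}  # e.g. the module_ids returned by Phase 1

for path, _section in _iter_sections(report):
    kept = _keep_section_by_modules(path, core_prefixes)
    print(f"{path}: {'keep' if kept else 'drop'}")
# src/core/engine.py: keep
# tests/test_engine.py: drop
```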
bearking-0.1.0/bearking/fileatlas/__init__.py ADDED
@@ -0,0 +1,16 @@
+ from __future__ import annotations
+
+ import os
+
+ __all__ = ["__version__", "INTERNAL_NAME", "DISPLAY_NAME"]
+
+ __version__ = "0.1.0"
+
+ # Stable internal identifier used in JSON/meta.
+ INTERNAL_NAME = "FileAtlas"
+
+ # Human-friendly display name shown in progress bars / reports / docs.
+ # Override per-run if desired:
+ #   export BEARKING_FILEATLAS_NAME="YourName"
+ DISPLAY_NAME = os.getenv("BEARKING_FILEATLAS_NAME", "FileAtlas")
+
bearking-0.1.0/bearking/fileatlas/__main__.py ADDED
@@ -0,0 +1,20 @@
+ """
+ Module entry point (takes no command-line arguments):
+
+     python -m fileatlas
+
+ Scans the current directory by default and writes project_map.json.
+ To customize parameters, run run_fileatlas.py in the repository root and edit its configuration there.
+ """
+
+ from .runner import FileAtlasConfig, run_fileatlas
+
+
+ def main() -> int:
+     run_fileatlas(FileAtlasConfig())
+     return 0
+
+
+ if __name__ == "__main__":
+     raise SystemExit(main())
+
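A programmatic equivalent of `python -m fileatlas`, mirroring `main()` above; `FileAtlasConfig`'s fields are defined in fileatlas/runner.py (not shown in this section), so only the defaults are used here:

```python
from fileatlas.runner import FileAtlasConfig, run_fileatlas

# Scan the current directory with default settings and write project_map.json,
# exactly what `python -m fileatlas` does.
run_fileatlas(FileAtlasConfig())
```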