gwc-pybundle 0.4.2 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gwc_pybundle-0.4.2.dist-info/METADATA +476 -0
- gwc_pybundle-0.4.2.dist-info/RECORD +34 -0
- gwc_pybundle-0.4.2.dist-info/WHEEL +5 -0
- gwc_pybundle-0.4.2.dist-info/entry_points.txt +2 -0
- gwc_pybundle-0.4.2.dist-info/licenses/LICENSE.md +25 -0
- gwc_pybundle-0.4.2.dist-info/top_level.txt +1 -0
- pybundle/__init__.py +0 -0
- pybundle/__main__.py +4 -0
- pybundle/cli.py +228 -0
- pybundle/context.py +232 -0
- pybundle/doctor.py +101 -0
- pybundle/manifest.py +78 -0
- pybundle/packaging.py +41 -0
- pybundle/policy.py +176 -0
- pybundle/profiles.py +146 -0
- pybundle/roadmap_model.py +38 -0
- pybundle/roadmap_scan.py +262 -0
- pybundle/root_detect.py +14 -0
- pybundle/runner.py +72 -0
- pybundle/steps/base.py +20 -0
- pybundle/steps/compileall.py +76 -0
- pybundle/steps/context_expand.py +272 -0
- pybundle/steps/copy_pack.py +300 -0
- pybundle/steps/error_refs.py +204 -0
- pybundle/steps/handoff_md.py +166 -0
- pybundle/steps/mypy.py +60 -0
- pybundle/steps/pytest.py +66 -0
- pybundle/steps/repro_md.py +161 -0
- pybundle/steps/rg_scans.py +78 -0
- pybundle/steps/roadmap.py +158 -0
- pybundle/steps/ruff.py +111 -0
- pybundle/steps/shell.py +67 -0
- pybundle/steps/tree.py +136 -0
- pybundle/tools.py +7 -0
pybundle/policy.py
ADDED
@@ -0,0 +1,176 @@
# pybundle/policy.py
from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable


# Common junk that AI should not ingest by default
DEFAULT_EXCLUDE_DIRS: set[str] = {
    ".git", ".hg", ".svn", ".venv", "venv", ".direnv", ".cache",
    ".mypy_cache", ".ruff_cache", ".pytest_cache", "__pycache__",
    "node_modules", ".pybundle-venv", "binaries", "dist", "build",
    "target", "out", ".next", ".nuxt", ".svelte-kit", "artifacts",
}

# File extensions that commonly produce noise or massive distraction in AI mode
DEFAULT_EXCLUDE_FILE_EXTS: set[str] = {
    # packaging/installers/binaries
    ".appimage", ".deb", ".rpm", ".exe", ".msi", ".dmg", ".pkg",
    ".so", ".dll", ".dylib",

    # runtime DBs
    ".db", ".sqlite", ".sqlite3",

    # archives (often huge)
    ".zip", ".tar", ".gz", ".tgz", ".bz2", ".xz", ".7z",
}

# Manifests + config files that are often essential for polyglot projects
DEFAULT_INCLUDE_FILES: list[str] = [
    # Python
    "pyproject.toml",
    "requirements.txt",
    "requirements-dev.txt",
    "poetry.lock",
    "pdm.lock",
    "uv.lock",
    "setup.cfg",
    "setup.py",
    "mypy.ini",
    "ruff.toml",
    ".ruff.toml",
    "pytest.ini",
    "tox.ini",
    ".python-version",

    # Docs / meta
    "README.md",
    "README.rst",
    "README.txt",
    "CHANGELOG.md",
    "LICENSE",
    "LICENSE.md",

    # Node / frontend
    "package.json",
    "package-lock.json",
    "pnpm-lock.yaml",
    "yarn.lock",
    "tsconfig.json",
    "vite.config.js",
    "vite.config.ts",
    "webpack.config.js",
    "webpack.config.ts",

    # Rust / Tauri
    "Cargo.toml",
    "Cargo.lock",
    "tauri.conf.json",
    "tauri.conf.json5",
    "tauri.conf.toml",
]

DEFAULT_INCLUDE_DIRS: list[str] = [
    # Python-ish
    "src", "app", "tests", "tools", "docs", ".github", "templates",
    "static", "src-tauri", "frontend", "web", "ui",
]

DEFAULT_INCLUDE_GLOBS: list[str] = [
    "*.py", "*/**/*.py", "templates/**/*", "static/**/*",
]


@dataclass(frozen=True)
class AIContextPolicy:
    # path filters
    exclude_dirs: set[str] = field(default_factory=lambda: set(DEFAULT_EXCLUDE_DIRS))
    exclude_file_exts: set[str] = field(default_factory=lambda: set(DEFAULT_EXCLUDE_FILE_EXTS))

    # curated inclusion
    include_files: list[str] = field(default_factory=lambda: list(DEFAULT_INCLUDE_FILES))
    include_dirs: list[str] = field(default_factory=lambda: list(DEFAULT_INCLUDE_DIRS))
    include_globs: list[str] = field(default_factory=lambda: list(DEFAULT_INCLUDE_GLOBS))

    # AI-friendly knobs
    tree_max_depth: int = 4
    largest_limit: int = 80
    roadmap_max_files: int = 20000
    roadmap_mermaid_depth: int = 2
    roadmap_mermaid_max_edges: int = 180

    def include_dir_candidates(self, root: Path) -> list[Path]:
        out: list[Path] = []
        for d in self.include_dirs:
            p = root / d
            if p.exists():
                out.append(p)
        return out or [root]


@dataclass
class PathFilter:
    """
    Shared filtering logic across steps:
    - prune excluded dir names
    - prune venvs by structure (any name)
    - optionally exclude noisy file types by extension
    """
    exclude_dirs: set[str]
    exclude_file_exts: set[str]
    detect_venvs: bool = True

    def is_venv_root(self, p: Path) -> bool:
        if not p.is_dir():
            return False

        if (p / "pyvenv.cfg").is_file():
            return True

        if (p / "bin").is_dir():
            if (p / "bin" / "activate").is_file() and (
                (p / "bin" / "python").exists() or (p / "bin" / "python3").exists()
            ):
                return True
            if any((p / "lib").glob("python*/site-packages")):
                return True

        if (p / "Scripts").is_dir():
            if (p / "Scripts" / "activate").is_file() and (
                (p / "Scripts" / "python.exe").is_file() or (p / "Scripts" / "python").exists()
            ):
                return True
            if (p / "Lib" / "site-packages").is_dir():
                return True

        if (p / ".Python").exists():
            return True

        return False

    def should_prune_dir(self, parent_dir: Path, child_name: str) -> bool:
        if child_name in self.exclude_dirs:
            return True
        if self.detect_venvs and self.is_venv_root(parent_dir / child_name):
            return True
        return False

    def should_include_file(self, root: Path, p: Path) -> bool:
        try:
            rel = p.relative_to(root)
        except Exception:
            return False

        # reject files under excluded dirs by name
        for part in rel.parts[:-1]:
            if part in self.exclude_dirs:
                return False

        # reject excluded extensions
        ext = p.suffix.lower()
        if ext in self.exclude_file_exts:
            return False

        return True
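Usage sketch (illustrative, not part of the wheel): assuming gwc-pybundle is installed so the pybundle package above is importable, the default AIContextPolicy and PathFilter can drive a plain os.walk. The repo_root value is a placeholder.

# Sketch: apply the default AI-context policy to a directory walk.
import os
from pathlib import Path

from pybundle.policy import AIContextPolicy, PathFilter

policy = AIContextPolicy()
pf = PathFilter(exclude_dirs=policy.exclude_dirs, exclude_file_exts=policy.exclude_file_exts)

repo_root = Path(".")  # placeholder: point at a real project root
kept: list[Path] = []
for dirpath, dirnames, filenames in os.walk(repo_root):
    parent = Path(dirpath)
    # prune excluded names and venv-shaped dirs in place so os.walk skips them
    dirnames[:] = [d for d in dirnames if not pf.should_prune_dir(parent, d)]
    for fn in filenames:
        p = parent / fn
        if pf.should_include_file(repo_root, p):
            kept.append(p)
print(f"{len(kept)} files survive the default AI-context policy")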
pybundle/profiles.py
ADDED
@@ -0,0 +1,146 @@
from __future__ import annotations

from dataclasses import dataclass
import dataclasses
from .context import RunOptions
from .steps.shell import ShellStep
from .steps.tree import TreeStep, LargestFilesStep
from .steps.compileall import CompileAllStep
from .steps.ruff import RuffCheckStep, RuffFormatCheckStep
from .steps.mypy import MypyStep
from .steps.pytest import PytestStep
from .steps.rg_scans import default_rg_steps
from .steps.error_refs import ErrorReferencedFilesStep
from .steps.context_expand import ErrorContextExpandStep
from .steps.copy_pack import CuratedCopyStep
from .steps.repro_md import ReproMarkdownStep
from .steps.handoff_md import HandoffMarkdownStep
from .steps.roadmap import RoadmapStep
from .policy import AIContextPolicy


@dataclass(frozen=True)
class Profile:
    name: str
    steps: list

def _dedupe_steps(steps: list) -> list:
    seen = set()
    out = []
    for s in steps:
        key = getattr(s, "out", None) or getattr(s, "out_md", None) or getattr(s, "name", None)
        # fallback to class name if needed
        key = key or s.__class__.__name__
        if key in seen:
            continue
        seen.add(key)
        out.append(s)
    return out

def resolve_defaults(profile: str, opts: RunOptions) -> RunOptions:
    if profile == "ai":
        return dataclasses.replace(
            opts,
            no_ruff=opts.no_ruff if opts.no_ruff is not None else True,
            no_mypy=opts.no_mypy if opts.no_mypy is not None else True,
            no_pytest=opts.no_pytest if opts.no_pytest is not None else True,
            no_rg=opts.no_rg if opts.no_rg is not None else True,
            no_error_refs=opts.no_error_refs if opts.no_error_refs is not None else True,
            no_context=opts.no_context if opts.no_context is not None else True,
            no_compileall=opts.no_compileall if opts.no_compileall is not None else True,
        )
    return opts

def _analysis_steps(options: RunOptions) -> list:
    policy = AIContextPolicy()

    steps: list = [
        ShellStep("git status", "meta/00_git_status.txt", ["git", "status"], require_cmd="git"),
        ShellStep("git diff", "meta/01_git_diff.txt", ["git", "diff"], require_cmd="git"),
        ShellStep("uname -a", "meta/21_uname.txt", ["uname", "-a"], require_cmd="uname"),

        TreeStep(max_depth=policy.tree_max_depth, policy=policy),
        LargestFilesStep(limit=policy.largest_limit, policy=policy),
    ]

    # Lint/type/test (CLI-overridable)
    if not options.no_ruff:
        steps += [
            RuffCheckStep(target=options.ruff_target),
            RuffFormatCheckStep(target=options.ruff_target),
        ]

    if not options.no_mypy:
        steps += [MypyStep(target=options.mypy_target)]

    if not options.no_pytest:
        steps += [PytestStep(args=options.pytest_args or ["-q"])]

    # Landmine scans
    if not options.no_rg:
        steps += list(default_rg_steps(target="."))

    # Error-driven packs
    if not options.no_error_refs:
        steps += [ErrorReferencedFilesStep(max_files=options.error_max_files)]

    if not options.no_context:
        steps += [
            ErrorContextExpandStep(
                depth=options.context_depth,
                max_files=options.context_max_files,
            )
        ]

    if not options.no_compileall:
        steps.append(CompileAllStep())

    # Curated pack + repro doc
    steps += [
        ShellStep("python -V", "meta/20_python_version.txt", ["python", "-V"], require_cmd="python"),
        ShellStep("pip freeze", "meta/22_pip_freeze.txt", ["python", "-m", "pip", "freeze"], require_cmd="python"),

        CuratedCopyStep(policy=policy),
        ReproMarkdownStep(),
        RoadmapStep(policy=policy),
        HandoffMarkdownStep(),
    ]

    return _dedupe_steps(steps)

def get_profile(name: str, options: RunOptions) -> Profile:
    if name == "analysis":
        return Profile(name="analysis", steps=_analysis_steps(options))

    if name == "debug":
        # debug inherits analysis but keeps the same options
        steps = list(_analysis_steps(options))
        steps.append(
            ShellStep(
                "pip check",
                "logs/25_pip_check.txt",
                ["python", "-m", "pip", "check"],
                require_cmd="python",
            )
        )
        return Profile(name="debug", steps=steps)

    if name == "backup":
        # Scaffold: we'll implement real backup modes next
        return Profile(
            name="backup",
            steps=[
                ShellStep(
                    "python -V",
                    "meta/20_python_version.txt",
                    ["python", "-V"],
                    require_cmd="python",
                ),
            ],
        )

    if name == "ai":
        opts = resolve_defaults("ai", options)
        return Profile(name="ai", steps=_analysis_steps(opts))

    raise ValueError(f"unknown profile: {name}")
pybundle/roadmap_model.py
ADDED
@@ -0,0 +1,38 @@
from __future__ import annotations
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Literal, Dict, List, Optional

Lang = Literal["python", "js", "ts", "rust", "html", "css", "config", "unknown"]
EdgeType = Literal["import", "require", "use", "mod", "include", "script", "entrypoint"]

@dataclass(frozen=True)
class Node:
    id: str      # stable id (usually path)
    path: str    # repo-relative
    lang: Lang

@dataclass(frozen=True)
class Edge:
    src: str     # node id
    dst: str     # node id (or synthetic id)
    type: EdgeType
    note: str = ""  # e.g. "from X import Y", "package.json script: dev"

@dataclass
class EntryPoint:
    node: str    # node id
    reason: str  # why we think it's an entry
    confidence: int = 2  # 1-3

@dataclass
class RoadmapGraph:
    version: int
    root: str
    nodes: List[Node]
    edges: List[Edge]
    entrypoints: List[EntryPoint]
    stats: Dict[str, int]  # counts by lang/edge types/etc.

    def to_dict(self) -> dict:
        return asdict(self)
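Usage sketch (illustrative, not part of the wheel): the dataclasses above serialize cleanly via to_dict/asdict; the node, edge, and entrypoint values below are made up to show the shape that build_roadmap produces.

# Sketch: build a tiny RoadmapGraph by hand and serialize it to JSON.
import json
from pybundle.roadmap_model import Node, Edge, EntryPoint, RoadmapGraph

graph = RoadmapGraph(
    version=1,
    root=".",
    nodes=[Node(id="pkg/__main__.py", path="pkg/__main__.py", lang="python")],
    edges=[Edge(src="pkg/__main__.py", dst="py:argparse", type="import")],
    entrypoints=[EntryPoint(node="pkg/__main__.py", reason="python __main__.py", confidence=3)],
    stats={"nodes_python": 1, "edges_import": 1},
)
print(json.dumps(graph.to_dict(), indent=2))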
pybundle/roadmap_scan.py
ADDED
@@ -0,0 +1,262 @@
from __future__ import annotations
import ast
import os
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, Optional, Literal
from .steps.copy_pack import _is_venv_root, _is_under_venv

from .roadmap_model import Node, Edge, EntryPoint, RoadmapGraph

PY_EXT = {".py"}
JS_EXT = {".js", ".jsx", ".mjs", ".cjs"}
TS_EXT = {".ts", ".tsx"}
RUST_EXT = {".rs"}
Lang = Literal["python", "js", "ts", "rust", "html", "css", "config", "unknown"]

IMPORT_RE = re.compile(r'^\s*import\s+.*?\s+from\s+[\'"](.+?)[\'"]\s*;?\s*$', re.M)
REQUIRE_RE = re.compile(r'require\(\s*[\'"](.+?)[\'"]\s*\)')
RUST_USE_RE = re.compile(r'^\s*use\s+([a-zA-Z0-9_:]+)', re.M)
RUST_MOD_RE = re.compile(r'^\s*mod\s+([a-zA-Z0-9_]+)\s*;', re.M)

def _rel(root: Path, p: Path) -> str:
    return str(p.resolve().relative_to(root.resolve())).replace("\\", "/")

def guess_lang(p: Path) -> Lang:
    suf = p.suffix.lower()
    if suf in PY_EXT: return "python"
    if suf in TS_EXT: return "ts"
    if suf in JS_EXT: return "js"
    if suf in RUST_EXT: return "rust"
    if suf in {".html", ".jinja", ".j2"}: return "html"
    if suf in {".css", ".scss", ".sass"}: return "css"
    if suf in {".toml", ".yaml", ".yml", ".json", ".ini", ".cfg"}: return "config"
    return "unknown"

def scan_python_imports(root: Path, file_path: Path) -> list[str]:
    # returns import module strings (not resolved paths)
    try:
        tree = ast.parse(file_path.read_text(encoding="utf-8", errors="replace"))
    except Exception:
        return []
    mods: list[str] = []
    for n in ast.walk(tree):
        if isinstance(n, ast.Import):
            for a in n.names:
                mods.append(a.name)
        elif isinstance(n, ast.ImportFrom):
            if n.module:
                mods.append(n.module)
    return mods

def scan_js_imports(text: str) -> list[str]:
    out = []
    out += IMPORT_RE.findall(text)
    out += REQUIRE_RE.findall(text)
    return out

def scan_rust_uses(text: str) -> tuple[list[str], list[str]]:
    uses = RUST_USE_RE.findall(text)
    mods = RUST_MOD_RE.findall(text)
    return uses, mods

def detect_entrypoints(root: Path) -> list[EntryPoint]:
    eps: list[EntryPoint] = []

    # Python CLI entry: __main__.py
    p = root / "src"
    if p.exists():
        for main in p.rglob("__main__.py"):
            eps.append(EntryPoint(node=_rel(root, main), reason="python __main__.py", confidence=3))

    # Rust main.rs (including tauri src-tauri)
    for mr in root.rglob("main.rs"):
        if "target/" in str(mr):  # safety
            continue
        eps.append(EntryPoint(node=_rel(root, mr), reason="rust main.rs", confidence=3))

    # package.json scripts as entrypoints (synthetic)
    pkg = root / "package.json"
    if pkg.is_file():
        eps.append(EntryPoint(node="package.json", reason="node scripts", confidence=2))

    return eps

def detect_entrypoints_from_nodes(nodes: dict[str, Node]) -> list[EntryPoint]:
    """Derive entrypoints from the scanned node list (deterministic, no FS scope issues)."""
    eps: list[EntryPoint] = []

    for nid, n in nodes.items():
        path = n.path
        if path.endswith("__main__.py"):
            eps.append(EntryPoint(node=nid, reason="python __main__.py", confidence=3))
        elif path.endswith("main.rs"):
            eps.append(EntryPoint(node=nid, reason="rust main.rs", confidence=3))
        elif path == "package.json":
            eps.append(EntryPoint(node=nid, reason="node package.json scripts", confidence=2))
        elif path == "pyproject.toml":
            eps.append(EntryPoint(node=nid, reason="python pyproject.toml (scripts/entrypoints likely)", confidence=1))

    # Optional hints (useful for library-ish layouts)
    for hint in ("src/pybundle/cli.py", "src/pybundle/__init__.py"):
        if hint in nodes:
            eps.append(EntryPoint(node=hint, reason="likely CLI/module entry", confidence=1))

    # Deduplicate deterministically
    uniq = {(e.node, e.reason, e.confidence) for e in eps}
    eps = [EntryPoint(node=a, reason=b, confidence=c) for (a, b, c) in uniq]
    return sorted(eps, key=lambda e: (e.node, -e.confidence, e.reason))

def _resolve_py_to_node(root: Path, src_rel: str, mod: str) -> Optional[str]:
    """
    Resolve a Python import module string to a local file node (relative path),
    if it exists in the scanned repo. Deterministic, no sys.path tricks.
    """
    # Normalize relative imports like ".cli" or "..utils"
    # We only support relative imports within the src file's package directory.
    if mod.startswith("."):
        # count leading dots
        dots = 0
        for ch in mod:
            if ch == ".":
                dots += 1
            else:
                break
        tail = mod[dots:]  # remaining name after dots
        src_dir = Path(src_rel).parent  # e.g. pybundle/
        # go up (dots-1) levels: from . = same package, .. = parent, etc
        base = src_dir
        for _ in range(max(dots - 1, 0)):
            base = base.parent
        if tail:
            parts = tail.split(".")
            cand = base.joinpath(*parts)
        else:
            cand = base
    else:
        cand = Path(*mod.split("."))

    # candidate file paths relative to root
    py_file = (root / cand).with_suffix(".py")
    init_file = root / cand / "__init__.py"

    if py_file.is_file():
        return _rel(root, py_file)
    if init_file.is_file():
        return _rel(root, init_file)
    return None

def build_roadmap(root: Path, include_dirs: list[Path], exclude_dirs: set[str], max_files: int = 20000) -> RoadmapGraph:
    nodes: dict[str, Node] = {}
    edges: list[Edge] = []

    # Walk selected dirs
    files: list[Path] = []
    root_res = root.resolve()

    skipped_big = 0

    for d in include_dirs:
        if not d.exists():
            continue

        if _is_venv_root(d):
            continue

        for dirpath, dirnames, filenames in os.walk(d):
            dirpath_p = Path(dirpath)

            # 1) prune excluded dirs by name
            dirnames[:] = [dn for dn in dirnames if dn not in exclude_dirs]

            # 2) prune venv dirs by structure (ANY name)
            dirnames[:] = [dn for dn in dirnames if dn not in exclude_dirs and dn != ".pybundle-venv"]
            dirnames[:] = [dn for dn in dirnames if not _is_venv_root(dirpath_p / dn)]

            for fn in filenames:
                p = dirpath_p / fn

                # 3) skip anything under a venv (belt + suspenders)
                rel = Path(_rel(root, p))
                if _is_under_venv(root, rel):
                    continue

                rel_s = _rel(root, p)
                if rel_s.startswith(".pybundle-venv/") or "/site-packages/" in rel_s:
                    continue
                if _is_under_venv(root, Path(rel_s)):
                    continue

                try:
                    if p.stat().st_size > 2_000_000:
                        skipped_big += 1
                        continue
                except OSError:
                    continue

                files.append(p)
                if len(files) >= max_files:
                    break

            if len(files) >= max_files:
                break

    # Create nodes
    for f in files:
        rel = _rel(root, f)
        nodes[rel] = Node(id=rel, path=rel, lang=guess_lang(f))

    # Scan edges
    for f in files:
        rel = _rel(root, f)
        lang = nodes[rel].lang
        text = None

        if lang in {"js", "ts", "rust", "html", "config"}:
            text = f.read_text(encoding="utf-8", errors="replace")

        if lang == "python":
            for mod in scan_python_imports(root, f):
                resolved = _resolve_py_to_node(root, rel, mod)
                if resolved and resolved in nodes:
                    edges.append(Edge(src=rel, dst=resolved, type="import"))
                else:
                    edges.append(Edge(src=rel, dst=f"py:{mod}", type="import"))
        elif lang in {"js", "ts"} and text is not None:
            for spec in scan_js_imports(text):
                edges.append(Edge(src=rel, dst=f"js:{spec}", type="import"))
        elif lang == "rust" and text is not None:
            uses, mods = scan_rust_uses(text)
            for u in uses:
                edges.append(Edge(src=rel, dst=f"rs:{u}", type="use"))
            for m in mods:
                edges.append(Edge(src=rel, dst=f"rsmod:{m}", type="mod"))

    # TODO: add template includes, docker compose, pyproject scripts, etc.

    # Entrypoints
    eps = detect_entrypoints_from_nodes(nodes)

    # Stats
    stats: dict[str, int] = {}
    for n in nodes.values():
        stats[f"nodes_{n.lang}"] = stats.get(f"nodes_{n.lang}", 0) + 1
    for e in edges:
        stats[f"edges_{e.type}"] = stats.get(f"edges_{e.type}", 0) + 1
    stats["skipped_big_files"] = skipped_big

    # determinism: sort
    node_list = sorted(nodes.values(), key=lambda x: x.id)
    edge_list = sorted(edges, key=lambda e: (e.src, e.dst, e.type, e.note))
    eps_sorted = sorted(eps, key=lambda e: (e.node, -e.confidence, e.reason))

    return RoadmapGraph(
        version=1,
        root=str(root),
        nodes=node_list,
        edges=edge_list,
        entrypoints=eps_sorted,
        stats=stats,
    )
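Usage sketch (illustrative, not part of the wheel): build_roadmap takes the include/exclude sets from AIContextPolicy, so the two modules compose directly. The root path below is a placeholder.

# Sketch: build the roadmap graph for a project using the default policy.
from pathlib import Path

from pybundle.policy import AIContextPolicy
from pybundle.roadmap_scan import build_roadmap

root = Path(".")  # placeholder project root
policy = AIContextPolicy()
graph = build_roadmap(
    root=root,
    include_dirs=policy.include_dir_candidates(root),
    exclude_dirs=policy.exclude_dirs,
    max_files=policy.roadmap_max_files,
)
print(graph.stats)
for ep in graph.entrypoints:
    print(ep.confidence, ep.node, "-", ep.reason)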
pybundle/root_detect.py
ADDED
@@ -0,0 +1,14 @@
from __future__ import annotations

from pathlib import Path

_MARKERS = [".git", "pyproject.toml", "requirements.txt", "setup.cfg", "setup.py"]


def detect_project_root(start: Path) -> Path | None:
    cur = start.resolve()
    for p in [cur, *cur.parents]:
        for m in _MARKERS:
            if (p / m).exists():
                return p
    return None
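Usage sketch (illustrative, not part of the wheel): climb from the current working directory to the nearest directory containing one of the marker files above.

# Sketch: locate the project root from wherever the tool is invoked.
from pathlib import Path

from pybundle.root_detect import detect_project_root

root = detect_project_root(Path.cwd())
print(root if root is not None else "no project root found")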