devtime-ei 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. devtime/__init__.py +9 -0
  2. devtime/ai/__init__.py +0 -0
  3. devtime/ai/local.py +11 -0
  4. devtime/ai/prompts.py +24 -0
  5. devtime/ai/providers.py +41 -0
  6. devtime/assets/devtimeignore.starter +23 -0
  7. devtime/cli.py +374 -0
  8. devtime/config.py +67 -0
  9. devtime/db/__init__.py +0 -0
  10. devtime/db/connection.py +16 -0
  11. devtime/db/migrations.py +114 -0
  12. devtime/db/repository.py +351 -0
  13. devtime/db/schema.sql +145 -0
  14. devtime/fixtures/__init__.py +0 -0
  15. devtime/fixtures/assertions.py +51 -0
  16. devtime/fixtures/loader.py +52 -0
  17. devtime/fixtures/runner.py +73 -0
  18. devtime/intelligence/__init__.py +0 -0
  19. devtime/intelligence/claims.py +235 -0
  20. devtime/intelligence/concepts.py +483 -0
  21. devtime/intelligence/context_pack.py +276 -0
  22. devtime/intelligence/evidence.py +127 -0
  23. devtime/intelligence/lineage.py +21 -0
  24. devtime/intelligence/risk.py +267 -0
  25. devtime/intelligence/scoring.py +99 -0
  26. devtime/mcp/__init__.py +0 -0
  27. devtime/mcp/schemas.py +39 -0
  28. devtime/mcp/server.py +35 -0
  29. devtime/mcp/tools.py +90 -0
  30. devtime/output/__init__.py +0 -0
  31. devtime/output/json_export.py +50 -0
  32. devtime/output/markdown.py +50 -0
  33. devtime/output/terminal.py +208 -0
  34. devtime/paths.py +40 -0
  35. devtime/privacy.py +96 -0
  36. devtime/scanner/__init__.py +0 -0
  37. devtime/scanner/extractors/__init__.py +0 -0
  38. devtime/scanner/extractors/base.py +83 -0
  39. devtime/scanner/extractors/config_files.py +41 -0
  40. devtime/scanner/extractors/docs.py +35 -0
  41. devtime/scanner/extractors/nextjs.py +82 -0
  42. devtime/scanner/extractors/python.py +81 -0
  43. devtime/scanner/extractors/tests.py +61 -0
  44. devtime/scanner/extractors/typescript.py +99 -0
  45. devtime/scanner/file_walker.py +96 -0
  46. devtime/scanner/ignore.py +96 -0
  47. devtime/scanner/language.py +36 -0
  48. devtime/scanner/signals.py +252 -0
  49. devtime_ei-0.1.0.dist-info/METADATA +289 -0
  50. devtime_ei-0.1.0.dist-info/RECORD +54 -0
  51. devtime_ei-0.1.0.dist-info/WHEEL +5 -0
  52. devtime_ei-0.1.0.dist-info/entry_points.txt +2 -0
  53. devtime_ei-0.1.0.dist-info/licenses/LICENSE +201 -0
  54. devtime_ei-0.1.0.dist-info/top_level.txt +1 -0
devtime/privacy.py ADDED
@@ -0,0 +1,96 @@
1
+ """Privacy and security implementation (Builder Edition, Chapter 19).
2
+
3
+ Privacy is architecture, not a footer.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import re
9
+ from pathlib import Path
10
+
11
+ from devtime import config, paths
12
+
13
# Secret patterns (Chapter 19).
# Each entry is a regular-expression source string; they are applied one after
# another by redact_secret_like_values().
SECRET_PATTERNS = [
    # AWS access key IDs.
    r"AKIA[0-9A-Z]{16}",
    # PEM private keys of any flavour (PKCS#8, RSA, EC, DSA, OPENSSH, ENCRYPTED).
    # The optional tail swallows the base64 body up to the END marker so that the
    # key material itself is redacted, not only the header line. When no END
    # marker is present (e.g. a single scanned line) just the header matches.
    r"-----BEGIN (?:[A-Z0-9]+ )*PRIVATE KEY-----"
    r"(?:[\s\S]*?-----END (?:[A-Z0-9]+ )*PRIVATE KEY-----)?",
    # "sk-" prefixed API keys.
    r"sk-[A-Za-z0-9_-]{20,}",
    # Quoted assignments to secret-looking names, e.g. password = "...".
    r"""(?i)(api_key|secret|token|password)\s*=\s*['"][^'"]+['"]""",
]


def redact_secret_like_values(text: str) -> str:
    """Return *text* with every secret-looking span replaced by a placeholder.

    Each pattern in ``SECRET_PATTERNS`` is applied in order and every match is
    replaced with the literal ``<redacted-secret>``. For quoted assignments the
    whole ``name = "value"`` span is replaced, not just the value.
    """
    for pattern in SECRET_PATTERNS:
        text = re.sub(pattern, "<redacted-secret>", text)
    return text
26
+
27
+
28
def privacy_report(root: Path | None = None) -> dict:
    """Summarise the privacy posture of a repository.

    Args:
        root: Repository root to inspect; when falsy, ``paths.repo_root()`` is used.

    Returns:
        A dict with three lists of human-readable strings under the keys
        ``"good"``, ``"warning"`` and ``"recommended"``.
    """
    root = root or paths.repo_root()
    priv = config.load_config(root)["privacy"]

    # The three feature toggles are always listed under "good", whichever way
    # they are set; only the wording changes.
    good: list[str] = [
        "AI enabled" if priv["ai_enabled"] else "AI disabled",
        "Cloud enabled" if priv["cloud_enabled"] else "Cloud disabled",
        "Telemetry on" if priv["telemetry_enabled"] else "Telemetry off",
    ]
    warning: list[str] = []
    recommended: list[str] = []

    # Presence-based notes: each label is added when the file exists in root.
    presence_notes = (
        (".devtimeignore", ".devtimeignore active"),
        (".env", ".env ignored"),
    )
    for filename, label in presence_notes:
        if (root / filename).exists():
            good.append(label)

    # Tri-state: True (ignored), False (not ignored), anything else (unknown).
    ignored = _devtime_ignored(root)
    if ignored is True:
        good.append(".devtime/ is git-ignored")
    elif ignored is False:
        warning.append(".devtime/ is not ignored by git")
        recommended.append(
            "Add `.devtime/` to `.gitignore` unless you intentionally share local memory."
        )
    else:  # unknown (git unavailable)
        warning.append("Could not confirm whether .devtime/ is ignored (git unavailable)")
        recommended.append(
            "Verify `.devtime/` is ignored, e.g. add it to `.gitignore`."
        )

    return {"good": good, "warning": warning, "recommended": recommended}
61
+
62
+
63
+ def _devtime_ignored(root: Path) -> bool | None:
64
+ """Return True/False if .devtime/ is git-ignored, or None if undeterminable.
65
+
66
+ Trust Repair (v0.0.6): prefer `git check-ignore`, which honors parent
67
+ .gitignore rules, so a nested repo whose parent ignores .devtime/ is not
68
+ falsely warned. Falls back to a local .gitignore text check.
69
+ """
70
+ import subprocess
71
+
72
+ # Probe a path *inside* .devtime/ so the dir-only pattern matches even when the
73
+ # directory does not exist yet, and so parent .gitignore rules are honored.
74
+ target = ".devtime/devtime.sqlite"
75
+ try:
76
+ proc = subprocess.run(
77
+ ["git", "check-ignore", "-q", target],
78
+ cwd=str(root),
79
+ capture_output=True,
80
+ text=True,
81
+ )
82
+ # exit 0 = ignored, 1 = not ignored, 128 = not a git repo / error.
83
+ if proc.returncode == 0:
84
+ return True
85
+ if proc.returncode == 1:
86
+ return False
87
+ except FileNotFoundError:
88
+ pass
89
+
90
+ # Fallback: local .gitignore text only (cannot see parent rules).
91
+ gitignore = root / ".gitignore"
92
+ if gitignore.exists() and ".devtime/" in gitignore.read_text(
93
+ encoding="utf-8", errors="ignore"
94
+ ):
95
+ return True
96
+ return None
File without changes
File without changes
@@ -0,0 +1,83 @@
1
+ """Shared signal model for extractors (Builder Edition, Chapter 8).
2
+
3
+ A signal is a small extracted fact. It does not have to be perfect, but it must
4
+ be typed.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+ from typing import Any
11
+
12
+ from devtime.scanner.file_walker import WalkedFile
13
+
14
+
15
@dataclass
class Signal:
    """One typed fact extracted from a single file.

    Only ``kind``, ``name`` and ``file_rel_path`` are required; everything else
    has a default.
    """

    # Category of the fact, e.g. "route" or "dependency".
    kind: str
    # Label for the fact; may be None.
    name: str | None
    # Path of the originating file (WalkedFile.rel_path when built via signal()).
    file_rel_path: str
    # Optional payload string.
    value: str | None = None
    # Optional line span for the fact.
    start_line: int | None = None
    end_line: int | None = None
    # Extractor-assigned confidence; 0.5 unless the extractor says otherwise.
    confidence: float = 0.5
    # Free-form extra data; every instance gets its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)
25
+
26
+
27
def signal(
    kind: str,
    *,
    name: str | None = None,
    file: WalkedFile,
    value: str | None = None,
    start_line: int | None = None,
    end_line: int | None = None,
    confidence: float = 0.5,
    metadata: dict[str, Any] | None = None,
) -> Signal:
    """Build a ``Signal`` for *file*.

    The signal's ``file_rel_path`` is taken from ``file.rel_path``. A missing or
    empty ``metadata`` argument is replaced by a fresh empty dict.
    """
    extra = metadata if metadata else {}
    return Signal(
        kind,
        name,
        file.rel_path,
        value,
        start_line,
        end_line,
        confidence,
        extra,
    )
48
+
49
+
50
def read_text(file: WalkedFile) -> str:
    """Return the UTF-8 text of ``file.path``, or ``""`` if it cannot be read.

    Undecodable bytes are dropped rather than raising.
    """
    source = file.path
    try:
        contents = source.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        contents = ""
    return contents
55
+
56
+
57
def classify_jwt_purpose(text: str, rel_path: str) -> str:
    """Classify what a JWT is used for (Trust Repair v0.0.6).

    The file text and its path are lower-cased and searched for marker
    substrings. Returns "access" when any access marker is present (even if
    invitation markers are present too), "invitation" when only invitation
    markers are present, and "unclear" when neither kind is found.
    """
    access_markers = (
        "access token", "accesstoken", "access_token", "bearer", "authorization",
        "login", "signin", "sign-in", "refresh token", "refresh_token",
        "req.cookies", "set-cookie", "auth middleware", "current_user",
        "get_current_user",
    )
    invitation_markers = (
        "invite", "invitation", "verify email", "verify-email", "email_verification",
        "password reset", "password-reset", "reset_token", "magic link", "magic-link",
        "one-time", "onetime",
    )
    haystack = f"{text} {rel_path}".lower()

    def mentions(markers: tuple[str, ...]) -> bool:
        return any(marker in haystack for marker in markers)

    # Access wins over invitation: an access path that also issues invites
    # still does access auth.
    if mentions(access_markers):
        return "access"
    if mentions(invitation_markers):
        return "invitation"
    return "unclear"
@@ -0,0 +1,41 @@
1
+ """Config and dependency-manifest extractor (Builder Edition, Chapter 8)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+
8
+ from devtime.scanner.extractors.base import Signal, read_text, signal
9
+ from devtime.scanner.file_walker import WalkedFile
10
+
11
# Shape of an environment-variable name: upper-case, at least four characters.
_ENV_REF_RE = re.compile(r"\b([A-Z][A-Z0-9_]{3,})\b")


def extract_config_signals(file: WalkedFile) -> list[Signal]:
    """Extract dependency and config-name signals from manifest/env files.

    * ``package.json``: one "dependency" signal per key of ``dependencies`` and
      ``devDependencies``. Malformed JSON or a non-object document yields none.
    * ``requirements.txt`` / ``requirements-dev.txt``: one "dependency" signal
      per requirement line, using only the project name (no version specifier,
      extras or environment marker). Comment lines and pip option lines such as
      ``-r other.txt`` or ``--index-url ...`` are skipped.
    * ``.env*`` files: one "config" signal per assigned variable *name*. Only
      the left-hand side of ``NAME=value`` lines is inspected, so values are
      never emitted.
    """
    name = file.path.name.lower()
    text = read_text(file)
    signals: list[Signal] = []

    # package.json / requirements: dependency signals.
    if name == "package.json":
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            data = {}
        # A valid JSON document need not be an object (e.g. a list or string).
        if not isinstance(data, dict):
            data = {}
        for section in ("dependencies", "devDependencies"):
            deps = data.get(section)
            if not isinstance(deps, dict):
                continue
            for dep in deps:
                signals.append(signal("dependency", name=dep, file=file, confidence=0.6))
    elif name in ("requirements.txt", "requirements-dev.txt"):
        for line in text.splitlines():
            requirement = line.strip()
            # Skip blanks, comments and pip options (-r, -e, --index-url, ...).
            if not requirement or requirement.startswith(("#", "-")):
                continue
            # Cut at the first specifier, extras bracket, marker or whitespace.
            dep = re.split(r"[=<>!~;\[\s]", requirement, maxsplit=1)[0]
            if dep:
                signals.append(signal("dependency", name=dep, file=file, confidence=0.6))

    # .env.example and config files: env var names are concept hints (never values).
    if name.startswith(".env") or name.endswith((".env", ".env.example")):
        for line in text.splitlines():
            entry = line.strip()
            if not entry or entry.startswith("#") or "=" not in entry:
                continue
            key = entry.split("=", 1)[0].strip()
            if key.startswith("export "):
                key = key[len("export "):].strip()
            match = _ENV_REF_RE.fullmatch(key)
            if match:
                signals.append(
                    signal("config", name=match.group(1), file=file, confidence=0.5)
                )

    return signals
@@ -0,0 +1,35 @@
1
+ """Docs and decision-record extractor (Builder Edition, Chapter 8)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from devtime.scanner.extractors.base import Signal, read_text, signal
8
+ from devtime.scanner.file_walker import WalkedFile
9
+
10
# ATX headings of level 1-3. Only spaces/tabs may separate the hashes from the
# title, so a bare "#" line cannot borrow the following line as its heading.
_HEADING_RE = re.compile(r"^#{1,3}[ \t]+(.+?)[ \t]*$", re.M)


def extract_doc_signals(file: WalkedFile) -> list[Signal]:
    """Emit one signal per level 1-3 Markdown heading in *file*.

    A file counts as a decision record when any directory in its relative path
    is named ``decisions`` (case-insensitive, including at the repository root)
    or when its file name starts with three or four digits and a dash.
    Decision headings are emitted as kind "decision" with confidence 0.7; all
    other headings as kind "doc" with confidence 0.4. ``start_line`` is the
    1-based line of the heading.
    """
    text = read_text(file)
    signals: list[Signal] = []

    directories = file.rel_path.lower().split("/")[:-1]
    is_decision = "decisions" in directories or (
        re.match(r"^\d{3,4}-", file.path.name) is not None
    )
    kind = "decision" if is_decision else "doc"
    confidence = 0.7 if is_decision else 0.4

    # Count newlines incrementally between matches instead of rescanning the
    # text from the start for every heading.
    line = 1
    cursor = 0
    for match in _HEADING_RE.finditer(text):
        line += text.count("\n", cursor, match.start())
        cursor = match.start()
        signals.append(
            signal(
                kind,
                name=match.group(1).strip(),
                file=file,
                start_line=line,
                confidence=confidence,
            )
        )

    return signals
@@ -0,0 +1,82 @@
1
+ """Next.js App Router signal extractor.
2
+
3
+ Added during V0 Reality Validation: Snapilio and SaaSVoice are Next.js App Router
4
+ apps whose API surface is file-based (`app/api/**/route.ts` exporting HTTP method
5
+ handlers). The Express/FastAPI extractors saw none of it, so every concept
6
+ degraded to weak dependency evidence. This extractor parses route handlers and
7
+ derives the route path from the directory structure.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import re
13
+
14
+ from devtime.scanner.extractors.base import Signal, read_text, signal
15
+ from devtime.scanner.file_walker import WalkedFile
16
+
17
+ # export async function GET(...) / export function POST(...) / export const DELETE = ...
18
+ _METHOD_RE = re.compile(
19
+ r"export\s+(?:async\s+)?function\s+(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\b"
20
+ r"|export\s+const\s+(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\s*=",
21
+ )
22
+
23
+ _ROUTE_FILES = {"route.ts", "route.js", "route.tsx", "route.jsx"}
24
+
25
+
26
+ def is_app_router_route(rel_path: str) -> bool:
27
+ name = rel_path.rsplit("/", 1)[-1]
28
+ if name not in _ROUTE_FILES:
29
+ return False
30
+ return "/app/" in rel_path or rel_path.startswith("app/")
31
+
32
+
33
def derive_route_path(rel_path: str) -> str:
    """Turn app/api/(payments)/checkout/[id]/route.ts -> /api/checkout/[id].

    Everything up to and including the last path segment named ``app`` is
    dropped, as is the trailing file name. When no ``app`` segment exists, all
    directory segments are used. Parenthesised route groups and ``@slot``
    segments are removed; dynamic segments such as [id] and [...slug] are
    kept. Empty segments never survive.
    """
    pieces = rel_path.split("/")
    app_positions = [index for index, piece in enumerate(pieces) if piece == "app"]
    first_kept = app_positions[-1] + 1 if app_positions else 0

    def affects_url(segment: str) -> bool:
        if not segment:
            return False
        # Route groups are wrapped in parentheses and never appear in the URL.
        if segment.startswith("(") and segment.endswith(")"):
            return False
        # Parallel-route slots (@name) do not appear in the URL either.
        return not segment.startswith("@")

    url_segments = [segment for segment in pieces[first_kept:-1] if affects_url(segment)]
    return "/" + "/".join(url_segments)
58
+
59
+
60
def extract_nextjs_signals(file: WalkedFile) -> list[Signal]:
    """Emit one "route" signal per exported HTTP-method handler in a route file.

    Files that are not App Router route modules, and route modules exporting no
    recognised handler, produce an empty list. The route path is derived from
    the file's directory structure.
    """
    if not is_app_router_route(file.rel_path):
        return []

    source = read_text(file)
    # Each match fills exactly one of the two groups (function vs. const form).
    methods = [found.group(1) or found.group(2) for found in _METHOD_RE.finditer(source)]
    if not methods:
        return []

    route_path = derive_route_path(file.rel_path)
    return [
        signal(
            "route",
            name=f"{method} {route_path}",
            file=file,
            confidence=0.82,
            metadata={"method": method, "path": route_path, "framework": "nextjs"},
        )
        for method in methods
    ]
@@ -0,0 +1,81 @@
1
+ """Python signal extractor (Builder Edition, Chapter 8)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from devtime.scanner.extractors.base import (
8
+ Signal,
9
+ classify_jwt_purpose,
10
+ read_text,
11
+ signal,
12
+ )
13
+ from devtime.scanner.file_walker import WalkedFile
14
+
15
_IMPORT_RE = re.compile(r"""^\s*(?:from\s+(\w[\w.]*)\s+import|import\s+(\w[\w.]*))""", re.M)
# Match app/router as well as named routers (users_router, authRouter, etc),
# mirroring what the TypeScript extractor accepts.
_ROUTE_RE = re.compile(
    r"""@(?:app|\w*[Rr]outer)\.(get|post|put|patch|delete)\(\s*['"]([^'"]+)['"]"""
)


def extract_python_signals(file: WalkedFile) -> list[Signal]:
    """Extract signals from a Python source file using text heuristics.

    Emits, in this order:

    * "dependency" - the top-level package of every ``import`` / ``from ... import``.
    * "route" - every ``@app.<method>("path")`` or ``@<name>router.<method>("path")``
      decorator, tagged with framework "fastapi".
    * "auth_dependency" - when the file mentions both ``Depends(`` and ``current_user``.
    * "background_job" - when a Celery-style task marker is present.
    * "token_usage" - when ``jwt.encode`` / ``jwt.decode`` / ``PyJWT`` appears; the
      purpose classification is stored in the metadata.
    * "upload_endpoint" - when an upload marker is present.
    * "webhook_signature_verification" - when ``stripe.Webhook.construct_event`` is used.

    The file is never imported or executed.
    """
    text = read_text(file)
    signals: list[Signal] = []

    for match in _IMPORT_RE.finditer(text):
        module = (match.group(1) or match.group(2) or "").split(".")[0]
        if module:
            signals.append(signal("dependency", name=module, file=file, confidence=0.6))

    for match in _ROUTE_RE.finditer(text):
        method = match.group(1).upper()
        path = match.group(2)
        signals.append(
            signal(
                "route",
                name=f"{method} {path}",
                file=file,
                confidence=0.8,
                metadata={"method": method, "path": path, "framework": "fastapi"},
            )
        )

    # "current_user" also covers "get_current_user", which contains it.
    if "Depends(" in text and "current_user" in text:
        signals.append(
            signal("auth_dependency", name="current_user", file=file, confidence=0.75)
        )

    if "@celery.task" in text or ".task(" in text or "@shared_task" in text:
        signals.append(
            signal("background_job", name=file.rel_path, file=file, confidence=0.75)
        )

    if re.search(r"\bjwt\.(encode|decode)\b|\bPyJWT\b", text):
        purpose = classify_jwt_purpose(text, file.rel_path)
        signals.append(
            signal(
                "token_usage",
                name="jwt",
                file=file,
                confidence=0.8,
                metadata={"purpose": purpose},
            )
        )

    # File uploads: FastAPI UploadFile / multipart / werkzeug request.files.
    if re.search(r"\bUploadFile\b|=\s*File\(|multipart/form-data|request\.files", text):
        signals.append(
            signal("upload_endpoint", name="upload", file=file, confidence=0.8)
        )

    if "stripe.Webhook.construct_event" in text:
        signals.append(
            signal(
                "webhook_signature_verification",
                name="stripe",
                file=file,
                confidence=0.9,
            )
        )

    return signals
@@ -0,0 +1,61 @@
1
+ """Test-file extractor (Builder Edition, Chapter 8).
2
+
3
+ Behavior-specific tests are strong evidence, so test names are first-class signals.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import re
9
+
10
+ from devtime.scanner.extractors.base import Signal, read_text, signal
11
+ from devtime.scanner.file_walker import WalkedFile
12
+
13
+ _TEST_NAME_RE = re.compile(
14
+ r"""(?:it|test|describe)\(\s*['"]([^'"]+)['"]""" # JS/TS
15
+ r"""|def\s+(test_\w+)""" # pytest
16
+ )
17
+ # Imports a test references - used to attach tests to the implementation they cover
18
+ # (Evidence Precision v0.0.7): a truthful "imports the implementation" reason.
19
+ _IMPORT_RE = re.compile(
20
+ r"""from\s+([\w.]+)\s+import|import\s+([\w.]+)""" # python
21
+ r"""|from\s+['"]([^'"]+)['"]""", # JS/TS
22
+ )
23
+
24
+
25
+ def _extract_imports(text: str) -> list[str]:
26
+ mods: list[str] = []
27
+ for m in _IMPORT_RE.finditer(text):
28
+ mod = m.group(1) or m.group(2) or m.group(3)
29
+ if mod:
30
+ mods.append(mod.lower())
31
+ return mods
32
+
33
+
34
+ def _is_e2e(rel_path: str) -> bool:
35
+ """E2E UI specs match concept keywords by accident and should be weak evidence.
36
+
37
+ Reality Validation finding: `tests-e2e/specs/sidebar-navigation.e2e.spec.ts`
38
+ was defining File Uploads, and other e2e specs polluted Data Export.
39
+ """
40
+ low = rel_path.lower()
41
+ return ".e2e." in low or "/tests-e2e/" in low or "/e2e/" in low or "playwright" in low
42
+
43
+
44
def extract_test_signals(file: WalkedFile) -> list[Signal]:
    """Emit one "test" signal per test name found in *file*.

    Names come from JS/TS ``it(...)`` / ``test(...)`` / ``describe(...)`` string
    arguments and from pytest-style ``def test_*`` functions. Signals from e2e
    files get confidence 0.4, all others 0.8. Every signal's metadata carries
    the e2e flag and the list of modules the file imports.
    """
    source = read_text(file)
    is_e2e = _is_e2e(file.rel_path)
    imported = _extract_imports(source)
    confidence = 0.4 if is_e2e else 0.8

    found: list[Signal] = []
    for hit in _TEST_NAME_RE.finditer(source):
        # Exactly one of the two groups is set, depending on the test style.
        test_name = hit.group(1) or hit.group(2)
        if not test_name:
            continue
        found.append(
            signal(
                "test",
                name=test_name,
                file=file,
                confidence=confidence,
                metadata={"e2e": is_e2e, "imports": imported},
            )
        )
    return found
@@ -0,0 +1,99 @@
1
+ """TypeScript / JavaScript signal extractor (Builder Edition, Chapter 8)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from devtime.scanner.extractors.base import (
8
+ Signal,
9
+ classify_jwt_purpose,
10
+ read_text,
11
+ signal,
12
+ )
13
+ from devtime.scanner.file_walker import WalkedFile
14
+
15
# `import <bindings> from '<module>'`. The binding section may span several
# lines (formatters wrap long named-import lists) but can never contain a quote
# or a semicolon, which keeps a match from running across statements.
_IMPORT_RE = re.compile(r"""import\s+[^;'"]*?from\s+['"]([^'"]+)['"]""")
# Match app/router as well as named routers (authRouter, exportRouter, etc).
_ROUTE_RE = re.compile(
    r"""\b(?:app|\w*[Rr]outer)\.(get|post|put|patch|delete)\(\s*['"]([^'"]+)['"]""",
    re.I,
)
_MIDDLEWARE_RE = re.compile(
    r"""\b(requireAuth|authMiddleware|isAuthenticated|ensureAuth|requireAdmin)\b"""
)
_BULLMQ_WORKER_RE = re.compile(r"""new\s+Worker\(\s*['"]([^'"]+)['"]""")
_BULLMQ_QUEUE_RE = re.compile(r"""new\s+Queue\(\s*['"]([^'"]+)['"]""")


def extract_typescript_signals(file: WalkedFile) -> list[Signal]:
    """Extract signals from a TypeScript / JavaScript file using text heuristics.

    Emits, in this order:

    * "dependency" - every non-relative module named in an ``import ... from``
      statement, including imports whose binding list spans several lines.
    * "route" - every ``app.<method>("path")`` / ``<name>Router.<method>("path")``
      call, tagged with framework "express".
    * "middleware" - once, when a known auth middleware name appears.
    * "webhook_signature_verification" - when ``stripe.webhooks.constructEvent`` is used.
    * "token_usage" - when ``jsonwebtoken`` or ``jwt.sign`` / ``jwt.verify``
      appears; the purpose classification is stored in the metadata.
    * "upload_endpoint" - when a multipart/upload marker is present.
    * "background_job" / "queue" - one per ``new Worker("name")`` / ``new Queue("name")``.
    """
    text = read_text(file)
    signals: list[Signal] = []

    for match in _IMPORT_RE.finditer(text):
        module = match.group(1)
        # Skip relative imports; external dependencies are the useful signal.
        if module.startswith("."):
            continue
        signals.append(signal("dependency", name=module, file=file, confidence=0.6))

    for match in _ROUTE_RE.finditer(text):
        method = match.group(1).upper()
        path = match.group(2)
        signals.append(
            signal(
                "route",
                name=f"{method} {path}",
                file=file,
                confidence=0.8,
                metadata={"method": method, "path": path, "framework": "express"},
            )
        )

    if _MIDDLEWARE_RE.search(text):
        signals.append(
            signal("middleware", name="auth", file=file, confidence=0.7)
        )

    if "stripe.webhooks.constructEvent" in text:
        signals.append(
            signal(
                "webhook_signature_verification",
                name="stripe",
                file=file,
                confidence=0.9,
            )
        )

    if re.search(r"\bjsonwebtoken\b|\bjwt\.(sign|verify)\b", text):
        purpose = classify_jwt_purpose(text, file.rel_path)
        signals.append(
            signal(
                "token_usage",
                name="jwt",
                file=file,
                confidence=0.8,
                metadata={"purpose": purpose},
            )
        )

    # File uploads: multipart / multer / busboy / formData with a file part.
    if re.search(r"multipart/form-data|\bmulter\b|\bbusboy\b|\.formData\(", text):
        signals.append(
            signal("upload_endpoint", name="upload", file=file, confidence=0.75)
        )

    for match in _BULLMQ_WORKER_RE.finditer(text):
        signals.append(
            signal(
                "background_job",
                name=f"worker:{match.group(1)}",
                file=file,
                confidence=0.8,
            )
        )
    for match in _BULLMQ_QUEUE_RE.finditer(text):
        signals.append(
            signal("queue", name=f"queue:{match.group(1)}", file=file, confidence=0.8)
        )

    return signals
@@ -0,0 +1,96 @@
1
+ """Safe repository file walker (Builder Edition, Chapter 7).
2
+
3
+ Walks files without executing repository code. Ignored directories are pruned
4
+ *before* traversal (Reality Hardening): the walker never descends into
5
+ node_modules, .git, build output, .devtime, etc., instead of walking them and
6
+ filtering after the fact. Symlinks, large files, binaries, and ignored/secret
7
+ files are still skipped per file.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import os
13
+ from dataclasses import dataclass
14
+ from pathlib import Path
15
+
16
+ from devtime.scanner import ignore as ignore_mod
17
+ from devtime.scanner.language import is_doc_path, is_test_path
18
+
19
+
20
@dataclass
class WalkedFile:
    """A file produced by the repository walker, with its precomputed facts."""

    # Location of the file on disk (walk directory joined with the file name).
    path: Path
    # Slash-separated path relative to the walked root.
    rel_path: str
    # Size in bytes as reported by stat().
    size_bytes: int
    # Lower-cased file suffix, including the leading dot.
    extension: str
    # Results of the test-path / doc-path classifiers for rel_path.
    is_test: bool
    is_doc: bool
28
+
29
+
30
+ def _normalize(rel: str) -> str:
31
+ while "//" in rel:
32
+ rel = rel.replace("//", "/")
33
+ return rel
34
+
35
+
36
def walk_repository(
    root: Path,
    ignore_matcher: ignore_mod.IgnoreMatcher,
    max_size_bytes: int,
    *,
    follow_symlinks: bool = False,
    stats: dict | None = None,
):
    """Yield a ``WalkedFile`` for every scannable file under *root*.

    Directories are pruned before descent: names rejected by
    ``ignore_mod.is_pruned_dirname``, directories matched by
    ``ignore_matcher.match_dir`` and, unless *follow_symlinks* is set, symlinked
    directories are never entered. Files are skipped when matched by
    ``ignore_matcher.match``, when they are symlinks (unless *follow_symlinks*),
    when their extension is classified as binary, when they cannot be stat'ed,
    when they are not regular files, or when they exceed *max_size_bytes*.

    Directories and files are visited in sorted name order.

    Args:
        root: Directory to walk.
        ignore_matcher: Matcher consulted with slash-separated paths relative to root.
        max_size_bytes: Largest file size that is still yielded.
        follow_symlinks: Whether symlinked directories and files are followed.
        stats: Optional dict; its ``"pruned_dirs"`` and ``"skipped_files"``
            counters are incremented in place as entries are dropped.
    """
    import stat as stat_mod

    def bump(counter: str) -> None:
        # Record a dropped entry when the caller asked for statistics.
        if stats is not None:
            stats[counter] = stats.get(counter, 0) + 1

    root = Path(root)
    for dirpath, dirnames, filenames in os.walk(root, followlinks=follow_symlinks):
        here = Path(dirpath)
        rel_dir = _normalize(here.relative_to(root).as_posix())
        at_root = rel_dir in ("", ".")

        # --- Prune ignored directories before descending into them. ---
        kept: list[str] = []
        for d in sorted(dirnames):
            if ignore_mod.is_pruned_dirname(d):
                bump("pruned_dirs")
                continue
            child_rel = _normalize(d if at_root else f"{rel_dir}/{d}")
            if ignore_matcher.match_dir(child_rel):
                bump("pruned_dirs")
                continue
            if (here / d).is_symlink() and not follow_symlinks:
                continue
            kept.append(d)
        dirnames[:] = kept  # in-place prune controls os.walk descent

        for fn in sorted(filenames):
            rel = _normalize(fn if at_root else f"{rel_dir}/{fn}")
            if ignore_matcher.match(rel):
                bump("skipped_files")
                continue
            path = here / fn
            if path.is_symlink() and not follow_symlinks:
                continue
            extension = path.suffix.lower()
            if ignore_mod.is_binary_extension(extension):
                bump("skipped_files")
                continue
            try:
                info = path.stat()
            except OSError:
                continue
            # FIFOs, sockets and device nodes have no content to scan, and
            # opening one later could block indefinitely.
            if not stat_mod.S_ISREG(info.st_mode):
                bump("skipped_files")
                continue
            size = info.st_size
            if size > max_size_bytes:
                bump("skipped_files")
                continue
            yield WalkedFile(
                path=path,
                rel_path=rel,
                size_bytes=size,
                extension=extension,
                is_test=is_test_path(rel),
                is_doc=is_doc_path(rel),
            )