devtime-ei 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devtime/__init__.py +9 -0
- devtime/ai/__init__.py +0 -0
- devtime/ai/local.py +11 -0
- devtime/ai/prompts.py +24 -0
- devtime/ai/providers.py +41 -0
- devtime/assets/devtimeignore.starter +23 -0
- devtime/cli.py +374 -0
- devtime/config.py +67 -0
- devtime/db/__init__.py +0 -0
- devtime/db/connection.py +16 -0
- devtime/db/migrations.py +114 -0
- devtime/db/repository.py +351 -0
- devtime/db/schema.sql +145 -0
- devtime/fixtures/__init__.py +0 -0
- devtime/fixtures/assertions.py +51 -0
- devtime/fixtures/loader.py +52 -0
- devtime/fixtures/runner.py +73 -0
- devtime/intelligence/__init__.py +0 -0
- devtime/intelligence/claims.py +235 -0
- devtime/intelligence/concepts.py +483 -0
- devtime/intelligence/context_pack.py +276 -0
- devtime/intelligence/evidence.py +127 -0
- devtime/intelligence/lineage.py +21 -0
- devtime/intelligence/risk.py +267 -0
- devtime/intelligence/scoring.py +99 -0
- devtime/mcp/__init__.py +0 -0
- devtime/mcp/schemas.py +39 -0
- devtime/mcp/server.py +35 -0
- devtime/mcp/tools.py +90 -0
- devtime/output/__init__.py +0 -0
- devtime/output/json_export.py +50 -0
- devtime/output/markdown.py +50 -0
- devtime/output/terminal.py +208 -0
- devtime/paths.py +40 -0
- devtime/privacy.py +96 -0
- devtime/scanner/__init__.py +0 -0
- devtime/scanner/extractors/__init__.py +0 -0
- devtime/scanner/extractors/base.py +83 -0
- devtime/scanner/extractors/config_files.py +41 -0
- devtime/scanner/extractors/docs.py +35 -0
- devtime/scanner/extractors/nextjs.py +82 -0
- devtime/scanner/extractors/python.py +81 -0
- devtime/scanner/extractors/tests.py +61 -0
- devtime/scanner/extractors/typescript.py +99 -0
- devtime/scanner/file_walker.py +96 -0
- devtime/scanner/ignore.py +96 -0
- devtime/scanner/language.py +36 -0
- devtime/scanner/signals.py +252 -0
- devtime_ei-0.1.0.dist-info/METADATA +289 -0
- devtime_ei-0.1.0.dist-info/RECORD +54 -0
- devtime_ei-0.1.0.dist-info/WHEEL +5 -0
- devtime_ei-0.1.0.dist-info/entry_points.txt +2 -0
- devtime_ei-0.1.0.dist-info/licenses/LICENSE +201 -0
- devtime_ei-0.1.0.dist-info/top_level.txt +1 -0
devtime/privacy.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Privacy and security implementation (Builder Edition, Chapter 19).
|
|
2
|
+
|
|
3
|
+
Privacy is architecture, not a footer.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from devtime import config, paths
|
|
12
|
+
|
|
13
|
+
# Secret patterns (Chapter 19).
|
|
14
|
+
SECRET_PATTERNS = [
    # "AKIA" followed by 16 upper-case letters/digits.
    r"AKIA[0-9A-Z]{16}",
    # PEM private key of any labelled flavour (plain, RSA, EC, OPENSSH, ...).
    # The match runs through the END marker when one is present so the key
    # body is redacted too; a lone header without an END marker still matches.
    r"-----BEGIN (?:[A-Z0-9]+ )*PRIVATE KEY-----"
    r"(?:[\s\S]*?-----END (?:[A-Z0-9]+ )*PRIVATE KEY-----)?",
    # "sk-" followed by at least 20 key characters.
    r"sk-[A-Za-z0-9_-]{20,}",
    # Quoted assignments such as password = "hunter2" (case-insensitive key).
    r"""(?i)(api_key|secret|token|password)\s*=\s*['"][^'"]+['"]""",
]


def redact_secret_like_values(text: str) -> str:
    """Replace every secret-looking span in *text* with ``<redacted-secret>``.

    Each pattern in ``SECRET_PATTERNS`` is applied in order with ``re.sub``.
    For PEM private keys the whole block (header, body and footer) is
    replaced, not just the header line.

    Args:
        text: Arbitrary text that may contain credentials.

    Returns:
        The text with every match replaced by the ``<redacted-secret>`` marker.
    """
    for pattern in SECRET_PATTERNS:
        text = re.sub(pattern, "<redacted-secret>", text)
    return text
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def privacy_report(root: Path | None = None) -> dict:
    """Summarise the privacy posture of the repository at *root*.

    Args:
        root: Repository root; when falsy, ``paths.repo_root()`` is used.

    Returns:
        A dict with three lists of human-readable strings under the keys
        ``"good"``, ``"warning"`` and ``"recommended"``.
    """
    repo = root or paths.repo_root()
    privacy_cfg = config.load_config(repo)["privacy"]

    report: dict = {"good": [], "warning": [], "recommended": []}

    # NOTE(review): the "enabled"/"on" labels are filed under "good" as well;
    # confirm that is intended rather than a warning.
    toggles = (
        ("ai_enabled", "AI disabled", "AI enabled"),
        ("cloud_enabled", "Cloud disabled", "Cloud enabled"),
        ("telemetry_enabled", "Telemetry off", "Telemetry on"),
    )
    for key, off_label, on_label in toggles:
        report["good"].append(on_label if privacy_cfg[key] else off_label)

    # NOTE(review): ".env ignored" is reported whenever a .env file exists; no
    # ignore rule is consulted here - confirm the scanner guarantees it.
    presence_checks = (
        (".devtimeignore", ".devtimeignore active"),
        (".env", ".env ignored"),
    )
    for filename, label in presence_checks:
        if (repo / filename).exists():
            report["good"].append(label)

    ignored = _devtime_ignored(repo)
    if ignored is True:
        report["good"].append(".devtime/ is git-ignored")
        return report

    if ignored is False:
        report["warning"].append(".devtime/ is not ignored by git")
        report["recommended"].append(
            "Add `.devtime/` to `.gitignore` unless you intentionally share local memory."
        )
        return report

    # Neither True nor False: the ignore status could not be determined.
    report["warning"].append(
        "Could not confirm whether .devtime/ is ignored (git unavailable)"
    )
    report["recommended"].append(
        "Verify `.devtime/` is ignored, e.g. add it to `.gitignore`."
    )
    return report
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _devtime_ignored(root: Path) -> bool | None:
    """Return True/False if .devtime/ is git-ignored, or None if undeterminable.

    ``git check-ignore`` is preferred because it honours parent ``.gitignore``
    rules, so a nested repo whose parent ignores ``.devtime/`` is not falsely
    warned. When git cannot answer (not installed, not a repository, timed
    out, unexpected exit code) the local ``.gitignore`` is parsed rule by
    rule instead.

    Args:
        root: Directory to run the check from.

    Returns:
        True when ignored, False when git says it is not ignored, None when
        neither git nor the local ``.gitignore`` can confirm it.
    """
    import subprocess

    # Probe a path *inside* .devtime/ so the dir-only pattern matches even when
    # the directory does not exist yet, and so parent .gitignore rules apply.
    target = ".devtime/devtime.sqlite"
    try:
        proc = subprocess.run(
            ["git", "check-ignore", "-q", target],
            cwd=str(root),
            capture_output=True,
            text=True,
            timeout=10,  # never let a stuck git block the privacy report
        )
    except (OSError, subprocess.SubprocessError):
        # git missing, cwd unusable, or the call timed out: use the fallback.
        proc = None

    if proc is not None:
        # exit 0 = ignored, 1 = not ignored, 128 = not a git repo / error.
        if proc.returncode == 0:
            return True
        if proc.returncode == 1:
            return False

    # Fallback: local .gitignore only (cannot see parent rules).
    try:
        rules = (root / ".gitignore").read_text(
            encoding="utf-8", errors="ignore"
        ).splitlines()
    except OSError:
        return None

    spellings = {
        ".devtime",
        ".devtime/",
        "/.devtime",
        "/.devtime/",
        "**/.devtime",
        "**/.devtime/",
        ".devtime/*",
        ".devtime/**",
    }
    ignored = False
    for raw in rules:
        rule = raw.strip()
        # Comments and blank lines are not rules; a plain substring search
        # would wrongly treat "# .devtime/" as ignoring the directory.
        if not rule or rule.startswith("#"):
            continue
        negated = rule.startswith("!")
        if negated:
            rule = rule[1:]
        if rule in spellings:
            ignored = not negated  # the last matching rule wins
    return True if ignored else None
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Shared signal model for extractors (Builder Edition, Chapter 8).
|
|
2
|
+
|
|
3
|
+
A signal is a small extracted fact. It does not have to be perfect, but it must
|
|
4
|
+
be typed.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from devtime.scanner.file_walker import WalkedFile
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class Signal:
    """One typed fact extracted from a single repository file."""

    # Category of the fact, e.g. "dependency", "route", "test".
    kind: str
    # Label for the fact; may be None.
    name: str | None
    # Repository-relative path of the file the fact came from.
    file_rel_path: str
    # Optional payload string.
    value: str | None = None
    # Optional line span of the fact within the file.
    start_line: int | None = None
    end_line: int | None = None
    # Extractor-assigned confidence; defaults to 0.5.
    confidence: float = 0.5
    # Free-form extra data; each instance gets its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def signal(
    kind: str,
    *,
    name: str | None = None,
    file: WalkedFile,
    value: str | None = None,
    start_line: int | None = None,
    end_line: int | None = None,
    confidence: float = 0.5,
    metadata: dict[str, Any] | None = None,
) -> Signal:
    """Build a ``Signal`` for *file*, filling ``file_rel_path`` from it.

    Args:
        kind: Signal category.
        name: Optional label for the signal.
        file: Walked file the signal was extracted from; only ``rel_path``
            is read.
        value: Optional payload string.
        start_line: Optional first line of the span.
        end_line: Optional last line of the span.
        confidence: Extractor confidence, 0.5 by default.
        metadata: Extra data; a falsy value becomes a fresh empty dict.

    Returns:
        The populated ``Signal``.
    """
    extra = metadata if metadata else {}
    return Signal(
        kind,
        name,
        file.rel_path,
        value=value,
        start_line=start_line,
        end_line=end_line,
        confidence=confidence,
        metadata=extra,
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def read_text(file: WalkedFile) -> str:
    """Return the UTF-8 text of *file*, or ``""`` when it cannot be read.

    Undecodable bytes are dropped (``errors="ignore"``); any ``OSError``
    raised while reading yields the empty string instead of propagating.
    """
    try:
        content = file.path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        content = ""
    return content
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def classify_jwt_purpose(text: str, rel_path: str) -> str:
    """Classify what a JWT is used for from keywords in a file.

    The file text and its relative path are lower-cased and searched for
    keyword hints. Access hints take precedence: a file with both access and
    invitation hints is still classified as access.

    Args:
        text: File contents.
        rel_path: Repository-relative path of the file.

    Returns:
        ``"access"``, ``"invitation"``, or ``"unclear"`` when no hint is found.
    """
    haystack = f"{text} {rel_path}".lower()

    access_markers = (
        "access token", "accesstoken", "access_token", "bearer", "authorization",
        "login", "signin", "sign-in", "refresh token", "refresh_token",
        "req.cookies", "set-cookie", "auth middleware", "current_user",
        "get_current_user",
    )
    invitation_markers = (
        "invite", "invitation", "verify email", "verify-email", "email_verification",
        "password reset", "password-reset", "reset_token", "magic link", "magic-link",
        "one-time", "onetime",
    )

    def mentions(markers: tuple) -> bool:
        for marker in markers:
            if marker in haystack:
                return True
        return False

    # Access wins whether or not invitation hints are also present.
    if mentions(access_markers):
        return "access"
    if mentions(invitation_markers):
        return "invitation"
    return "unclear"
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Config and dependency-manifest extractor (Builder Edition, Chapter 8)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
from devtime.scanner.extractors.base import Signal, read_text, signal
|
|
9
|
+
from devtime.scanner.file_walker import WalkedFile
|
|
10
|
+
|
|
11
|
+
_ENV_REF_RE = re.compile(r"\b([A-Z][A-Z0-9_]{3,})\b")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def extract_config_signals(file: WalkedFile) -> list[Signal]:
    """Extract dependency and config-name signals from manifest/env files.

    * ``package.json``: one ``dependency`` signal per key of ``dependencies``
      and ``devDependencies``. Malformed JSON or unexpected shapes yield none.
    * ``requirements.txt`` / ``requirements-dev.txt``: one ``dependency``
      signal per requirement line, using the bare distribution name.
    * dotenv-style files: one ``config`` signal per upper-case variable name.
      Only the text left of the first ``=`` on each line is scanned, so
      assigned values are never emitted.

    Args:
        file: The walked file to inspect.

    Returns:
        The extracted signals, possibly empty.
    """
    name = file.path.name.lower()
    text = read_text(file)
    signals: list[Signal] = []

    # package.json / requirements: dependency signals.
    if name == "package.json":
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            data = {}
        # Valid JSON need not be an object (e.g. a list); treat that as empty.
        if not isinstance(data, dict):
            data = {}
        for section in ("dependencies", "devDependencies"):
            deps = data.get(section)
            if not isinstance(deps, dict):
                continue
            for dep in deps:
                signals.append(signal("dependency", name=dep, file=file, confidence=0.6))
    elif name in ("requirements.txt", "requirements-dev.txt"):
        for raw in text.splitlines():
            line = raw.strip()
            # Skip blanks, comments and pip options such as -r / -e / --index-url.
            if not line or line.startswith(("#", "-")):
                continue
            # Cut at the first version operator, marker, extra, URL or space.
            dep = re.split(r"[=<>!~;@\[\s]", line, maxsplit=1)[0]
            if dep:
                signals.append(signal("dependency", name=dep, file=file, confidence=0.6))

    # .env.example and config files: env var names are concept hints (never values).
    if name.startswith(".env") or name.endswith((".env", ".env.example")):
        for line in text.splitlines():
            # Everything right of the first "=" is a value and must not be scanned.
            key_part = line.partition("=")[0]
            for match in _ENV_REF_RE.finditer(key_part):
                signals.append(
                    signal("config", name=match.group(1), file=file, confidence=0.5)
                )

    return signals
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Docs and decision-record extractor (Builder Edition, Chapter 8)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from devtime.scanner.extractors.base import Signal, read_text, signal
|
|
8
|
+
from devtime.scanner.file_walker import WalkedFile
|
|
9
|
+
|
|
10
|
+
_HEADING_RE = re.compile(r"^#{1,3}\s+(.+?)\s*$", re.M)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def extract_doc_signals(file: WalkedFile) -> list[Signal]:
    """Emit one signal per heading matched by ``_HEADING_RE`` in *file*.

    A file counts as a decision record when it lives in a ``decisions``
    directory (at the repository root or nested) or when its file name starts
    with three or four digits and a dash. Headings of decision records are
    emitted as ``"decision"`` signals with confidence 0.7; all others as
    ``"doc"`` signals with confidence 0.4.

    Args:
        file: The walked documentation file.

    Returns:
        Heading signals in document order, each carrying its 1-based line.
    """
    text = read_text(file)
    rel = file.rel_path.lower()

    is_decision = (
        rel.startswith("decisions/")  # root-level directory has no leading slash
        or "/decisions/" in rel
        or re.match(r"^\d{3,4}-", file.path.name) is not None
    )
    kind = "decision" if is_decision else "doc"
    confidence = 0.7 if is_decision else 0.4

    signals: list[Signal] = []
    line = 1
    scanned = 0
    for match in _HEADING_RE.finditer(text):
        # Count only the newlines since the previous heading instead of
        # rescanning the file from the start for every match.
        line += text.count("\n", scanned, match.start())
        scanned = match.start()
        signals.append(
            signal(
                kind,
                name=match.group(1).strip(),
                file=file,
                start_line=line,
                confidence=confidence,
            )
        )

    return signals
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Next.js App Router signal extractor.
|
|
2
|
+
|
|
3
|
+
Added during V0 Reality Validation: Snapilio and SaaSVoice are Next.js App Router
|
|
4
|
+
apps whose API surface is file-based (`app/api/**/route.ts` exporting HTTP method
|
|
5
|
+
handlers). The Express/FastAPI extractors saw none of it, so every concept
|
|
6
|
+
degraded to weak dependency evidence. This extractor parses route handlers and
|
|
7
|
+
derives the route path from the directory structure.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
|
|
14
|
+
from devtime.scanner.extractors.base import Signal, read_text, signal
|
|
15
|
+
from devtime.scanner.file_walker import WalkedFile
|
|
16
|
+
|
|
17
|
+
# export async function GET(...) / export function POST(...) / export const DELETE = ...
|
|
18
|
+
_METHOD_RE = re.compile(
|
|
19
|
+
r"export\s+(?:async\s+)?function\s+(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\b"
|
|
20
|
+
r"|export\s+const\s+(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\s*=",
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
_ROUTE_FILES = {"route.ts", "route.js", "route.tsx", "route.jsx"}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def is_app_router_route(rel_path: str) -> bool:
    """Tell whether *rel_path* is a route file located under an ``app`` directory.

    The final path component must be one of ``_ROUTE_FILES`` and the path must
    either start with ``app/`` or contain ``/app/``.
    """
    _, _, filename = rel_path.rpartition("/")
    under_app = rel_path.startswith("app/") or "/app/" in rel_path
    return filename in _ROUTE_FILES and under_app
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def derive_route_path(rel_path: str) -> str:
    """Derive the URL path of a route file from its directory structure.

    Example: ``app/api/(payments)/checkout/[id]/route.ts`` becomes
    ``/api/checkout/[id]``.

    Everything up to and including the *last* segment named ``app`` is
    dropped, as is the final segment (the route file). Of the remaining
    segments, empty ones, ``(group)`` segments and ``@slot`` segments are
    removed; dynamic segments such as ``[id]`` are kept. When no ``app``
    segment exists, all directory segments are used.
    """
    pieces = rel_path.split("/")
    last_app = max(
        (idx for idx, piece in enumerate(pieces) if piece == "app"),
        default=-1,
    )

    def contributes(segment: str) -> bool:
        if not segment:
            return False
        is_group = segment.startswith("(") and segment.endswith(")")
        return not is_group and not segment.startswith("@")

    url_parts = [seg for seg in pieces[last_app + 1 : -1] if contributes(seg)]
    return "/" + "/".join(url_parts)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def extract_nextjs_signals(file: WalkedFile) -> list[Signal]:
    """Emit one ``route`` signal per HTTP method handler exported by a route file.

    Files that ``is_app_router_route`` rejects, and route files in which
    ``_METHOD_RE`` finds no exported handler, yield an empty list. The URL
    path comes from ``derive_route_path``.
    """
    if not is_app_router_route(file.rel_path):
        return []

    source = read_text(file)
    # Either alternative of the pattern captures the verb; take whichever matched.
    verbs = [hit.group(1) or hit.group(2) for hit in _METHOD_RE.finditer(source)]
    if not verbs:
        return []

    url = derive_route_path(file.rel_path)
    return [
        signal(
            "route",
            name=f"{verb} {url}",
            file=file,
            confidence=0.82,
            metadata={"method": verb, "path": url, "framework": "nextjs"},
        )
        for verb in verbs
    ]
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Python signal extractor (Builder Edition, Chapter 8)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from devtime.scanner.extractors.base import (
|
|
8
|
+
Signal,
|
|
9
|
+
classify_jwt_purpose,
|
|
10
|
+
read_text,
|
|
11
|
+
signal,
|
|
12
|
+
)
|
|
13
|
+
from devtime.scanner.file_walker import WalkedFile
|
|
14
|
+
|
|
15
|
+
_IMPORT_RE = re.compile(r"""^\s*(?:from\s+(\w[\w.]*)\s+import|import\s+(\w[\w.]*))""", re.M)
|
|
16
|
+
_ROUTE_RE = re.compile(
|
|
17
|
+
r"""@(?:app|router)\.(get|post|put|patch|delete)\(\s*['"]([^'"]+)['"]"""
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def extract_python_signals(file: WalkedFile) -> list[Signal]:
    """Extract signals from a Python source file using text heuristics.

    Emits, in this order: ``dependency`` (top-level package of each import),
    ``route`` (decorators matched by ``_ROUTE_RE``), ``auth_dependency``,
    ``background_job``, ``token_usage`` (JWT, with a classified purpose),
    ``upload_endpoint`` and ``webhook_signature_verification`` (Stripe).
    Apart from imports and routes, each kind is emitted at most once per file.
    """
    source = read_text(file)
    found: list[Signal] = []

    for hit in _IMPORT_RE.finditer(source):
        dotted = hit.group(1) or hit.group(2) or ""
        top_level = dotted.split(".")[0]
        if top_level:
            found.append(signal("dependency", name=top_level, file=file, confidence=0.6))

    for hit in _ROUTE_RE.finditer(source):
        verb, url = hit.group(1).upper(), hit.group(2)
        found.append(
            signal(
                "route",
                name=f"{verb} {url}",
                file=file,
                confidence=0.8,
                metadata={"method": verb, "path": url, "framework": "fastapi"},
            )
        )

    # "get_current_user" contains "current_user", so one substring test covers both.
    if "Depends(" in source and "current_user" in source:
        found.append(
            signal("auth_dependency", name="current_user", file=file, confidence=0.75)
        )

    task_markers = ("@celery.task", ".task(", "@shared_task")
    if any(marker in source for marker in task_markers):
        found.append(
            signal("background_job", name=file.rel_path, file=file, confidence=0.75)
        )

    if re.search(r"\bjwt\.(encode|decode)\b|\bPyJWT\b", source):
        found.append(
            signal(
                "token_usage",
                name="jwt",
                file=file,
                confidence=0.8,
                metadata={"purpose": classify_jwt_purpose(source, file.rel_path)},
            )
        )

    # File uploads: FastAPI UploadFile / multipart / werkzeug request.files.
    if re.search(r"\bUploadFile\b|=\s*File\(|multipart/form-data|request\.files", source):
        found.append(signal("upload_endpoint", name="upload", file=file, confidence=0.8))

    if "stripe.Webhook.construct_event" in source:
        found.append(
            signal(
                "webhook_signature_verification",
                name="stripe",
                file=file,
                confidence=0.9,
            )
        )

    return found
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Test-file extractor (Builder Edition, Chapter 8).
|
|
2
|
+
|
|
3
|
+
Behavior-specific tests are strong evidence, so test names are first-class signals.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
|
|
10
|
+
from devtime.scanner.extractors.base import Signal, read_text, signal
|
|
11
|
+
from devtime.scanner.file_walker import WalkedFile
|
|
12
|
+
|
|
13
|
+
_TEST_NAME_RE = re.compile(
|
|
14
|
+
r"""(?:it|test|describe)\(\s*['"]([^'"]+)['"]""" # JS/TS
|
|
15
|
+
r"""|def\s+(test_\w+)""" # pytest
|
|
16
|
+
)
|
|
17
|
+
# Imports a test references - used to attach tests to the implementation they cover
|
|
18
|
+
# (Evidence Precision v0.0.7): a truthful "imports the implementation" reason.
|
|
19
|
+
_IMPORT_RE = re.compile(
|
|
20
|
+
r"""from\s+([\w.]+)\s+import|import\s+([\w.]+)""" # python
|
|
21
|
+
r"""|from\s+['"]([^'"]+)['"]""", # JS/TS
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _extract_imports(text: str) -> list[str]:
    """Return the lower-cased module names that ``_IMPORT_RE`` finds in *text*.

    Names are returned in order of appearance; duplicates are kept.
    """
    captured = (
        hit.group(1) or hit.group(2) or hit.group(3)
        for hit in _IMPORT_RE.finditer(text)
    )
    return [module.lower() for module in captured if module]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _is_e2e(rel_path: str) -> bool:
    """Tell whether *rel_path* looks like an end-to-end UI spec.

    E2E specs match concept keywords by accident and should be weak evidence.
    A path qualifies when it contains ``.e2e.``, has an ``e2e`` or
    ``tests-e2e`` directory component (including at the repository root), or
    mentions ``playwright``. Matching is case-insensitive.
    """
    # Prefix a slash so a root-level "e2e/..." or "tests-e2e/..." directory is
    # matched by the same "/dir/" checks as a nested one.
    low = "/" + rel_path.lower()
    return ".e2e." in low or "/tests-e2e/" in low or "/e2e/" in low or "playwright" in low
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def extract_test_signals(file: WalkedFile) -> list[Signal]:
    """Emit one ``test`` signal per test name matched by ``_TEST_NAME_RE``.

    Every signal carries the file's import list and an ``e2e`` flag in its
    metadata. Tests in files that ``_is_e2e`` flags get confidence 0.4;
    all others get 0.8.
    """
    source = read_text(file)
    is_e2e_file = _is_e2e(file.rel_path)
    imported = _extract_imports(source)
    weight = 0.4 if is_e2e_file else 0.8

    found: list[Signal] = []
    for hit in _TEST_NAME_RE.finditer(source):
        # Group 1 is the JS/TS title, group 2 the pytest function name.
        title = hit.group(1) or hit.group(2)
        if not title:
            continue
        found.append(
            signal(
                "test",
                name=title,
                file=file,
                confidence=weight,
                metadata={"e2e": is_e2e_file, "imports": imported},
            )
        )
    return found
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""TypeScript / JavaScript signal extractor (Builder Edition, Chapter 8)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from devtime.scanner.extractors.base import (
|
|
8
|
+
Signal,
|
|
9
|
+
classify_jwt_purpose,
|
|
10
|
+
read_text,
|
|
11
|
+
signal,
|
|
12
|
+
)
|
|
13
|
+
from devtime.scanner.file_walker import WalkedFile
|
|
14
|
+
|
|
15
|
+
_IMPORT_RE = re.compile(r"""import\s+.*?from\s+['"]([^'"]+)['"]""")
|
|
16
|
+
# Match app/router as well as named routers (authRouter, exportRouter, etc).
|
|
17
|
+
_ROUTE_RE = re.compile(
|
|
18
|
+
r"""\b(?:app|\w*[Rr]outer)\.(get|post|put|patch|delete)\(\s*['"]([^'"]+)['"]""",
|
|
19
|
+
re.I,
|
|
20
|
+
)
|
|
21
|
+
_MIDDLEWARE_RE = re.compile(
|
|
22
|
+
r"""\b(requireAuth|authMiddleware|isAuthenticated|ensureAuth|requireAdmin)\b"""
|
|
23
|
+
)
|
|
24
|
+
_BULLMQ_WORKER_RE = re.compile(r"""new\s+Worker\(\s*['"]([^'"]+)['"]""")
|
|
25
|
+
_BULLMQ_QUEUE_RE = re.compile(r"""new\s+Queue\(\s*['"]([^'"]+)['"]""")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def extract_typescript_signals(file: WalkedFile) -> list[Signal]:
    """Extract signals from a TypeScript/JavaScript file using text heuristics.

    Emits, in this order: ``dependency`` (non-relative import specifiers),
    ``route`` (calls matched by ``_ROUTE_RE``), ``middleware`` (auth),
    ``webhook_signature_verification`` (Stripe), ``token_usage`` (JWT, with a
    classified purpose), ``upload_endpoint``, ``background_job`` (one per
    ``new Worker("...")``) and ``queue`` (one per ``new Queue("...")``).
    """
    source = read_text(file)
    found: list[Signal] = []

    # Skip relative imports; external dependencies are the useful signal.
    specifiers = (hit.group(1) for hit in _IMPORT_RE.finditer(source))
    found.extend(
        signal("dependency", name=spec, file=file, confidence=0.6)
        for spec in specifiers
        if not spec.startswith(".")
    )

    for hit in _ROUTE_RE.finditer(source):
        verb, url = hit.group(1).upper(), hit.group(2)
        found.append(
            signal(
                "route",
                name=f"{verb} {url}",
                file=file,
                confidence=0.8,
                metadata={"method": verb, "path": url, "framework": "express"},
            )
        )

    if _MIDDLEWARE_RE.search(source) is not None:
        found.append(signal("middleware", name="auth", file=file, confidence=0.7))

    if "stripe.webhooks.constructEvent" in source:
        found.append(
            signal(
                "webhook_signature_verification",
                name="stripe",
                file=file,
                confidence=0.9,
            )
        )

    if re.search(r"\bjsonwebtoken\b|\bjwt\.(sign|verify)\b", source):
        found.append(
            signal(
                "token_usage",
                name="jwt",
                file=file,
                confidence=0.8,
                metadata={"purpose": classify_jwt_purpose(source, file.rel_path)},
            )
        )

    # File uploads: multipart / multer / busboy / formData with a file part.
    if re.search(r"multipart/form-data|\bmulter\b|\bbusboy\b|\.formData\(", source):
        found.append(signal("upload_endpoint", name="upload", file=file, confidence=0.75))

    found.extend(
        signal(
            "background_job",
            name=f"worker:{hit.group(1)}",
            file=file,
            confidence=0.8,
        )
        for hit in _BULLMQ_WORKER_RE.finditer(source)
    )
    found.extend(
        signal("queue", name=f"queue:{hit.group(1)}", file=file, confidence=0.8)
        for hit in _BULLMQ_QUEUE_RE.finditer(source)
    )

    return found
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Safe repository file walker (Builder Edition, Chapter 7).
|
|
2
|
+
|
|
3
|
+
Walks files without executing repository code. Ignored directories are pruned
|
|
4
|
+
*before* traversal (Reality Hardening): the walker never descends into
|
|
5
|
+
node_modules, .git, build output, .devtime, etc., instead of walking them and
|
|
6
|
+
filtering after the fact. Symlinks, large files, binaries, and ignored/secret
|
|
7
|
+
files are still skipped per file.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from devtime.scanner import ignore as ignore_mod
|
|
17
|
+
from devtime.scanner.language import is_doc_path, is_test_path
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class WalkedFile:
    """A file yielded by ``walk_repository`` together with basic facts about it."""

    # Location of the file on disk (walk root joined with the relative path).
    path: Path
    # POSIX-style path relative to the walk root.
    rel_path: str
    # Size reported by ``stat`` when the file was walked.
    size_bytes: int
    # Lower-cased file suffix, e.g. ".py".
    extension: str
    # Classification of ``rel_path`` as a test file / a documentation file.
    is_test: bool
    is_doc: bool
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _normalize(rel: str) -> str:
    """Collapse every run of consecutive slashes in *rel* into a single slash."""
    collapsed = rel
    while True:
        shrunk = collapsed.replace("//", "/")
        if shrunk == collapsed:
            return collapsed
        collapsed = shrunk
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def walk_repository(
    root: Path,
    ignore_matcher: ignore_mod.IgnoreMatcher,
    max_size_bytes: int,
    *,
    follow_symlinks: bool = False,
    stats: dict | None = None,
):
    """Yield a ``WalkedFile`` for every file under *root* that passes the filters.

    Directories are pruned before ``os.walk`` descends into them. Directories
    and files are visited in sorted name order.

    Args:
        root: Directory to walk.
        ignore_matcher: Provides ``match_dir(rel)`` for directories and
            ``match(rel)`` for files; a truthy result skips the entry.
        max_size_bytes: Files larger than this are skipped.
        follow_symlinks: When False (default), symlinked directories and
            files are skipped.
        stats: Optional dict whose ``"pruned_dirs"`` and ``"skipped_files"``
            counters are incremented. Symlink skips and ``stat`` failures are
            not counted.

    Yields:
        ``WalkedFile`` records with POSIX-style paths relative to *root*.
    """

    def bump(counter: str) -> None:
        # Counters are optional; create them lazily starting from zero.
        if stats is not None:
            stats[counter] = stats.get(counter, 0) + 1

    def join_rel(parent: str, leaf: str) -> str:
        # Entries directly under the root have no directory prefix.
        return _normalize(leaf if parent in ("", ".") else f"{parent}/{leaf}")

    base = Path(root)
    for current, subdirs, files in os.walk(base, followlinks=follow_symlinks):
        here = Path(current)
        rel_dir = _normalize(here.relative_to(base).as_posix())

        # --- Prune ignored directories before descending into them. ---
        descend: list[str] = []
        for sub in sorted(subdirs):
            if ignore_mod.is_pruned_dirname(sub):
                bump("pruned_dirs")
                continue
            if ignore_matcher.match_dir(join_rel(rel_dir, sub)):
                bump("pruned_dirs")
                continue
            if (here / sub).is_symlink() and not follow_symlinks:
                continue
            descend.append(sub)
        subdirs[:] = descend  # in-place prune controls os.walk descent

        for filename in sorted(files):
            rel = join_rel(rel_dir, filename)
            if ignore_matcher.match(rel):
                bump("skipped_files")
                continue

            candidate = here / filename
            if candidate.is_symlink() and not follow_symlinks:
                continue

            suffix = candidate.suffix.lower()
            if ignore_mod.is_binary_extension(suffix):
                bump("skipped_files")
                continue

            try:
                size = candidate.stat().st_size
            except OSError:
                # The file vanished or is unreadable; skip it silently.
                continue
            if size > max_size_bytes:
                bump("skipped_files")
                continue

            yield WalkedFile(
                path=candidate,
                rel_path=rel,
                size_bytes=size,
                extension=suffix,
                is_test=is_test_path(rel),
                is_doc=is_doc_path(rel),
            )
|