@svayam-opensource/prj 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +123 -0
- package/agent/harness-manifest.yaml +225 -0
- package/agent/session-protocol.md +116 -0
- package/bin/prj +21 -0
- package/package.json +41 -0
- package/prj +2381 -0
- package/scripts/add-repo.sh +126 -0
- package/scripts/cancel.sh +157 -0
- package/scripts/close-knowledge.sh +250 -0
- package/scripts/close-project.sh +233 -0
- package/scripts/create-task.sh +226 -0
- package/scripts/install-deps.sh +292 -0
- package/scripts/join.sh +89 -0
- package/scripts/lib.sh +841 -0
- package/scripts/merge-task.sh +163 -0
- package/scripts/onboard-repo.sh +275 -0
- package/scripts/pause.sh +80 -0
- package/scripts/project-access.sh +34 -0
- package/scripts/propose-knowledge.sh +168 -0
- package/scripts/release-to-public.sh +185 -0
- package/scripts/render-harness.sh +151 -0
- package/scripts/resume.sh +103 -0
- package/scripts/seed.sh +774 -0
- package/scripts/sync-from-publish.sh +193 -0
- package/scripts/sync.sh +90 -0
- package/scripts/test-merge.sh +100 -0
- package/scripts/validate/check_knowledge.py +158 -0
- package/scripts/validate/check_privacy.py +211 -0
- package/scripts/validate/check_protocol.py +117 -0
- package/scripts/validate/check_secrets.py +175 -0
- package/scripts/validate/run.py +391 -0
- package/setup.sh +529 -0
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Privacy check for the publish branch.
|
|
4
|
+
|
|
5
|
+
Verifies that none of the per-org values from main's org-config.yaml have
|
|
6
|
+
leaked into the publish branch. This is the inverse of STRICT_PLACEHOLDERS=1
|
|
7
|
+
(which catches placeholders leaking into main): here we catch real values
|
|
8
|
+
leaking out of main into publish.
|
|
9
|
+
|
|
10
|
+
How it works:
|
|
11
|
+
1. Read main's org-config.yaml via `git show main:org-config.yaml`.
|
|
12
|
+
2. Extract values that are org-specific (skip generic defaults like
|
|
13
|
+
"main", "dev", "@*-tbd", placeholder strings).
|
|
14
|
+
3. Grep the working tree for any of those values.
|
|
15
|
+
4. Any match is a privacy leak — exit non-zero.
|
|
16
|
+
|
|
17
|
+
Used as a CI gate on PRs to publish (defense in depth alongside the
|
|
18
|
+
test-merge gate, scripts/sync-from-publish.sh's STRICT_PLACEHOLDERS=1
|
|
19
|
+
check, and the discipline of editing publish-side only).
|
|
20
|
+
|
|
21
|
+
Designed to run in the private (source) repo where main is accessible.
|
|
22
|
+
Not intended for the public mirror repo.
|
|
23
|
+
|
|
24
|
+
Usage:
|
|
25
|
+
python3 scripts/validate/check_privacy.py [REPO_ROOT]
|
|
26
|
+
|
|
27
|
+
Exits 0 if no leaks, 1 if leaks found, 2 on error.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
import re
|
|
31
|
+
import subprocess
|
|
32
|
+
import sys
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
|
|
35
|
+
try:
|
|
36
|
+
import yaml
|
|
37
|
+
except ImportError:
|
|
38
|
+
print("[FAIL] PyYAML not installed. Run: bash scripts/install-deps.sh", file=sys.stderr)
|
|
39
|
+
sys.exit(2)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# org-config.yaml keys whose values are searched for in the working tree.
PRIVATE_KEYS = [
    "org_name",
    "org_short_name",
    "org_slug",
    "org_slug_lower",
    "github_org",
    "workspace_repo",
    "policy_owner_email",
    "policy_owner_github",
    "legal_owner_github",
    "infra_owner_github",
    "system_arch_owner_github",
    "data_arch_owner_github",
]

# Template defaults: a config value equal to one of these is never treated as
# org-specific.
GENERIC_VALUES = {
    "",
    "main",
    "dev",
    "master",
    "YYYY-MM-DD",
    "Your Organization Name",
    "YourOrg",
    "ORG",
    "org",
    "your-github-org",
    "000-org-prj",
    "you@example.com",
    "@your-github-handle",
}

# Shapes of values that are still placeholders / not org-specific:
# "@...-tbd" handles, "{{TOKEN}}" template tokens, and ISO-style dates.
PLACEHOLDER_VALUE_PATTERNS = [
    re.compile(source)
    for source in (
        r"^@[a-z-]*-tbd$",
        r"^\{\{[A-Za-z_]+\}\}$",
        r"^\d{4}-\d{2}-\d{2}$",
    )
]

# A file is scanned when its suffix or its exact name is listed here.
SCAN_SUFFIXES = {".md", ".yaml", ".yml", ".sh", ".py"}
SCAN_NAMES = {"CODEOWNERS", "prj"}

# File names where matching values are expected and are not leaks.
ALLOWED_FILES = {
    # contains placeholder strings as part of substitution rules
    "setup.sh",
    # the source of values; not itself in publish content
    "org-config.yaml",
}

# Per-key attribution allowance: a key in ATTRIBUTION_KEYS is not reported
# when it appears in a file named in ATTRIBUTION_FILES (legitimate
# copyright/attribution of the framework's original author in the standard
# public-facing files). Other keys in those files, and these keys in any other
# file, are still flagged.
# org_short_name is listed because it is typically a substring of org_name
# (a short brand name inside the full legal name) and would otherwise
# false-positive on every legitimate copyright line.
ATTRIBUTION_KEYS = {"org_name", "org_short_name"}
ATTRIBUTION_FILES = {
    "LICENSE",
    "README.md",
    "CONTRIBUTING.md",
    "CODE_OF_CONDUCT.md",
    "SECURITY.md",
}
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def is_generic(value: str) -> bool:
    """Report whether ``value`` is a default/placeholder rather than org data.

    Anything that is not a string counts as generic. A string is generic when
    it is listed in GENERIC_VALUES or matches (from its start) one of
    PLACEHOLDER_VALUE_PATTERNS.
    """
    if isinstance(value, str):
        if value in GENERIC_VALUES:
            return True
        return any(rx.match(value) for rx in PLACEHOLDER_VALUE_PATTERNS)
    return True
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def get_main_config(repo_root: Path) -> dict | None:
|
|
111
|
+
try:
|
|
112
|
+
result = subprocess.run(
|
|
113
|
+
["git", "-C", str(repo_root), "show", "main:org-config.yaml"],
|
|
114
|
+
capture_output=True, text=True, check=True,
|
|
115
|
+
)
|
|
116
|
+
config = yaml.safe_load(result.stdout)
|
|
117
|
+
if not isinstance(config, dict):
|
|
118
|
+
print("[ERROR] main:org-config.yaml is not a mapping.", file=sys.stderr)
|
|
119
|
+
return None
|
|
120
|
+
return config
|
|
121
|
+
except subprocess.CalledProcessError as e:
|
|
122
|
+
print(
|
|
123
|
+
"[ERROR] Cannot read 'main:org-config.yaml'. "
|
|
124
|
+
"Privacy check requires access to main branch in this repo.\n"
|
|
125
|
+
f" git error: {e.stderr.strip()}",
|
|
126
|
+
file=sys.stderr,
|
|
127
|
+
)
|
|
128
|
+
return None
|
|
129
|
+
except yaml.YAMLError as e:
|
|
130
|
+
print(f"[ERROR] main:org-config.yaml does not parse: {e}", file=sys.stderr)
|
|
131
|
+
return None
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def scannable_files(repo_root: Path):
    """Yield the files under ``repo_root`` that the leak search should read.

    A path is yielded when it is a regular file, none of its components
    relative to ``repo_root`` starts with ".", its name is not in
    ALLOWED_FILES, and either its suffix is in SCAN_SUFFIXES or its name is in
    SCAN_NAMES.
    """
    for candidate in repo_root.rglob("*"):
        if candidate.is_file():
            components = candidate.relative_to(repo_root).parts
            hidden = any(part.startswith(".") for part in components)
            if hidden or candidate.name in ALLOWED_FILES:
                continue
            if candidate.suffix in SCAN_SUFFIXES or candidate.name in SCAN_NAMES:
                yield candidate
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def find_leaks(repo_root: Path, key: str, value: str) -> list[tuple[Path, int, str]]:
    """Find every scannable line under ``repo_root`` that contains ``value``.

    Args:
        repo_root: Root of the tree walked by ``scannable_files``.
        key: The org-config key ``value`` came from. Keys in ATTRIBUTION_KEYS
            are not searched for in files named in ATTRIBUTION_FILES.
        value: Substring to look for. An empty value yields no leaks, because
            the empty string is contained in every line.

    Returns:
        ``(path relative to repo_root, 1-based line number, stripped line)``
        for each matching line.
    """
    if not value:
        return []

    leaks: list[tuple[Path, int, str]] = []
    attribution_ok = key in ATTRIBUTION_KEYS
    for f in scannable_files(repo_root):
        # Skip attribution-allowed files for keys that are valid attribution
        if attribution_ok and f.name in ATTRIBUTION_FILES:
            continue
        rel = f.relative_to(repo_root)
        try:
            # Decode explicitly and tolerate bad bytes: with the locale
            # default, a file that fails to decode used to be skipped
            # silently, which could hide a leak from this gate.
            text = f.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            print(f"[WARN] could not read {rel}: {e}", file=sys.stderr)
            continue
        leaks.extend(
            (rel, lineno, line.strip())
            for lineno, line in enumerate(text.splitlines(), 1)
            if value in line
        )
    return leaks
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def main() -> int:
    """CLI entry point: report values from main's org-config.yaml found in the tree.

    The repo root is the first command-line argument, defaulting to the
    current directory.

    Returns:
        0 when no leaks are found (or main has no org-specific values),
        1 when at least one leak is found, and 2 when main's org-config.yaml
        could not be loaded.
    """
    repo_root = Path(sys.argv[1] if len(sys.argv) > 1 else ".").resolve()

    config = get_main_config(repo_root)
    if config is None:
        return 2

    # Collect non-generic values to check
    to_check: list[tuple[str, str]] = []
    for key in PRIVATE_KEYS:
        value = config.get(key)
        if not isinstance(value, str) or is_generic(value):
            continue
        # For @-prefixed handles, search for the bare handle. Every line that
        # contains "@handle" also contains "handle", so this is a superset of
        # the verbatim search and additionally catches forms without the "@"
        # (for example a profile URL).
        needle = value.lstrip("@")
        if needle and not is_generic(needle):
            to_check.append((key, needle))

    if not to_check:
        print("=== no org-specific values to check (main appears to be unconfigured) ===")
        return 0

    print(f"Checking publish for leaks of {len(to_check)} value(s) from main:org-config.yaml...")
    print()

    total_leaks = 0
    for key, value in to_check:
        leaks = find_leaks(repo_root, key, value)
        if not leaks:
            continue
        total_leaks += len(leaks)
        print(f"[LEAK] {key}={value!r} appears in {len(leaks)} location(s):")
        # Only the first ten locations per value are listed in full.
        for relpath, lineno, line in leaks[:10]:
            snippet = line if len(line) <= 100 else line[:97] + "..."
            print(f" {relpath}:{lineno}: {snippet}")
        if len(leaks) > 10:
            print(f" ... and {len(leaks) - 10} more")
        print()

    if total_leaks:
        print(f"=== {total_leaks} privacy leak(s) found ===")
        print()
        print("These values from main:org-config.yaml have leaked into publish content.")
        print("Remove them or replace with placeholder/example values.")
        return 1

    print("=== no privacy leaks ===")
    return 0
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# Run the privacy check when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Session-start protocol integrity check (#54 Increment 1 — Layer 3-A).
|
|
4
|
+
|
|
5
|
+
The session-start protocol (agent/session-protocol.md) is only enforceable if it
|
|
6
|
+
is reliably DELIVERED. This validator gates that precondition, author-agnostic
|
|
7
|
+
and deterministic:
|
|
8
|
+
|
|
9
|
+
1. agent/session-protocol.md exists and is non-empty.
|
|
10
|
+
2. It still carries the §0 "agent speaks first / context manifest / before you
|
|
11
|
+
change any code" mandate — i.e. nobody gutted the protocol while leaving the
|
|
12
|
+
file in place.
|
|
13
|
+
3. Every generated tool copy is in sync with the canonical protocol
|
|
14
|
+
(scripts/render-harness.sh --check) — i.e. no generated copy was hand-edited
|
|
15
|
+
to weaken or drop the protocol, and the protocol is rendered to every active
|
|
16
|
+
install path (a missing path counts as drift).
|
|
17
|
+
|
|
18
|
+
Invoked via scripts/validate/run.py (the test-merge `validate` gate) and runnable
|
|
19
|
+
directly: python3 scripts/validate/check_protocol.py [REPO_ROOT].
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import subprocess
|
|
23
|
+
import sys
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
# Phrases from session-protocol.md §0 that must stay in the file. They are
# compared against the lower-cased protocol text; if any one is gone, the
# protocol's core mandate was removed even though the file still exists.
MANDATE_ANCHORS = (
    "agent speaks first",
    "context manifest",
    "before you change any code",
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _read_regular_file(path: Path) -> "str | None":
    """Return the text of ``path``, or None when it is not a readable regular file."""
    # is_file() instead of exists(): a directory at this path must count as
    # "not there" rather than crash read_text() with IsADirectoryError.
    if not path.is_file():
        return None
    try:
        return path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None


def _gate_errors(repo_root: Path, config_rel: str, marker: str, parts: tuple, label: str) -> list[str]:
    """Return an error for each gate file that is missing/empty while its config mentions ``marker``."""
    config_text = _read_regular_file(repo_root / config_rel)
    if config_text is None or marker not in config_text:
        # Gate not adopted in this repo: nothing to enforce.
        return []
    errors: list[str] = []
    for rel in parts:
        content = _read_regular_file(repo_root / rel)
        if content is None or not content.strip():
            errors.append(f"{label} is configured but {rel} is missing/empty")
    return errors


def check_protocol(repo_root: Path) -> list[str]:
    """Check that the session-start protocol is present, intact, and delivered.

    Checks, in order:
      1. agent/session-protocol.md is a regular, readable, non-empty file
         (a missing file ends the check immediately).
      2. Its lower-cased text contains every phrase in MANDATE_ANCHORS.
      3. If both scripts/render-harness.sh and agent/harness-manifest.yaml
         exist, ``bash scripts/render-harness.sh --check`` exits 0.
      4. If .claude/settings.json mentions "session-start", or
         .cursor/hooks.json mentions "session-gate", each hook/command file
         of that gate is a non-empty regular file.

    Args:
        repo_root: Repository root; also the working directory for the renderer.

    Returns:
        Human-readable error strings; an empty list means the check passed.
    """
    errors: list[str] = []

    text = _read_regular_file(repo_root / "agent" / "session-protocol.md")
    if text is None:
        return ["agent/session-protocol.md is missing — the session-start protocol is undelivered"]
    if not text.strip():
        errors.append("agent/session-protocol.md is empty")
    else:
        low = text.lower()
        missing = [a for a in MANDATE_ANCHORS if a not in low]
        if missing:
            errors.append(
                "agent/session-protocol.md no longer contains its §0 mandate "
                f"(missing: {', '.join(missing)})"
            )

    # Generated copies must match the canonical protocol. Reuse render-harness's
    # own --check (it also treats a missing install path as drift).
    renderer = repo_root / "scripts" / "render-harness.sh"
    manifest = repo_root / "agent" / "harness-manifest.yaml"
    if renderer.exists() and manifest.exists():
        try:
            r = subprocess.run(
                ["bash", str(renderer), "--check"],
                cwd=str(repo_root), capture_output=True, text=True,
            )
        except OSError as e:
            errors.append(f"could not run render-harness.sh --check: {e}")
        else:
            if r.returncode != 0:
                detail = (r.stdout + r.stderr).strip().replace("\n", " ")
                errors.append(
                    "generated protocol copies are out of sync with "
                    f"agent/session-protocol.md — run ./scripts/render-harness.sh ({detail[:300]})"
                )

    # #54 Increment 2 — if the Claude Code client gate is configured, its parts
    # must all be present (guard against a hook being deleted while settings.json
    # still points at it, which would break the gate). Conditional on the config
    # file mentioning the gate, so repos that haven't adopted it aren't forced to.
    errors.extend(_gate_errors(
        repo_root,
        ".claude/settings.json",
        "session-start",
        (
            ".claude/hooks/session-start.sh",
            ".claude/hooks/pre-tool-gate.sh",
            ".claude/hooks/session-ack.sh",
            ".claude/commands/session-start.md",
        ),
        "session-start gate",
    ))

    # Same guard for the Cursor client gate (.cursor/hooks.json).
    errors.extend(_gate_errors(
        repo_root,
        ".cursor/hooks.json",
        "session-gate",
        (
            ".cursor/hooks/session-start.sh",
            ".cursor/hooks/session-gate.sh",
            ".cursor/hooks/session-ack.sh",
        ),
        "Cursor session-start gate",
    ))

    return errors
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def main() -> int:
    """CLI entry point: run check_protocol and print the outcome.

    The repo root is the first command-line argument, defaulting to the
    current directory. Returns 0 when there are no errors, otherwise 1.
    """
    root_arg = sys.argv[1] if len(sys.argv) > 1 else "."
    problems = check_protocol(Path(root_arg).resolve())
    if not problems:
        print("=== protocol integrity ok ===")
        return 0
    print(f"[FAIL] protocol ({len(problems)} error(s)):")
    for problem in problems:
        print(f" - {problem}")
    return 1
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Run the protocol check when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Secret / credential scanner (POL-143 enforcement).
|
|
4
|
+
|
|
5
|
+
PRJ-013 audit finding H8 (dimension H/I): POL-143 forbids committing secrets,
|
|
6
|
+
tokens, keys, and credentials, but the framework had ZERO automated
|
|
7
|
+
enforcement — privacy-check.yml/check_privacy.py only catch org-config values
|
|
8
|
+
leaking to publish, not credentials. This scanner closes that gap as a CI gate
|
|
9
|
+
(it runs inside scripts/validate/run.py, which the test-merge `validate` job
|
|
10
|
+
invokes on every PR to main/publish).
|
|
11
|
+
|
|
12
|
+
Design goals:
|
|
13
|
+
- HIGH SIGNAL, LOW false-positive. Patterns match only well-known,
|
|
14
|
+
structurally-distinctive credential shapes, not anything that merely looks
|
|
15
|
+
"password-ish". This keeps the gate trustworthy so it is never disabled.
|
|
16
|
+
- stdlib only (runs under any python3, no pip deps).
|
|
17
|
+
- tracked text files only (skips .git, skips binaries via a NUL-byte sniff).
|
|
18
|
+
- an inline `# pragma: allowlist secret` on the matching line suppresses the
|
|
19
|
+
finding (for fixtures, docs that must show an example token shape, etc.).
|
|
20
|
+
|
|
21
|
+
Usage:
|
|
22
|
+
python3 scripts/validate/check_secrets.py [REPO_ROOT]
|
|
23
|
+
|
|
24
|
+
Exits 0 if no secrets found, 1 if any high-confidence secret is found.
|
|
25
|
+
Invokable directly or via run.py's `check_secrets(repo_root)` entry point.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import re
|
|
29
|
+
import subprocess
|
|
30
|
+
import sys
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
|
|
33
|
+
# A line containing this marker is never reported (detect-secrets compatible
# style).
ALLOWLIST_MARKER = "pragma: allowlist secret"

# Path components that exclude a file from scanning (in addition to anything
# git doesn't track).
SKIP_DIR_PARTS = {".git"}

# Matches values that are placeholders, not secrets. Consulted only for the
# generic assignment pattern (password=/api_key=) so that example, empty and
# template values are not flagged.
PLACEHOLDER_RE = re.compile(
    r"""^(?:
        | \s*                              # empty / whitespace
        | x+ | \.\.\. | -+ | \*+           # x..., ---, ***
        | changeme | example | placeholder
        | your[-_].* | my[-_].* | some[-_].*
        | redacted | dummy | sample | test(?:ing)? | fake
        | none | null | nil | true | false
        | \$\{[^}]+\} | \{\{[^}]+\}\}      # ${VAR} / {{TOKEN}}
        | \$[A-Za-z_][A-Za-z0-9_]*         # $VAR
        | <[^>]+>                          # <your-token>
    )$""",
    re.IGNORECASE | re.VERBOSE,
)

# ── High-signal patterns ──────────────────────────────────────────────────────
# (label, compiled regex) pairs. A regex that matches anywhere on a
# non-allowlisted line is a finding; these shapes are distinctive enough that
# a match is almost certainly a real credential.
PATTERNS = [
    (label, re.compile(source))
    for label, source in (
        ("private key block", r"-----BEGIN (?:[A-Z0-9]+ )*PRIVATE KEY-----"),
        ("GitHub token", r"\b(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}\b"),
        ("GitHub fine-grained PAT", r"\bgithub_pat_[A-Za-z0-9_]{60,}\b"),
        ("AWS access key id", r"\b(?:AKIA|ASIA|AGPA|AIDA|AROA|ANPA|ANVA)[0-9A-Z]{16}\b"),
        ("Slack token", r"\bxox[baprs]-[A-Za-z0-9-]{10,}\b"),
    )
]

# Generic assignment pattern: `password = "..."` / `api_key: '...'` etc.
# This is the only pattern gated on the VALUE not being a placeholder, because
# the keyword alone (password/api_key) is common in legitimate code and docs.
# The quoted value must be at least six characters long.
ASSIGNMENT_RE = re.compile(
    r"""(?P<key>\b(?:password|passwd|pwd|api[_-]?key|secret[_-]?key
        |access[_-]?token|auth[_-]?token|client[_-]?secret)\b)
        \s*[:=]\s*
        (?P<q>["'])(?P<val>[^"']{6,})(?P=q)""",
    re.IGNORECASE | re.VERBOSE,
)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def is_placeholder(value: str) -> bool:
    """Return True when ``value``, stripped of surrounding whitespace, matches PLACEHOLDER_RE."""
    candidate = value.strip()
    return PLACEHOLDER_RE.match(candidate) is not None
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def tracked_files(repo_root: Path) -> list[Path]:
    """List the files to scan under ``repo_root``.

    Uses the NUL-separated output of ``git ls-files -z``, with each entry
    joined onto ``repo_root``. When git exits non-zero or cannot be found,
    falls back to every regular file under ``repo_root`` whose relative path
    has no component in SKIP_DIR_PARTS.
    """
    command = ["git", "-C", str(repo_root), "ls-files", "-z"]
    try:
        listing = subprocess.run(command, capture_output=True, check=True).stdout
    except (subprocess.CalledProcessError, FileNotFoundError):
        return [
            entry
            for entry in repo_root.rglob("*")
            if entry.is_file()
            and SKIP_DIR_PARTS.isdisjoint(entry.relative_to(repo_root).parts)
        ]
    decoded = listing.decode("utf-8", "replace")
    # The output ends with a NUL, so the split leaves one empty name to drop.
    return [repo_root / name for name in decoded.split("\0") if name]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def is_binary(path: Path) -> bool:
    """Return True when ``path`` should be treated as binary.

    Only the first 8192 bytes are inspected: a NUL byte there marks the file
    as binary. A file that cannot be opened or read is also reported as binary.
    """
    try:
        with path.open("rb") as handle:
            head = handle.read(8192)
    except OSError:
        return True
    return b"\0" in head
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def scan_file(repo_root: Path, path: Path) -> list[tuple[str, int, str, str]]:
    """Return (relpath, lineno, label, snippet) findings for one file.

    Lines containing ALLOWLIST_MARKER are skipped. Every other line yields one
    finding per matching entry of PATTERNS, plus a "hardcoded credential"
    finding when ASSIGNMENT_RE matches and the captured value is not a
    placeholder. ``snippet`` is the stripped line cut to 120 characters and
    ``lineno`` is 1-based.

    Args:
        repo_root: Root that ``path`` is made relative to for reporting.
        path: File to scan; a file that cannot be read yields no findings.
    """
    findings: list[tuple[str, int, str, str]] = []
    rel = str(path.relative_to(repo_root))
    try:
        # errors="replace": a single invalid UTF-8 byte must not hide the
        # whole file from the scanner.
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return findings

    for lineno, line in enumerate(text.splitlines(), 1):
        if ALLOWLIST_MARKER in line:
            continue
        snippet = line.strip()[:120]
        findings.extend(
            (rel, lineno, label, snippet)
            for label, pat in PATTERNS
            if pat.search(line)
        )
        m = ASSIGNMENT_RE.search(line)
        if m and not is_placeholder(m.group("val")):
            findings.append((rel, lineno, "hardcoded credential", snippet))
    return findings
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def check_secrets(repo_root: Path) -> list[str]:
    """run.py entry point: return a list of error strings (empty == pass).

    Scans every path from ``tracked_files`` that has no component in
    SKIP_DIR_PARTS, is a regular file, and is not binary. Each finding is
    formatted as "relpath:lineno: label: snippet".
    """
    errors: list[str] = []
    for path in tracked_files(repo_root):
        components = path.relative_to(repo_root).parts
        if not SKIP_DIR_PARTS.isdisjoint(components):
            continue
        if path.is_file() and not is_binary(path):
            errors.extend(
                f"{rel}:{lineno}: {label}: {snippet}"
                for rel, lineno, label, snippet in scan_file(repo_root, path)
            )
    return errors
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def main() -> int:
    """CLI entry point: run check_secrets and print the outcome.

    The repo root is the first command-line argument, defaulting to the
    current directory. Returns 0 when nothing is found, otherwise 1.
    """
    root_arg = sys.argv[1] if len(sys.argv) > 1 else "."
    findings = check_secrets(Path(root_arg).resolve())
    if not findings:
        print("=== no secrets found ===")
        return 0

    print(f"[FAIL] secrets ({len(findings)} finding(s)):")
    for finding in findings:
        print(f" - {finding}")
    print()
    for advice in (
        "POL-143: remove the credential and rotate it. If this is a known",
        "non-secret (fixture/example), append '# pragma: allowlist secret'",
        "to the line.",
    ):
        print(advice)
    return 1
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# Run the secret scan when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|