archapi 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- archapi/__init__.py +3 -0
- archapi/core.py +295 -0
- archapi/frameworks/__init__.py +0 -0
- archapi/frameworks/base.py +56 -0
- archapi/frameworks/detector.py +113 -0
- archapi/frameworks/express_ts/__init__.py +0 -0
- archapi/frameworks/express_ts/adapter.py +237 -0
- archapi/frameworks/fastapi_adapter.py +184 -0
- archapi/frameworks/generic.py +211 -0
- archapi/frameworks/registry.py +28 -0
- archapi/generation/__init__.py +0 -0
- archapi/genome/__init__.py +0 -0
- archapi/indexing/__init__.py +0 -0
- archapi/indexing/cache.py +141 -0
- archapi/mapping/__init__.py +0 -0
- archapi/planning/__init__.py +0 -0
- archapi/planning/intent_planner.py +175 -0
- archapi/planning/task_dag.py +81 -0
- archapi/scanner/__init__.py +0 -0
- archapi/security/__init__.py +0 -0
- archapi/security/context_redactor.py +38 -0
- archapi/security/policy_gate.py +70 -0
- archapi/security/secret_scanner.py +90 -0
- archapi/types.py +103 -0
- archapi/validation/__init__.py +0 -0
- archapi/validation/architecture_score.py +77 -0
- archapi/validation/basic_validators.py +38 -0
- archapi/validation/command_validator.py +146 -0
- archapi-0.3.0.dist-info/METADATA +79 -0
- archapi-0.3.0.dist-info/RECORD +33 -0
- archapi-0.3.0.dist-info/WHEEL +5 -0
- archapi-0.3.0.dist-info/licenses/LICENSE +21 -0
- archapi-0.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
from dataclasses import asdict, is_dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
from archapi.types import APIGenome, DetectionResult, ScanResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Directory names that are never hashed, wherever they appear below the
# project root.
IGNORED_DIRS = {
    ".git",
    ".venv",
    "node_modules",
    "dist",
    "build",
    "coverage",
    "__pycache__",
    "vendor",
    "target",
    ".archapi",
}


class CacheManager:
    """Store JSON artefacts and file hashes under ``<project>/.archapi``."""

    def __init__(self, project_path: Path):
        """Remember the project root and derive the cache directory from it.

        Args:
            project_path: Root directory of the analysed project. A plain
                string is accepted and converted to ``Path``.
        """
        self.project_path = Path(project_path)
        self.cache_dir = self.project_path / ".archapi"

    def ensure_cache_dir(self) -> None:
        """Create the cache directory (and missing parents) if needed."""
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def save_json(self, name: str, data: Dict[str, Any]) -> Path:
        """Write ``data`` as pretty-printed, key-sorted JSON.

        Args:
            name: File name inside the cache directory.
            data: Payload; it is normalised with ``_json_safe`` first.

        Returns:
            The path of the file that was written.
        """
        self.ensure_cache_dir()
        target = self.cache_dir / name
        target.write_text(
            json.dumps(self._json_safe(data), indent=2, sort_keys=True),
            encoding="utf-8",
        )
        return target

    def load_json(self, name: str) -> Optional[Dict[str, Any]]:
        """Read a cached JSON document.

        Returns:
            The decoded document, or ``None`` when the file is missing,
            unreadable or does not contain valid JSON.
        """
        target = self.cache_dir / name
        try:
            return json.loads(target.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # A cache that cannot be read is treated as a cache miss.
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            return None

    def hash_file(self, path: Path) -> str:
        """Return the SHA-256 hex digest of ``path``, read in 8 KiB chunks."""
        digest = hashlib.sha256()
        with path.open("rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def collect_file_hashes(self) -> Dict[str, str]:
        """Hash every regular file below the project root.

        Returns:
            Mapping of project-relative path (as ``str``) to SHA-256 digest.
        """
        hashes: Dict[str, str] = {}

        for path in self.project_path.rglob("*"):
            # Skips directories, broken symlinks, sockets and FIFOs, none of
            # which can be hashed safely.
            if not path.is_file():
                continue

            # Only components *below* the project root are checked, so a
            # project that lives under e.g. ``/srv/build/app`` is not
            # ignored wholesale.
            rel = path.relative_to(self.project_path)
            if any(part in IGNORED_DIRS for part in rel.parts):
                continue

            hashes[str(rel)] = self.hash_file(path)

        return hashes

    def changed_files(self) -> List[str]:
        """Compare current hashes with the saved ``file_hashes.json``.

        Returns:
            Sorted relative paths that were added, removed or modified.
        """
        old_hashes = self.load_json("file_hashes.json") or {}
        new_hashes = self.collect_file_hashes()

        all_files = set(old_hashes) | set(new_hashes)
        return [
            file
            for file in sorted(all_files)
            if old_hashes.get(file) != new_hashes.get(file)
        ]

    def save_snapshot(
        self,
        detection: "DetectionResult",
        scan: "ScanResult",
        maps: Dict[str, Any],
        genome: "APIGenome",
    ) -> Dict[str, Path]:
        """Write the file hashes and the four analysis artefacts to the cache.

        Returns:
            Mapping of artefact key to the path of the written JSON file.
        """
        saved: Dict[str, Path] = {}

        saved["file_hashes"] = self.save_json(
            "file_hashes.json",
            self.collect_file_hashes(),
        )

        artefacts = (
            ("detection", "detection.json", detection),
            ("project_index", "project_index.json", scan),
            ("maps", "maps.json", maps),
            ("genome", "genome.json", genome),
        )
        for key, filename, payload in artefacts:
            saved[key] = self.save_json(filename, self._json_safe(payload))

        return saved

    def _json_safe(self, value: Any) -> Any:
        """Recursively convert ``value`` into JSON-serialisable primitives.

        Dataclass instances become dicts, ``Path`` objects become strings
        (relative to the project root when possible), dict keys are
        stringified, and lists, tuples and sets become lists. Any other
        value is returned unchanged.
        """
        # is_dataclass() is also true for dataclass *types*; asdict() only
        # accepts instances.
        if is_dataclass(value) and not isinstance(value, type):
            return self._json_safe(asdict(value))

        if isinstance(value, Path):
            try:
                return str(value.relative_to(self.project_path))
            except ValueError:
                # Not inside the project root: keep the path as given.
                return str(value)

        if isinstance(value, dict):
            return {str(k): self._json_safe(v) for k, v in value.items()}

        # asdict() keeps tuples as tuples, so they must be walked too or
        # nested Path objects would reach json.dumps unconverted.
        if isinstance(value, (list, tuple)):
            return [self._json_safe(v) for v in value]

        if isinstance(value, (set, frozenset)):
            # Sets have no JSON form; emit a list in a stable order.
            return sorted((self._json_safe(v) for v in value), key=repr)

        return value
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Dict, List
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class IntentPlan:
    """Result of planning one natural-language API request."""

    method: str  # HTTP verb, e.g. "GET"
    path: str  # route template, e.g. "/users/{id}"
    entities: List[str]  # at most two entity names, e.g. ["User", "Order"]
    resource: str  # last entry of ``entities``
    action: str  # e.g. "read", "create", "history"
    response_status: int  # 201 for POST, 200 otherwise
    metadata: Dict[str, str] = field(default_factory=dict)


class IntentPlanner:
    """Deterministic, keyword-based planner that turns a request into a plan."""

    EXPLICIT_METHODS = {
        "get": "GET",
        "post": "POST",
        "put": "PUT",
        "patch": "PATCH",
        "delete": "DELETE",
    }

    ENTITY_RULES = [
        ("order", "Order"),
        ("orders", "Order"),
        ("product", "Product"),
        ("products", "Product"),
        ("review", "Review"),
        ("reviews", "Review"),
        ("payment", "Payment"),
        ("payments", "Payment"),
        ("inventory", "Inventory"),
        ("booking", "Booking"),
        ("bookings", "Booking"),
        ("account", "Account"),
        ("accounts", "Account"),
        ("user", "User"),
        ("users", "User"),
        ("profile", "Profile"),
        ("profiles", "Profile"),
    ]

    PLURAL_RULES = {
        "History": "histories",
        "Category": "categories",
        "Company": "companies",
        "Inventory": "inventory",
    }

    # Endings a keyword may carry and still count as the same word
    # ("orders", "created", "adding", "partially").
    _INFLECTION = r"(?:s|es|d|ed|ing|ly)?"

    def plan(self, request: str) -> IntentPlan:
        """Build an ``IntentPlan`` from a free-text request.

        Args:
            request: Natural-language description; matching is done on the
                lower-cased text.
        """
        text = request.lower()
        method = self._infer_method(text)
        entities = self._infer_entities(text)
        resource = entities[-1] if entities else "Resource"
        action = self._infer_action(text, method)
        path = self._infer_path(text, method, resource)

        return IntentPlan(
            method=method,
            path=path,
            entities=entities,
            resource=resource,
            action=action,
            response_status=self._response_status(method),
            metadata={"planner": "deterministic-v0.2"},
        )

    @classmethod
    def _mentions(cls, text: str, *keywords: str) -> bool:
        """Return True when any keyword occurs in ``text`` as a whole word.

        A keyword matches only if it is not embedded in a longer run of
        letters, optionally followed by a doubled final consonant and a
        common inflection. "add" therefore matches "adds"/"adding" but not
        "address", and "review" does not match "preview". ``text`` is
        expected to be lower-case.
        """
        for keyword in keywords:
            doubled = re.escape(keyword[-1])
            pattern = (
                rf"(?<![a-z]){re.escape(keyword)}"
                rf"(?:{doubled})?{cls._INFLECTION}(?![a-z])"
            )
            if re.search(pattern, text):
                return True
        return False

    def _pluralize(self, resource: str) -> str:
        """Return the lower-case plural used as the URL collection segment."""
        if resource in self.PLURAL_RULES:
            return self.PLURAL_RULES[resource]

        word = resource.lower()
        if word.endswith(("ss", "x", "z", "ch", "sh")):
            return f"{word}es"
        if len(word) > 1 and word.endswith("y") and word[-2] not in "aeiou":
            return f"{word[:-1]}ies"
        if word.endswith("s"):
            # Assumed to be plural already (e.g. a fallback entity taken
            # verbatim from the request such as "invoices").
            return word
        return f"{word}s"

    def _infer_method(self, text: str) -> str:
        """Pick the HTTP method: explicit verb first, then action keywords."""
        # The first whitespace-delimited explicit verb in the text wins, so
        # "delete a post" is DELETE rather than POST.
        for token in text.split():
            if token in self.EXPLICIT_METHODS:
                return self.EXPLICIT_METHODS[token]

        if self._mentions(text, "create", "add", "submit"):
            return "POST"
        if self._mentions(text, "update", "edit", "replace"):
            return "PUT"
        if self._mentions(text, "modify", "partial"):
            return "PATCH"
        if self._mentions(text, "delete", "remove", "disable"):
            return "DELETE"

        return "GET"

    def _infer_entities(self, text: str) -> List[str]:
        """Return up to two entity names mentioned in ``text``.

        Known parent/child pairs are returned in parent-first order. When no
        known entity is found, the first two non-stop-words longer than two
        characters are capitalised and used; ``["Resource"]`` is the last
        resort.
        """
        detected: List[str] = []

        for keyword, entity in self.ENTITY_RULES:
            if entity not in detected and self._mentions(text, keyword):
                detected.append(entity)

        if self._mentions(text, "user") and self._mentions(text, "order"):
            return ["User", "Order"]

        if self._mentions(text, "product") and self._mentions(text, "review"):
            return ["Product", "Review"]

        if self._mentions(text, "product") and self._mentions(text, "inventory"):
            return ["Product", "Inventory"]

        if detected:
            return detected[:2]

        words = re.findall(r"[A-Za-z]+", text)
        stop = {
            "create", "get", "fetch", "update", "delete", "api", "for",
            "a", "an", "the", "to", "by", "of", "history", "status",
            "new", "existing", "details", "detail",
        }

        fallback = [
            word.capitalize()
            for word in words
            if word.lower() not in stop and len(word) > 2
        ]

        return fallback[:2] or ["Resource"]

    def _infer_action(self, text: str, method: str) -> str:
        """Name the action: a special keyword if present, else by method."""
        # Checked in priority order; the first keyword found wins.
        for keyword in ("history", "status", "review", "inventory", "disable"):
            if self._mentions(text, keyword):
                return keyword
        if self._mentions(text, "cancel", "cancellation"):
            return "cancellation"

        return {
            "GET": "read",
            "POST": "create",
            "PUT": "update",
            "PATCH": "partial_update",
            "DELETE": "delete",
        }.get(method, "unknown")

    def _infer_path(self, text: str, method: str, resource: str) -> str:
        """Return the route template for the request."""
        has = self._mentions

        if has(text, "user") and has(text, "order"):
            return "/users/{user_id}/orders"

        if has(text, "product") and has(text, "review"):
            if method == "POST":
                return "/products/{product_id}/reviews"
            return "/products/{product_id}/reviews/{id}"

        if has(text, "payment") and has(text, "status"):
            return "/payments/{id}/status"

        if has(text, "product") and has(text, "inventory"):
            return "/products/{product_id}/inventory"

        if has(text, "user") and (has(text, "disable") or method == "DELETE"):
            return "/users/{id}"

        if has(text, "booking") and has(text, "cancel", "cancellation"):
            return "/bookings/{id}/cancellation"

        plural = self._pluralize(resource)

        # POST targets the collection; every other method targets one item.
        if method == "POST":
            return f"/{plural}"

        return f"/{plural}/{{id}}"

    def _response_status(self, method: str) -> int:
        """Return 201 for POST and 200 for every other method."""
        if method == "POST":
            return 201
        return 200
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Dict, List, Set
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class TaskNode:
    """One task in the DAG."""

    name: str
    depends_on: List[str] = field(default_factory=list)  # prerequisite names
    status: str = "pending"  # "pending", "completed", "failed" or "blocked"


@dataclass
class TaskDAG:
    """Dependency graph of named tasks with simple status tracking."""

    nodes: Dict[str, TaskNode] = field(default_factory=dict)

    def add_task(self, name: str, depends_on: Optional[List[str]] = None) -> None:
        """Register (or replace) a task.

        Args:
            name: Unique task name.
            depends_on: Names of tasks that must complete first.
        """
        # Copy the list so later changes to the caller's list cannot alter
        # the graph behind its back.
        self.nodes[name] = TaskNode(name=name, depends_on=list(depends_on or []))

    def ready_tasks(self) -> List[str]:
        """Return pending tasks whose dependencies are all completed.

        Tasks are listed in insertion order. A dependency that was never
        added to the DAG can never be completed, so it keeps its dependents
        from becoming ready.
        """
        completed = {
            name
            for name, node in self.nodes.items()
            if node.status == "completed"
        }

        return [
            name
            for name, node in self.nodes.items()
            if node.status == "pending"
            and all(dep in completed for dep in node.depends_on)
        ]

    def mark_completed(self, name: str) -> None:
        """Mark ``name`` as completed. Raises ``KeyError`` if unknown."""
        self.nodes[name].status = "completed"

    def mark_failed_and_block_dependents(self, name: str) -> List[str]:
        """Mark ``name`` as failed and block every pending task depending on it.

        Direct and transitive dependents are considered; only those still
        "pending" are switched to "blocked".

        Returns:
            Names of the tasks that were blocked, in insertion order.

        Raises:
            KeyError: If ``name`` is not a task in this DAG.
        """
        self.nodes[name].status = "failed"

        # Build the reverse edges once, then walk outwards from the failed
        # task. This visits every edge a single time instead of running a
        # fresh search per node.
        dependents: Dict[str, List[str]] = {}
        for node_name, node in self.nodes.items():
            for dep in node.depends_on:
                dependents.setdefault(dep, []).append(node_name)

        affected: Set[str] = set()
        stack = [name]
        while stack:
            for child in dependents.get(stack.pop(), ()):
                if child not in affected:
                    affected.add(child)
                    stack.append(child)

        blocked: List[str] = []
        for node_name, node in self.nodes.items():
            if node_name in affected and node.status == "pending":
                node.status = "blocked"
                blocked.append(node_name)

        return blocked

    def _depends_on(self, node_name: str, dependency: str) -> bool:
        """Return True if ``node_name`` depends on ``dependency``.

        Both direct and transitive dependencies count. The search is
        iterative, so long dependency chains cannot exhaust the recursion
        limit, and the ``visited`` set makes it terminate on cycles.
        """
        visited: Set[str] = set()
        stack = [node_name]

        while stack:
            current = stack.pop()
            if current in visited or current not in self.nodes:
                continue
            visited.add(current)

            deps = self.nodes[current].depends_on
            if dependency in deps:
                return True
            stack.extend(deps)

        return False

    @classmethod
    def default_api_dag(cls) -> "TaskDAG":
        """Build the standard detect-then-generate pipeline."""
        dag = cls()
        dag.add_task("detect_models")
        dag.add_task("detect_route_style")
        dag.add_task("detect_auth_style")
        dag.add_task("detect_service_style")
        dag.add_task("generate_schema", ["detect_models"])
        dag.add_task("generate_route", ["detect_route_style", "detect_auth_style"])
        dag.add_task("generate_service", ["detect_models", "detect_service_style"])
        dag.add_task("generate_controller", ["generate_schema", "generate_route", "generate_service"])
        dag.add_task("generate_test", ["generate_controller", "generate_route"])
        return dag
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ContextRedactor:
    """
    Redacts obvious sensitive values before text is sent to a language model.
    """

    # What counts as a secret value after ``key =`` / ``key:``. Tried in order:
    #   1. a double-quoted string of 8+ characters (the whole string),
    #   2. a single-quoted string of 8+ characters (the whole string),
    #   3. a bare or half-quoted run of 8+ token characters. "+", "/" and "="
    #      are included so base64-style values are consumed completely and
    #      no tail is left behind; "=" may not lead, so that ``token==value``
    #      comparisons are not treated as assignments.
    _VALUE = (
        r"(?:\"[^\"\r\n]{8,}\""
        r"|'[^'\r\n]{8,}'"
        r"|['\"]?[A-Za-z0-9_\-.+/][A-Za-z0-9_\-.+/=]{7,}['\"]?)"
    )

    # (pattern, replacement) pairs, applied in this order. In the assignment
    # patterns group 1 is the ``key = `` prefix, which is kept. The optional
    # quote after the key name lets JSON-style ``"api_key": "..."`` match.
    REDACTIONS = [
        (
            re.compile(r"(?i)(api[_-]?key['\"]?\s*[:=]\s*)" + _VALUE),
            r"\1[REDACTED_API_KEY]",
        ),
        (
            re.compile(r"(?i)((?:access_token|refresh_token|token)['\"]?\s*[:=]\s*)" + _VALUE),
            r"\1[REDACTED_TOKEN]",
        ),
        (
            re.compile(r"(?i)((?:secret|client_secret)['\"]?\s*[:=]\s*)" + _VALUE),
            r"\1[REDACTED_SECRET]",
        ),
        (
            re.compile(r"-----BEGIN (RSA |EC |OPENSSH |DSA )?PRIVATE KEY-----.*?-----END (RSA |EC |OPENSSH |DSA )?PRIVATE KEY-----", re.DOTALL),
            "[REDACTED_PRIVATE_KEY]",
        ),
        (
            re.compile(r"AKIA[0-9A-Z]{16}"),
            "[REDACTED_AWS_ACCESS_KEY]",
        ),
    ]

    def redact(self, text: str) -> str:
        """Return ``text`` with every ``REDACTIONS`` pattern replaced.

        Args:
            text: Arbitrary text, e.g. source code or configuration.

        Returns:
            The text with matched values swapped for ``[REDACTED_*]``
            placeholders; empty input is returned unchanged.
        """
        if not text:
            return text

        redacted = text
        for pattern, replacement in self.REDACTIONS:
            redacted = pattern.sub(replacement, redacted)
        return redacted
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import List
|
|
6
|
+
|
|
7
|
+
from archapi.types import GeneratedFile, GenerationResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class PolicyReport:
    """Outcome of a policy check."""

    allowed: bool  # True when ``errors`` is empty
    errors: List[str] = field(default_factory=list)  # blocking violations
    warnings: List[str] = field(default_factory=list)  # non-blocking notices


class PolicyGate:
    """
    Simple policy gate.

    Blocks generated patches from touching risky files and sensitive paths.
    """

    BLOCKED_EXACT_FILENAMES = {
        ".env",
        ".env.local",
        ".env.production",
        "id_rsa",
        "id_ed25519",
    }

    BLOCKED_PATH_PARTS = {
        ".git",
        ".venv",
        "node_modules",
        ".archapi",
    }

    SENSITIVE_KEYWORDS = {
        "payment",
        "billing",
        "auth.core",
        "passport",
        "oauth",
        "jwt.secret",
    }

    def validate_files(self, files: "List[GeneratedFile]") -> PolicyReport:
        """Check each generated file's path against the policy.

        Args:
            files: Objects exposing a ``path`` attribute.

        Returns:
            A report with one error per blocked file name or protected path
            component, and one warning per sensitive keyword found in the
            path. ``allowed`` is True only when there are no errors.
        """
        errors: List[str] = []
        warnings: List[str] = []

        # Compared case-insensitively: on case-insensitive filesystems
        # ".GIT/config" and ".git/config" are the same file.
        blocked_names = {name.lower() for name in self.BLOCKED_EXACT_FILENAMES}
        blocked_parts = {part.lower() for part in self.BLOCKED_PATH_PARTS}

        for generated in files:
            path = Path(generated.path)

            # Split on both separators so a Windows-style path cannot slip
            # past the component checks when validated on POSIX.
            normalized = str(generated.path).replace("\\", "/").lower()
            parts = [part for part in normalized.split("/") if part]
            filename = parts[-1] if parts else ""

            if filename in blocked_names:
                errors.append(f"Policy blocked write to sensitive file: {path}")

            if blocked_parts.intersection(parts):
                errors.append(f"Policy blocked write inside protected path: {path}")

            lowered = str(path).lower()
            # Sorted so the warning order does not depend on set iteration
            # order, which varies between interpreter runs.
            for keyword in sorted(self.SENSITIVE_KEYWORDS):
                if keyword in lowered:
                    warnings.append(f"Generated patch touches sensitive area '{keyword}': {path}")

        return PolicyReport(allowed=not errors, errors=errors, warnings=warnings)

    def validate_result(self, result: "GenerationResult") -> PolicyReport:
        """Validate every file in ``result.files``."""
        return self.validate_files(result.files)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import List
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class SecretFinding:
    """One line that matched a secret pattern."""

    file: str  # path relative to the scanned project root
    line: int  # 1-based line number
    pattern: str  # key of the matching entry in DEFAULT_PATTERNS
    preview: str  # stripped line, truncated to 80 characters


@dataclass
class SecretScanReport:
    """Result of a scan; ``success`` is True when nothing was found."""

    success: bool
    findings: List[SecretFinding] = field(default_factory=list)


class SecretScanner:
    """
    Lightweight local secret scanner.

    This is not a replacement for Gitleaks or TruffleHog.
    It is a first safety layer for ArchAPI.
    """

    DEFAULT_PATTERNS = {
        "api_key_assignment": re.compile(r"(?i)(api[_-]?key)\s*[:=]\s*['\"]?[A-Za-z0-9_\-]{12,}"),
        "secret_assignment": re.compile(r"(?i)(secret|client_secret)\s*[:=]\s*['\"]?[A-Za-z0-9_\-]{12,}"),
        "token_assignment": re.compile(r"(?i)(token|access_token|refresh_token)\s*[:=]\s*['\"]?[A-Za-z0-9_\-\.]{12,}"),
        "private_key": re.compile(r"-----BEGIN (RSA |EC |OPENSSH |DSA )?PRIVATE KEY-----"),
        "aws_access_key": re.compile(r"AKIA[0-9A-Z]{16}"),
    }

    IGNORED_DIRS = {
        ".git",
        ".venv",
        "node_modules",
        "dist",
        "build",
        "coverage",
        "__pycache__",
        "vendor",
        "target",
        ".archapi",
    }

    def __init__(self, project_path: Path):
        """Store the root directory to scan (a string is accepted too)."""
        self.project_path = Path(project_path)

    def scan(self) -> SecretScanReport:
        """Scan every regular file below the project root, line by line.

        Files under a directory listed in ``IGNORED_DIRS`` and files that
        cannot be read are skipped. Each (line, pattern) match yields one
        finding.

        Returns:
            A report whose findings are ordered by path, then line number.
        """
        findings: List[SecretFinding] = []

        # Sorted so the report does not depend on directory traversal order.
        for path in sorted(self.project_path.rglob("*")):
            # Skips directories, broken symlinks, sockets and FIFOs.
            if not path.is_file():
                continue

            # Test only the components below the project root. Testing the
            # absolute path would skip every file of a project located
            # under e.g. ``/home/ci/build/`` and report a false "success".
            rel_path = path.relative_to(self.project_path)
            if any(part in self.IGNORED_DIRS for part in rel_path.parts):
                continue

            try:
                text = path.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                # Unreadable file (permissions, vanished mid-scan): skip it.
                # Decoding cannot fail because of errors="ignore".
                continue

            rel = str(rel_path)

            for line_no, line in enumerate(text.splitlines(), start=1):
                for pattern_name, pattern in self.DEFAULT_PATTERNS.items():
                    if pattern.search(line):
                        findings.append(
                            SecretFinding(
                                file=rel,
                                line=line_no,
                                pattern=pattern_name,
                                # NOTE(review): the preview holds the raw
                                # line, so it can contain the secret itself.
                                preview=self._preview(line),
                            )
                        )

        return SecretScanReport(success=not findings, findings=findings)

    def _preview(self, line: str) -> str:
        """Return the stripped line, cut to 77 characters plus "..." if long."""
        clean = line.strip()
        if len(clean) <= 80:
            return clean
        return clean[:77] + "..."
|