pull-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pull_cli/__init__.py +5 -0
- pull_cli/__main__.py +6 -0
- pull_cli/assets.py +235 -0
- pull_cli/attachment_extractors.py +85 -0
- pull_cli/cli.py +329 -0
- pull_cli/clients/__init__.py +8 -0
- pull_cli/clients/base.py +29 -0
- pull_cli/clients/cloud_v2.py +132 -0
- pull_cli/clients/data_center.py +360 -0
- pull_cli/clients/hybrid.py +15 -0
- pull_cli/config.py +82 -0
- pull_cli/crawler.py +51 -0
- pull_cli/envelope.py +59 -0
- pull_cli/errors.py +50 -0
- pull_cli/extractor.py +344 -0
- pull_cli/guide.py +115 -0
- pull_cli/html_normalizer.py +111 -0
- pull_cli/links.py +186 -0
- pull_cli/macros.py +527 -0
- pull_cli/markdown_writer.py +24 -0
- pull_cli/models.py +232 -0
- pull_cli/paths.py +45 -0
- pull_cli/resolver.py +72 -0
- pull_cli/security.py +103 -0
- pull_cli/validator.py +398 -0
- pull_cli/writer.py +792 -0
- pull_cli-0.1.0.dist-info/METADATA +218 -0
- pull_cli-0.1.0.dist-info/RECORD +31 -0
- pull_cli-0.1.0.dist-info/WHEEL +4 -0
- pull_cli-0.1.0.dist-info/entry_points.txt +3 -0
- pull_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
pull_cli/validator.py
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import yaml
|
|
11
|
+
|
|
12
|
+
from .models import WarningRecord
|
|
13
|
+
from .security import contains_secret_text
|
|
14
|
+
|
|
15
|
+
# Inline Markdown link or image: ``[text](destination)``. Group 1 captures
# everything between the parentheses, so it may still carry a quoted title
# or angle brackets that the caller has to strip.
LOCAL_LINK_RE = re.compile(
    r"\[[^\]]*]\(([^)]+)\)"
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ValidationResult:
    """Outcome of validating one exported package.

    ``ok`` is flipped to ``False`` by ``_error`` whenever a problem is
    appended to ``errors``; ``validate_package`` recomputes it from
    ``errors`` before returning.
    """

    # Overall verdict for the package.
    ok: bool
    # Manifest file that validation was pointed at.
    manifest_path: Path
    # Directory that manifest-relative paths are resolved against.
    output_dir: Path
    # One dict per recorded problem (code, message, retryable,
    # suggested_action, details).
    errors: list[dict[str, Any]] = field(default_factory=list)
    # Warning records; nothing in the code visible here appends to this list.
    warnings: list[WarningRecord] = field(default_factory=list)
    # Counters filled in by ``validate_package`` ("pages", "assets",
    # "package_warnings").
    metrics: dict[str, Any] = field(default_factory=dict)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def validate_package(path: Path) -> ValidationResult:
    """Validate an exported package and collect every problem found.

    Args:
        path: Either the package directory (``manifest.yaml`` is looked up
            inside it) or the manifest file itself.

    Returns:
        A ``ValidationResult`` whose ``errors`` list holds one entry per
        problem and whose ``ok`` flag is ``True`` only when that list is
        empty. Problems with the manifest file itself (missing, unparsable,
        not a mapping, or the AI Markdown entrypoint passed by mistake)
        return immediately; everything else is accumulated.
    """
    manifest_path = path / "manifest.yaml" if path.is_dir() else path
    output_dir = manifest_path.parent
    result = ValidationResult(ok=True, manifest_path=manifest_path, output_dir=output_dir)
    if not manifest_path.exists():
        return _error(result, "ERR_VALIDATION_REQUIRED", "Manifest file does not exist.", {"path": str(manifest_path)})
    if manifest_path.is_file() and manifest_path.suffix.lower() == ".md":
        text = manifest_path.read_text(encoding="utf-8", errors="ignore")
        if text.lstrip().startswith("# AI Navigation Manifest"):
            return _error(
                result,
                "ERR_VALIDATION_REQUIRED",
                "This is the AI Markdown entrypoint; validate its parent directory or adjacent manifest.yaml.",
                {"path": str(manifest_path), "suggested_path": str(manifest_path.parent)},
            )
    try:
        manifest = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) or {}
    except Exception as exc:  # noqa: BLE001
        return _error(result, "ERR_VALIDATION_REQUIRED", "Manifest YAML could not be parsed.", {"reason": str(exc)})
    if not isinstance(manifest, dict):
        return _error(result, "ERR_VALIDATION_REQUIRED", "Manifest root must be a mapping.", {})
    _check_required(result, manifest, ["schema_version", "tool", "source", "root", "pages", "paths"])
    if manifest.get("path_base") is not None:
        _check_path_base(result, manifest.get("path_base"), "manifest.path_base")

    # Package-level entry files.
    manifest_paths = manifest.get("paths") if isinstance(manifest.get("paths"), dict) else {}
    if manifest_paths.get("bundle"):
        _check_relative_file(result, output_dir, manifest_paths.get("bundle"), "paths.bundle")
        bundle_path = _readable_package_file(result, output_dir, manifest_paths.get("bundle"), "paths.bundle")
        if bundle_path is not None:
            _check_markdown_links(result, bundle_path, output_dir)
    if manifest_paths.get("ai_manifest"):
        _check_relative_file(result, output_dir, manifest_paths.get("ai_manifest"), "paths.ai_manifest")
        _check_ai_manifest(result, output_dir, output_dir / str(manifest_paths.get("ai_manifest")))
    if manifest_paths.get("ai_entry"):
        _check_relative_file(result, output_dir, manifest_paths.get("ai_entry"), "paths.ai_entry")
        ai_entry_path = _readable_package_file(result, output_dir, manifest_paths.get("ai_entry"), "paths.ai_entry")
        if ai_entry_path is not None:
            _check_markdown_links(result, ai_entry_path, output_dir)

    pages = manifest.get("pages") if isinstance(manifest.get("pages"), list) else []
    assets = manifest.get("assets") if isinstance(manifest.get("assets"), list) else []
    result.metrics.update({"pages": len(pages), "assets": len(assets)})
    if not pages:
        _error(result, "ERR_VALIDATION_REQUIRED", "Manifest contains no pages.", {})

    # Per-page files: markdown and metadata are mandatory, the rest optional.
    for page in pages:
        if not isinstance(page, dict):
            _error(result, "ERR_VALIDATION_REQUIRED", "Page manifest entry is not a mapping.", {})
            continue
        paths = page.get("paths") if isinstance(page.get("paths"), dict) else {}
        for key in ("markdown", "metadata"):
            _check_relative_file(result, output_dir, paths.get(key), f"page.{key}")
        for optional in ("html", "source", "comments"):
            if paths.get(optional):
                _check_relative_file(result, output_dir, paths.get(optional), f"page.{optional}")
                if optional == "comments":
                    comments_path = _readable_package_file(result, output_dir, paths.get(optional), "page.comments")
                    if comments_path is not None:
                        _check_markdown_links(result, comments_path, output_dir)
        # A missing "markdown" entry used to collapse to the package directory
        # itself (``output_dir / ""``), which exists but cannot be read as text.
        markdown_path = _readable_package_file(result, output_dir, paths.get("markdown"), "page.markdown")
        if markdown_path is not None:
            _check_markdown_links(result, markdown_path, output_dir)
    _check_redacted_rewritten_links(result, output_dir, pages, manifest.get("links"))

    # Assets: the file must exist and, when a digest is recorded, match it.
    for asset in assets:
        if not isinstance(asset, dict):
            _error(result, "ERR_VALIDATION_REQUIRED", "Asset manifest entry is not a mapping.", {})
            continue
        local_path = asset.get("local_path")
        _check_relative_file(result, output_dir, local_path, "asset.local_path")
        expected_digest = asset.get("sha256")
        if not expected_digest:
            continue
        asset_path = _readable_package_file(result, output_dir, local_path, "asset.local_path")
        if asset_path is None:
            continue
        digest = hashlib.sha256(asset_path.read_bytes()).hexdigest()
        if digest != expected_digest:
            _error(
                result,
                "ERR_VALIDATION_REQUIRED",
                "Asset checksum does not match manifest.",
                {"asset": local_path},
            )

    # diagnostics/warnings.jsonl: every non-blank line must be valid JSON.
    warning_path = output_dir / "diagnostics" / "warnings.jsonl"
    package_warning_count = 0
    if warning_path.exists():
        for line_number, line in enumerate(warning_path.read_text(encoding="utf-8").splitlines(), start=1):
            if not line.strip():
                continue
            package_warning_count += 1
            try:
                json.loads(line)
            except json.JSONDecodeError as exc:
                _error(
                    result,
                    "ERR_VALIDATION_REQUIRED",
                    "warnings.jsonl contains invalid JSON.",
                    {"line": line_number, "reason": str(exc)},
                )
    result.metrics["package_warnings"] = package_warning_count

    _scan_for_secret_markers(result, output_dir)
    result.ok = not result.errors
    return result


def _readable_package_file(
    result: ValidationResult, output_dir: Path, relative: object, label: str
) -> Path | None:
    """Return the regular file *relative* names inside *output_dir*, else ``None``.

    Values that ``_check_relative_file`` rejects or reports as missing (not a
    non-empty string, absolute, containing ``..``, or nonexistent) yield
    ``None`` without a second error. A path that exists but is not a regular
    file is reported here, because reading it would raise instead of
    producing a validation result.
    """
    if not isinstance(relative, str) or not relative:
        return None
    rel_path = Path(relative)
    if rel_path.is_absolute() or ".." in rel_path.parts:
        # Never read outside the package on the say-so of manifest data.
        return None
    candidate = output_dir / rel_path
    if not candidate.exists():
        return None
    if not candidate.is_file():
        _error(result, "ERR_VALIDATION_REQUIRED", f"Path for {label} is not a regular file.", {"path": relative})
        return None
    return candidate
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _check_required(result: ValidationResult, manifest: dict[str, Any], keys: list[str]) -> None:
|
|
132
|
+
for key in keys:
|
|
133
|
+
if key not in manifest:
|
|
134
|
+
_error(result, "ERR_VALIDATION_REQUIRED", f"Manifest is missing required key {key!r}.", {})
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _check_relative_file(
|
|
138
|
+
result: ValidationResult, output_dir: Path, relative: object, label: str
|
|
139
|
+
) -> None:
|
|
140
|
+
if not isinstance(relative, str) or not relative:
|
|
141
|
+
_error(result, "ERR_VALIDATION_REQUIRED", f"Missing relative path for {label}.", {})
|
|
142
|
+
return
|
|
143
|
+
rel_path = Path(relative)
|
|
144
|
+
if rel_path.is_absolute() or ".." in rel_path.parts:
|
|
145
|
+
_error(result, "ERR_VALIDATION_REQUIRED", f"Path for {label} must be relative to output root.", {"path": relative})
|
|
146
|
+
return
|
|
147
|
+
if not (output_dir / rel_path).exists():
|
|
148
|
+
_error(result, "ERR_VALIDATION_REQUIRED", f"Referenced file for {label} does not exist.", {"path": relative})
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _check_markdown_links(result: ValidationResult, markdown_path: Path, output_dir: Path) -> None:
|
|
152
|
+
text = markdown_path.read_text(encoding="utf-8")
|
|
153
|
+
for match in LOCAL_LINK_RE.finditer(text):
|
|
154
|
+
target = _markdown_link_destination(match.group(1))
|
|
155
|
+
if not target or target in {"redacted-url", "<redacted-url>"} or target.startswith(("#", "/", "http://", "https://", "mailto:", "jira:")):
|
|
156
|
+
continue
|
|
157
|
+
target_path = target.split("#", 1)[0]
|
|
158
|
+
resolution_base = markdown_path.parent.resolve()
|
|
159
|
+
candidate_path = (markdown_path.parent / target_path).resolve()
|
|
160
|
+
details = {
|
|
161
|
+
"link": target,
|
|
162
|
+
"file": str(markdown_path),
|
|
163
|
+
"resolution_base": str(resolution_base),
|
|
164
|
+
"candidate_path": str(candidate_path),
|
|
165
|
+
}
|
|
166
|
+
if not candidate_path.is_relative_to(output_dir.resolve()):
|
|
167
|
+
_error(result, "ERR_VALIDATION_REQUIRED", "Markdown link escapes output directory.", details)
|
|
168
|
+
continue
|
|
169
|
+
if not candidate_path.exists():
|
|
170
|
+
_error(result, "ERR_VALIDATION_REQUIRED", "Markdown local link target does not exist.", details)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _check_redacted_rewritten_links(
|
|
174
|
+
result: ValidationResult, output_dir: Path, pages: list[Any], links: object
|
|
175
|
+
) -> None:
|
|
176
|
+
if not isinstance(links, list):
|
|
177
|
+
return
|
|
178
|
+
rewritten_links_by_page: dict[str, int] = {}
|
|
179
|
+
rewritten_assets_by_page: dict[str, int] = {}
|
|
180
|
+
redacted_link_allowance_by_page: dict[str, int] = {}
|
|
181
|
+
redacted_asset_allowance_by_page: dict[str, int] = {}
|
|
182
|
+
for link in links:
|
|
183
|
+
if not isinstance(link, dict):
|
|
184
|
+
continue
|
|
185
|
+
source_page_id = link.get("source_page_id")
|
|
186
|
+
if not isinstance(source_page_id, str) or not source_page_id:
|
|
187
|
+
continue
|
|
188
|
+
kind = link.get("kind")
|
|
189
|
+
if link.get("status") == "rewritten" and link.get("rewritten"):
|
|
190
|
+
if kind == "asset":
|
|
191
|
+
rewritten_assets_by_page[source_page_id] = rewritten_assets_by_page.get(source_page_id, 0) + 1
|
|
192
|
+
else:
|
|
193
|
+
rewritten_links_by_page[source_page_id] = rewritten_links_by_page.get(source_page_id, 0) + 1
|
|
194
|
+
elif kind == "asset":
|
|
195
|
+
redacted_asset_allowance_by_page[source_page_id] = redacted_asset_allowance_by_page.get(source_page_id, 0) + 1
|
|
196
|
+
elif kind not in {"anchor", "mailto"}:
|
|
197
|
+
redacted_link_allowance_by_page[source_page_id] = redacted_link_allowance_by_page.get(source_page_id, 0) + 1
|
|
198
|
+
|
|
199
|
+
for page in pages:
|
|
200
|
+
if not isinstance(page, dict):
|
|
201
|
+
continue
|
|
202
|
+
page_id = page.get("page_id")
|
|
203
|
+
if not isinstance(page_id, str):
|
|
204
|
+
continue
|
|
205
|
+
has_rewritten_links = rewritten_links_by_page.get(page_id, 0) > 0
|
|
206
|
+
has_rewritten_assets = rewritten_assets_by_page.get(page_id, 0) > 0
|
|
207
|
+
if not has_rewritten_links and not has_rewritten_assets:
|
|
208
|
+
continue
|
|
209
|
+
paths = page.get("paths") if isinstance(page.get("paths"), dict) else {}
|
|
210
|
+
markdown = paths.get("markdown")
|
|
211
|
+
if not isinstance(markdown, str) or not markdown:
|
|
212
|
+
continue
|
|
213
|
+
markdown_path = output_dir / markdown
|
|
214
|
+
if not markdown_path.exists():
|
|
215
|
+
continue
|
|
216
|
+
redacted_links = _redacted_markdown_link_lines(markdown_path)
|
|
217
|
+
redacted_page_links = [link for link in redacted_links if link["kind"] == "link"]
|
|
218
|
+
redacted_asset_links = [link for link in redacted_links if link["kind"] == "image"]
|
|
219
|
+
link_allowance = redacted_link_allowance_by_page.get(page_id, 0)
|
|
220
|
+
asset_allowance = redacted_asset_allowance_by_page.get(page_id, 0) + _asset_warning_allowance(page)
|
|
221
|
+
if has_rewritten_links and len(redacted_page_links) > link_allowance:
|
|
222
|
+
_error(
|
|
223
|
+
result,
|
|
224
|
+
"ERR_VALIDATION_REDACTED_REWRITTEN_LINK",
|
|
225
|
+
"Markdown contains redacted link placeholders although the manifest has rewritten local links for this page.",
|
|
226
|
+
{
|
|
227
|
+
"page_id": page_id,
|
|
228
|
+
"file": str(markdown_path),
|
|
229
|
+
"redacted_links": len(redacted_page_links),
|
|
230
|
+
"non_rewritten_link_allowance": link_allowance,
|
|
231
|
+
"rewritten_links": rewritten_links_by_page[page_id],
|
|
232
|
+
"examples": redacted_page_links[:5],
|
|
233
|
+
},
|
|
234
|
+
)
|
|
235
|
+
if has_rewritten_assets and len(redacted_asset_links) > asset_allowance:
|
|
236
|
+
_error(
|
|
237
|
+
result,
|
|
238
|
+
"ERR_VALIDATION_REDACTED_REWRITTEN_LINK",
|
|
239
|
+
"Markdown contains redacted image placeholders although the manifest has rewritten local assets for this page.",
|
|
240
|
+
{
|
|
241
|
+
"page_id": page_id,
|
|
242
|
+
"file": str(markdown_path),
|
|
243
|
+
"redacted_images": len(redacted_asset_links),
|
|
244
|
+
"non_rewritten_asset_allowance": asset_allowance,
|
|
245
|
+
"rewritten_assets": rewritten_assets_by_page[page_id],
|
|
246
|
+
"examples": redacted_asset_links[:5],
|
|
247
|
+
},
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _redacted_markdown_link_lines(markdown_path: Path) -> list[dict[str, Any]]:
|
|
252
|
+
examples = []
|
|
253
|
+
for line_number, line in enumerate(markdown_path.read_text(encoding="utf-8").splitlines(), start=1):
|
|
254
|
+
for match in LOCAL_LINK_RE.finditer(line):
|
|
255
|
+
if _markdown_link_destination(match.group(1)) == "redacted-url":
|
|
256
|
+
kind = "image" if match.start() > 0 and line[match.start() - 1] == "!" else "link"
|
|
257
|
+
examples.append({"line": line_number, "kind": kind, "text": line.strip()})
|
|
258
|
+
return examples
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _asset_warning_allowance(page: dict[str, Any]) -> int:
|
|
262
|
+
warnings = page.get("warnings") if isinstance(page.get("warnings"), list) else []
|
|
263
|
+
return sum(
|
|
264
|
+
1
|
|
265
|
+
for warning in warnings
|
|
266
|
+
if isinstance(warning, dict) and warning.get("code") in {"W_ASSET_DOWNLOAD_FAILED", "W_ASSET_SKIPPED_BY_POLICY"}
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _check_ai_manifest(result: ValidationResult, output_dir: Path, path: Path) -> None:
|
|
271
|
+
if not path.exists():
|
|
272
|
+
return
|
|
273
|
+
try:
|
|
274
|
+
ai_manifest = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
|
|
275
|
+
except Exception as exc: # noqa: BLE001
|
|
276
|
+
_error(result, "ERR_VALIDATION_REQUIRED", "AI manifest YAML could not be parsed.", {"reason": str(exc)})
|
|
277
|
+
return
|
|
278
|
+
if not isinstance(ai_manifest, dict):
|
|
279
|
+
_error(result, "ERR_VALIDATION_REQUIRED", "AI manifest root must be a mapping.", {})
|
|
280
|
+
return
|
|
281
|
+
_check_required(result, ai_manifest, ["schema_version", "path_base", "root", "entrypoints", "pages", "diagnostics"])
|
|
282
|
+
_check_path_base(result, ai_manifest.get("path_base"), "ai_manifest.path_base")
|
|
283
|
+
entrypoints = ai_manifest.get("entrypoints") if isinstance(ai_manifest.get("entrypoints"), dict) else {}
|
|
284
|
+
for label, entrypoint in entrypoints.items():
|
|
285
|
+
if entrypoint:
|
|
286
|
+
_check_relative_file(result, output_dir, entrypoint, f"ai_manifest.entrypoints.{label}")
|
|
287
|
+
pages = ai_manifest.get("pages") if isinstance(ai_manifest.get("pages"), list) else []
|
|
288
|
+
seen_names: set[str] = set()
|
|
289
|
+
page_children: list[tuple[str, list[Any]]] = []
|
|
290
|
+
page_parents: list[tuple[str, Any]] = []
|
|
291
|
+
for page in pages:
|
|
292
|
+
if not isinstance(page, dict):
|
|
293
|
+
_error(result, "ERR_VALIDATION_REQUIRED", "AI manifest page entry is not a mapping.", {})
|
|
294
|
+
continue
|
|
295
|
+
name = page.get("name")
|
|
296
|
+
if not isinstance(name, str) or not name:
|
|
297
|
+
_error(result, "ERR_VALIDATION_REQUIRED", "AI manifest page is missing a name.", {})
|
|
298
|
+
elif name in seen_names:
|
|
299
|
+
_error(result, "ERR_VALIDATION_REQUIRED", "AI manifest page names must be unique.", {"name": name})
|
|
300
|
+
else:
|
|
301
|
+
seen_names.add(name)
|
|
302
|
+
children = page.get("children") if isinstance(page.get("children"), list) else []
|
|
303
|
+
if isinstance(name, str) and name:
|
|
304
|
+
page_children.append((name, children))
|
|
305
|
+
page_parents.append((name, page.get("parent")))
|
|
306
|
+
_check_relative_file(result, output_dir, page.get("markdown"), "ai_manifest.page.markdown")
|
|
307
|
+
if page.get("comments"):
|
|
308
|
+
_check_relative_file(result, output_dir, page.get("comments"), "ai_manifest.page.comments")
|
|
309
|
+
assets = page.get("assets") if isinstance(page.get("assets"), list) else []
|
|
310
|
+
for asset in assets:
|
|
311
|
+
if not isinstance(asset, dict):
|
|
312
|
+
_error(result, "ERR_VALIDATION_REQUIRED", "AI manifest asset entry is not a mapping.", {})
|
|
313
|
+
continue
|
|
314
|
+
_check_relative_file(result, output_dir, asset.get("path"), "ai_manifest.asset.path")
|
|
315
|
+
sidecars = asset.get("sidecars") if isinstance(asset.get("sidecars"), list) else []
|
|
316
|
+
for sidecar in sidecars:
|
|
317
|
+
_check_relative_file(result, output_dir, sidecar, "ai_manifest.asset.sidecar")
|
|
318
|
+
for name, children in page_children:
|
|
319
|
+
for child in children:
|
|
320
|
+
if not isinstance(child, str) or child not in seen_names:
|
|
321
|
+
_error(
|
|
322
|
+
result,
|
|
323
|
+
"ERR_VALIDATION_REQUIRED",
|
|
324
|
+
"AI manifest child reference does not match a page name.",
|
|
325
|
+
{"page": name, "child": child},
|
|
326
|
+
)
|
|
327
|
+
for name, parent in page_parents:
|
|
328
|
+
if parent is not None and (not isinstance(parent, str) or parent not in seen_names):
|
|
329
|
+
_error(
|
|
330
|
+
result,
|
|
331
|
+
"ERR_VALIDATION_REQUIRED",
|
|
332
|
+
"AI manifest parent reference does not match a page name.",
|
|
333
|
+
{"page": name, "parent": parent},
|
|
334
|
+
)
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _check_path_base(result: ValidationResult, path_base: object, label: str) -> None:
|
|
338
|
+
if not isinstance(path_base, dict):
|
|
339
|
+
_error(result, "ERR_VALIDATION_REQUIRED", f"{label} must be a mapping.", {})
|
|
340
|
+
return
|
|
341
|
+
if path_base.get("kind") != "package_root":
|
|
342
|
+
_error(
|
|
343
|
+
result,
|
|
344
|
+
"ERR_VALIDATION_REQUIRED",
|
|
345
|
+
f"{label}.kind must be 'package_root'.",
|
|
346
|
+
{"kind": path_base.get("kind")},
|
|
347
|
+
)
|
|
348
|
+
if path_base.get("root") != ".":
|
|
349
|
+
_error(
|
|
350
|
+
result,
|
|
351
|
+
"ERR_VALIDATION_REQUIRED",
|
|
352
|
+
f"{label}.root must be '.'.",
|
|
353
|
+
{"root": path_base.get("root")},
|
|
354
|
+
)
|
|
355
|
+
rule = path_base.get("rule")
|
|
356
|
+
if not isinstance(rule, str) or (label == "ai_manifest.path_base" and "directory containing" not in rule):
|
|
357
|
+
_error(result, "ERR_VALIDATION_REQUIRED", f"{label}.rule must explain the package-root base.", {})
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def _markdown_link_destination(raw: str) -> str:
|
|
361
|
+
target = raw.strip()
|
|
362
|
+
if target.startswith("<"):
|
|
363
|
+
end = target.find(">")
|
|
364
|
+
return target[1:end] if end != -1 else target.strip("<>")
|
|
365
|
+
for marker in (' "', " '", "\t\"", "\t'"):
|
|
366
|
+
if marker in target:
|
|
367
|
+
return target.split(marker, 1)[0].strip()
|
|
368
|
+
return target
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _scan_for_secret_markers(result: ValidationResult, output_dir: Path) -> None:
|
|
372
|
+
for path in output_dir.rglob("*"):
|
|
373
|
+
if not path.is_file() or path.suffix.lower() not in {".yaml", ".yml", ".json", ".jsonl", ".md", ".html", ".xml"}:
|
|
374
|
+
continue
|
|
375
|
+
text = path.read_text(encoding="utf-8", errors="ignore")
|
|
376
|
+
if contains_secret_text(text):
|
|
377
|
+
_error(
|
|
378
|
+
result,
|
|
379
|
+
"ERR_VALIDATION_SECRET_PATTERN",
|
|
380
|
+
"A secret-like marker was found in a text output file.",
|
|
381
|
+
{"path": str(path.relative_to(output_dir))},
|
|
382
|
+
)
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def _error(
|
|
386
|
+
result: ValidationResult, code: str, message: str, details: dict[str, Any]
|
|
387
|
+
) -> ValidationResult:
|
|
388
|
+
result.ok = False
|
|
389
|
+
result.errors.append(
|
|
390
|
+
{
|
|
391
|
+
"code": code,
|
|
392
|
+
"message": message,
|
|
393
|
+
"retryable": False,
|
|
394
|
+
"suggested_action": "Regenerate the package or inspect the referenced file.",
|
|
395
|
+
"details": details,
|
|
396
|
+
}
|
|
397
|
+
)
|
|
398
|
+
return result
|