agentsgen 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentsgen/__init__.py +3 -0
- agentsgen/__main__.py +7 -0
- agentsgen/_smoke.py +123 -0
- agentsgen/actions.py +61 -0
- agentsgen/analyze.py +370 -0
- agentsgen/cli.py +35 -0
- agentsgen/cli_core.py +414 -0
- agentsgen/cli_extra.py +232 -0
- agentsgen/cli_pack.py +266 -0
- agentsgen/cli_support.py +213 -0
- agentsgen/cli_task.py +209 -0
- agentsgen/compat.py +18 -0
- agentsgen/config.py +248 -0
- agentsgen/config_io.py +15 -0
- agentsgen/constants.py +55 -0
- agentsgen/contracts.py +1262 -0
- agentsgen/detect/__init__.py +6 -0
- agentsgen/detect/detect.py +222 -0
- agentsgen/detect/fs.py +28 -0
- agentsgen/detect/github.py +21 -0
- agentsgen/detect/makefile.py +40 -0
- agentsgen/detect/model.py +35 -0
- agentsgen/detect/node.py +76 -0
- agentsgen/detect/python.py +127 -0
- agentsgen/flow_ops.py +249 -0
- agentsgen/generate.py +271 -0
- agentsgen/generated_artifacts.py +54 -0
- agentsgen/io_utils.py +35 -0
- agentsgen/llm.py +167 -0
- agentsgen/markers.py +167 -0
- agentsgen/mcp_server.py +384 -0
- agentsgen/meta.py +110 -0
- agentsgen/model.py +86 -0
- agentsgen/normalize.py +24 -0
- agentsgen/pack_engine.py +994 -0
- agentsgen/patch_engine.py +369 -0
- agentsgen/presets/__init__.py +63 -0
- agentsgen/presets/cli-node.agentsgen.json +52 -0
- agentsgen/presets/cli-python.agentsgen.json +53 -0
- agentsgen/presets/fastapi.agentsgen.json +52 -0
- agentsgen/presets/monorepo-js.agentsgen.json +53 -0
- agentsgen/presets/nextjs.agentsgen.json +53 -0
- agentsgen/presets/python-lib.agentsgen.json +52 -0
- agentsgen/providers/__init__.py +1 -0
- agentsgen/providers/anthropic.py +36 -0
- agentsgen/providers/openai.py +36 -0
- agentsgen/render.py +21 -0
- agentsgen/result_types.py +104 -0
- agentsgen/shared_sections.py +321 -0
- agentsgen/site_pack.py +154 -0
- agentsgen/stacks/__init__.py +18 -0
- agentsgen/stacks/base.py +20 -0
- agentsgen/stacks/node.py +79 -0
- agentsgen/stacks/python.py +60 -0
- agentsgen/stacks/static.py +27 -0
- agentsgen/task_loop.py +487 -0
- agentsgen/templates/common/prompts/execspec.md +14 -0
- agentsgen/templates/node/AGENTS.md.tpl +60 -0
- agentsgen/templates/node/RUNBOOK.md.tpl +19 -0
- agentsgen/templates/pack/node/CONTRIBUTING_AI.md.tpl +16 -0
- agentsgen/templates/pack/node/LLMS.md.tpl +25 -0
- agentsgen/templates/pack/node/README_SNIPPETS.md.tpl +23 -0
- agentsgen/templates/pack/node/SECURITY_AI.md.tpl +14 -0
- agentsgen/templates/pack/node/architecture.md.tpl +14 -0
- agentsgen/templates/pack/node/data-contracts.md.tpl +16 -0
- agentsgen/templates/pack/node/how-to-run.md.tpl +18 -0
- agentsgen/templates/pack/node/how-to-test.md.tpl +19 -0
- agentsgen/templates/pack/node/llms.txt.tpl +23 -0
- agentsgen/templates/pack/python/CONTRIBUTING_AI.md.tpl +16 -0
- agentsgen/templates/pack/python/LLMS.md.tpl +25 -0
- agentsgen/templates/pack/python/README_SNIPPETS.md.tpl +23 -0
- agentsgen/templates/pack/python/SECURITY_AI.md.tpl +14 -0
- agentsgen/templates/pack/python/architecture.md.tpl +14 -0
- agentsgen/templates/pack/python/data-contracts.md.tpl +16 -0
- agentsgen/templates/pack/python/how-to-run.md.tpl +18 -0
- agentsgen/templates/pack/python/how-to-test.md.tpl +19 -0
- agentsgen/templates/pack/python/llms.txt.tpl +23 -0
- agentsgen/templates/pack/static/CONTRIBUTING_AI.md.tpl +16 -0
- agentsgen/templates/pack/static/LLMS.md.tpl +25 -0
- agentsgen/templates/pack/static/README_SNIPPETS.md.tpl +23 -0
- agentsgen/templates/pack/static/SECURITY_AI.md.tpl +14 -0
- agentsgen/templates/pack/static/architecture.md.tpl +14 -0
- agentsgen/templates/pack/static/data-contracts.md.tpl +16 -0
- agentsgen/templates/pack/static/how-to-run.md.tpl +18 -0
- agentsgen/templates/pack/static/how-to-test.md.tpl +19 -0
- agentsgen/templates/pack/static/llms.txt.tpl +23 -0
- agentsgen/templates/python/AGENTS.md.tpl +60 -0
- agentsgen/templates/python/RUNBOOK.md.tpl +19 -0
- agentsgen/templates/shared/guardrails.md.j2 +59 -0
- agentsgen/templates/shared/repo_context.md.j2 +124 -0
- agentsgen/templates/shared/style.md.j2 +31 -0
- agentsgen/templates/shared/verification.md.j2 +30 -0
- agentsgen/templates/shared/workflow.md.j2 +43 -0
- agentsgen/templates/static/AGENTS.md.tpl +60 -0
- agentsgen/templates/static/RUNBOOK.md.tpl +19 -0
- agentsgen/templates.py +19 -0
- agentsgen/understand.py +51 -0
- agentsgen/understand_ast.py +326 -0
- agentsgen/understand_context.py +936 -0
- agentsgen/validators.py +168 -0
- agentsgen-0.2.1.dist-info/METADATA +538 -0
- agentsgen-0.2.1.dist-info/RECORD +106 -0
- agentsgen-0.2.1.dist-info/WHEEL +5 -0
- agentsgen-0.2.1.dist-info/entry_points.txt +3 -0
- agentsgen-0.2.1.dist-info/licenses/LICENSE +21 -0
- agentsgen-0.2.1.dist-info/top_level.txt +1 -0
agentsgen/__init__.py
ADDED
agentsgen/__main__.py
ADDED
agentsgen/_smoke.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import tempfile
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from agentsgen.actions import apply_config, save_tool_config
|
|
8
|
+
from agentsgen.config import ToolConfig
|
|
9
|
+
from agentsgen.model import ProjectInfo
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _read(p: Path) -> str:
|
|
13
|
+
return p.read_text(encoding="utf-8")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test1_init_creates_files_and_config() -> None:
    """Smoke check: saving and applying a config writes the expected files.

    Asserts that ``.agentsgen.json``, ``AGENTS.md``, ``RUNBOOK.md`` and
    ``prompt/execspec.md`` exist afterwards, and that the saved JSON config
    carries version 1 and the project name.
    """
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        project = ProjectInfo(project_name="demo", stack="node").normalized()
        project.package_manager = "npm"
        project.commands = dict(
            install="npm install",
            dev="npm run dev",
            test="npm test",
        )
        tool_cfg = ToolConfig.from_project_info(project)
        save_tool_config(root, tool_cfg)
        apply_config(
            root, tool_cfg, write_prompts=True, dry_run=False, print_diff=False
        )

        expected_files = (
            root / ".agentsgen.json",
            root / "AGENTS.md",
            root / "RUNBOOK.md",
            root / "prompt" / "execspec.md",
        )
        for path in expected_files:
            assert path.is_file()

        # Inspect the config as it was actually written to disk.
        saved = json.loads(_read(root / ".agentsgen.json"))
        assert saved["version"] == 1
        assert saved["project"]["name"] == "demo"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test2_update_preserves_outside_markers() -> None:
    """Smoke check: a second apply keeps user text appended to AGENTS.md.

    After the first apply, a "User Notes" section is appended to the file.
    The test command is then changed and the config re-applied; the file must
    still contain the user section and must show the new command.
    """
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        project = ProjectInfo(project_name="demo", stack="node").normalized()
        project.package_manager = "npm"
        project.commands = dict(
            install="npm install",
            dev="npm run dev",
            test="npm test",
        )

        first_cfg = ToolConfig.from_project_info(project)
        save_tool_config(root, first_cfg)
        apply_config(
            root, first_cfg, write_prompts=False, dry_run=False, print_diff=False
        )

        # Append hand-written content after everything the tool generated.
        agents_path = root / "AGENTS.md"
        user_block = "\n## User Notes\n\nDo not delete this.\n"
        agents_path.write_text(_read(agents_path) + user_block, encoding="utf-8")

        # Change one command and run the update again.
        project.commands["test"] = "npm test -- --runInBand"
        second_cfg = ToolConfig.from_project_info(project)
        save_tool_config(root, second_cfg)
        apply_config(
            root, second_cfg, write_prompts=False, dry_run=False, print_diff=False
        )

        refreshed = _read(agents_path)
        for needle in (
            "## User Notes",
            "Do not delete this.",
            "npm test -- --runInBand",
        ):
            assert needle in refreshed
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test3_no_markers_creates_generated_files() -> None:
    """Smoke check: pre-existing files without markers are left alone.

    AGENTS.md and RUNBOOK.md are created by hand first. After applying the
    config, ``*.generated.md`` siblings must exist and the hand-written
    AGENTS.md must be byte-for-byte unchanged.
    """
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        agents_path = root / "AGENTS.md"
        runbook_path = root / "RUNBOOK.md"
        custom_agents = "# Custom AGENTS\nNo markers here\n"
        agents_path.write_text(custom_agents, encoding="utf-8")
        runbook_path.write_text(
            "# Custom RUNBOOK\nNo markers here\n", encoding="utf-8"
        )

        project = ProjectInfo(project_name="demo", stack="node").normalized()
        project.package_manager = "npm"
        project.commands = dict(
            install="npm install",
            dev="npm run dev",
            test="npm test",
        )

        tool_cfg = ToolConfig.from_project_info(project)
        save_tool_config(root, tool_cfg)
        apply_config(
            root, tool_cfg, write_prompts=False, dry_run=False, print_diff=False
        )

        for path in (
            agents_path,
            runbook_path,
            root / "AGENTS.generated.md",
            root / "RUNBOOK.generated.md",
        ):
            assert path.is_file()

        # The hand-written file must not have been modified.
        assert _read(agents_path) == custom_agents
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def main() -> None:
    """Run every smoke check, print PASS/FAIL per check, and exit.

    Raises:
        SystemExit: always; code 1 if any check raised, otherwise 0.
    """
    checks = (
        test1_init_creates_files_and_config,
        test2_update_preserves_outside_markers,
        test3_no_markers_creates_generated_files,
    )

    failures = 0
    for check in checks:
        label = check.__name__
        try:
            check()
            print(f"PASS {label}")
        except Exception as exc:
            failures += 1
            print(f"FAIL {label}: {exc}")

    raise SystemExit(int(failures > 0))
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# Run the smoke checks when this module is executed directly as a script.
if __name__ == "__main__":
    main()
|
agentsgen/actions.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from .config import ToolConfig
|
|
6
|
+
from . import config_io as _config_io
|
|
7
|
+
from . import pack_engine as _pack_engine
|
|
8
|
+
from . import patch_engine as _patch_engine
|
|
9
|
+
from .site_pack import build_site_llms_manifest
|
|
10
|
+
|
|
11
|
+
# Re-exports: this module exposes selected names from the engine modules
# under a single import path. Each line is a plain alias to the same object.

# Result type, taken from the patch engine.
FileResult = _patch_engine.FileResult

# Config persistence (config_io).
load_tool_config = _config_io.load_tool_config
save_tool_config = _config_io.save_tool_config

# Pack engine: snippet generation, checks and status.
generate_readme_snippets = _pack_engine.generate_readme_snippets
check_repo = _pack_engine.check_repo
run_core_check = _pack_engine.run_core_check
run_pack_check = _pack_engine.run_pack_check
run_snippets_check = _pack_engine.run_snippets_check
aggregate_check = _pack_engine.aggregate_check
status_repo = _pack_engine.status_repo

# Patch engine: rendering and applying AGENTS.md / RUNBOOK.md updates.
render_shared_blocks = _patch_engine.render_shared_blocks
render_all = _patch_engine.render_all
apply_config = _patch_engine.apply_config
init_or_update = _patch_engine.init_or_update
update_from_config = _patch_engine.update_from_config
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def pack_plan_specs(
    target: Path,
    cfg: ToolConfig,
    *,
    autodetect: bool,
    site_url: str | None = None,
):
    """Delegate to ``pack_engine.pack_plan_specs`` with the site builder wired in.

    All arguments are forwarded unchanged; ``build_site_llms_manifest`` is
    supplied as ``site_manifest_builder``. Returns whatever the engine returns.
    """
    engine_options = {
        "autodetect": autodetect,
        "site_url": site_url,
        "site_manifest_builder": build_site_llms_manifest,
    }
    return _pack_engine.pack_plan_specs(target, cfg, **engine_options)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def apply_pack(
    target: Path,
    cfg: ToolConfig,
    *,
    autodetect: bool,
    site_url: str | None = None,
    dry_run: bool,
    print_diff: bool,
):
    """Delegate to ``pack_engine.apply_pack`` with the site builder wired in.

    All arguments are forwarded unchanged; ``build_site_llms_manifest`` is
    supplied as ``site_manifest_builder``. Returns whatever the engine returns.
    """
    engine_options = {
        "autodetect": autodetect,
        "site_url": site_url,
        "site_manifest_builder": build_site_llms_manifest,
        "dry_run": dry_run,
        "print_diff": print_diff,
    }
    return _pack_engine.apply_pack(target, cfg, **engine_options)
|
agentsgen/analyze.py
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
from urllib.parse import urljoin, urlparse
|
|
11
|
+
from urllib.request import Request, urlopen
|
|
12
|
+
|
|
13
|
+
from .generated_artifacts import handle_generated_json_artifact
|
|
14
|
+
from .result_types import FileResult
|
|
15
|
+
from .validators import validate_analysis_payload
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
_TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL)
|
|
19
|
+
_META_DESCRIPTION_RE = re.compile(
|
|
20
|
+
r'<meta[^>]+name=["\']description["\'][^>]+content=["\'](.*?)["\']',
|
|
21
|
+
re.IGNORECASE | re.DOTALL,
|
|
22
|
+
)
|
|
23
|
+
_SCRIPT_STYLE_RE = re.compile(
|
|
24
|
+
r"<(?:script|style)[^>]*>[\s\S]*?</(?:script|style)>",
|
|
25
|
+
re.IGNORECASE,
|
|
26
|
+
)
|
|
27
|
+
_TAG_RE = re.compile(r"<[^>]+>")
|
|
28
|
+
_WHITESPACE_RE = re.compile(r"\s+")
|
|
29
|
+
_HEADING_RE = re.compile(r"<h([1-6])[^>]*>", re.IGNORECASE)
|
|
30
|
+
_SEMANTIC_TAGS = ("main", "article", "section", "nav", "header", "footer")
|
|
31
|
+
_JSON_LD_RE = re.compile(
|
|
32
|
+
r'<script[^>]+type=["\']application/ld\+json["\'][^>]*>',
|
|
33
|
+
re.IGNORECASE,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class UrlFetch:
    """Immutable result of one HTTP fetch, as built by ``_fetch_url``."""

    # URL reported by the response object (``response.geturl()``).
    url: str
    # HTTP status code; ``_fetch_url`` substitutes 200 when none is available.
    status: int
    # Response body decoded to text.
    text: str
    # Response headers with lower-cased names.
    headers: dict[str, str]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _utc_now_iso() -> str:
|
|
46
|
+
return (
|
|
47
|
+
datetime.now(timezone.utc)
|
|
48
|
+
.replace(microsecond=0)
|
|
49
|
+
.isoformat()
|
|
50
|
+
.replace("+00:00", "Z")
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _normalize_url(url: str) -> str:
|
|
55
|
+
raw = (url or "").strip()
|
|
56
|
+
if not raw:
|
|
57
|
+
raise ValueError("URL is required.")
|
|
58
|
+
parsed = urlparse(raw)
|
|
59
|
+
if not parsed.scheme:
|
|
60
|
+
raw = f"https://{raw}"
|
|
61
|
+
parsed = urlparse(raw)
|
|
62
|
+
if parsed.scheme not in {"http", "https"} or not parsed.netloc:
|
|
63
|
+
raise ValueError("Provide a full http(s) URL.")
|
|
64
|
+
return raw
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _fetch_url(url: str, *, timeout: float = 10.0) -> UrlFetch:
    """GET *url* and return the decoded body together with response metadata.

    Args:
        url: Absolute URL to request.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        A ``UrlFetch`` holding the response URL, status, decoded text and
        headers (names lower-cased).

    Raises:
        Whatever ``urlopen`` raises (e.g. ``urllib.error.URLError`` or
        ``HTTPError``); this function does not catch network errors.
    """
    request = Request(
        url,
        headers={
            "User-Agent": "agentsgen/0.1 (+https://github.com/markoblogo/AGENTS.md_generator)"
        },
    )
    with urlopen(request, timeout=timeout) as response:
        body = response.read()
        charset = response.headers.get_content_charset() or "utf-8"
        try:
            text = body.decode(charset, errors="replace")
        except LookupError:
            # The server declared a charset Python has no codec for; a bad
            # Content-Type header should not abort the fetch.
            text = body.decode("utf-8", errors="replace")
        headers = {str(k).lower(): str(v) for k, v in response.headers.items()}
        return UrlFetch(
            url=response.geturl(),
            # Fall back to 200 when the response exposes no usable status.
            status=getattr(response, "status", 200) or 200,
            text=text,
            headers=headers,
        )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _probe_url(url: str, *, timeout: float = 10.0) -> bool:
    """Return True when ``_fetch_url`` completes for *url* without raising."""
    try:
        _fetch_url(url, timeout=timeout)
    except Exception:
        # Any failure simply counts as "not present".
        return False
    return True
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _extract_title(html: str) -> str:
    """Return the whitespace-collapsed <title> text, or "" when there is none."""
    found = _TITLE_RE.search(html)
    if found is None:
        return ""
    collapsed = _WHITESPACE_RE.sub(" ", found.group(1))
    return collapsed.strip()
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _extract_meta_description(html: str) -> str:
    """Return the whitespace-collapsed meta description, or "" when not found."""
    found = _META_DESCRIPTION_RE.search(html)
    if found is None:
        return ""
    collapsed = _WHITESPACE_RE.sub(" ", found.group(1))
    return collapsed.strip()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _extract_text_content(html: str) -> str:
    """Strip script/style elements and all tags, then collapse whitespace."""
    stripped = html
    # Order matters: drop script/style bodies before removing bare tags.
    for pattern in (_SCRIPT_STYLE_RE, _TAG_RE, _WHITESPACE_RE):
        stripped = pattern.sub(" ", stripped)
    return stripped.strip()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _semantic_tag_count(html: str) -> int:
    """Count opening tags in *html* whose name is listed in ``_SEMANTIC_TAGS``."""
    return sum(
        len(re.findall(rf"<{tag}\b", html, flags=re.IGNORECASE))
        for tag in _SEMANTIC_TAGS
    )
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _heading_count(html: str) -> int:
    """Count the opening <h1>-<h6> tags in *html*."""
    return sum(1 for _ in _HEADING_RE.finditer(html))
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _has_json_ld(html: str) -> bool:
    """Return True when *html* contains an application/ld+json script tag."""
    return _JSON_LD_RE.search(html) is not None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _heuristic_factors(
    html: str, text_content: str, *, has_llms_txt: bool, has_sitemap: bool
) -> tuple[dict[str, int], dict[str, Any]]:
    """Score the page on seven heuristics and record the raw observations.

    Returns:
        ``(factors, evidence)``: ``factors`` maps each heuristic name to the
        points awarded (maximum 100 in total); ``evidence`` holds the flags
        and counts the points were derived from.
    """
    has_title = bool(_extract_title(html))
    has_description = bool(_extract_meta_description(html))
    n_headings = _heading_count(html)
    n_semantic = _semantic_tag_count(html)
    has_json_ld = _has_json_ld(html)
    n_words = sum(1 for token in text_content.split(" ") if token)

    def _tiered(value: int, full_at: int, partial_at: int, full: int, partial: int) -> int:
        # Full points at/above full_at, partial at/above partial_at, else none.
        if value >= full_at:
            return full
        if value >= partial_at:
            return partial
        return 0

    evidence: dict[str, Any] = {
        "title_present": has_title,
        "meta_description_present": has_description,
        "headings_count": n_headings,
        "semantic_tags_count": n_semantic,
        "json_ld_present": has_json_ld,
        "word_count": n_words,
        "llms_txt_present": has_llms_txt,
        "sitemap_present": has_sitemap,
    }

    # 15 points for both title and description, 8 for either one, else 0.
    meta_hits = int(has_title) + int(has_description)

    # Key order is kept stable because the dict is serialized as-is.
    factors: dict[str, int] = {
        "llms_txt": 30 if has_llms_txt else 0,
        "sitemap": 10 if has_sitemap else 0,
        "meta_tags": {2: 15, 1: 8, 0: 0}[meta_hits],
        "heading_structure": _tiered(n_headings, 3, 1, 15, 8),
        "semantic_html": _tiered(n_semantic, 3, 1, 10, 5),
        "structured_data": 10 if has_json_ld else 0,
        "content_clarity": _tiered(n_words, 300, 100, 10, 5),
    }

    return factors, evidence
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _visibility(score: int) -> str:
|
|
189
|
+
if score > 70:
|
|
190
|
+
return "high"
|
|
191
|
+
if score > 40:
|
|
192
|
+
return "medium"
|
|
193
|
+
return "low"
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _recommendations(factors: dict[str, int], evidence: dict[str, Any]) -> list[str]:
|
|
197
|
+
recommendations: list[str] = []
|
|
198
|
+
if not evidence.get("llms_txt_present"):
|
|
199
|
+
recommendations.append(
|
|
200
|
+
"Create and publish an llms.txt file to improve AI discoverability."
|
|
201
|
+
)
|
|
202
|
+
if not evidence.get("sitemap_present"):
|
|
203
|
+
recommendations.append(
|
|
204
|
+
"Publish a sitemap.xml file for crawlable site structure."
|
|
205
|
+
)
|
|
206
|
+
if factors.get("meta_tags", 0) < 15:
|
|
207
|
+
recommendations.append("Add both a clear HTML title and meta description.")
|
|
208
|
+
if factors.get("heading_structure", 0) < 15:
|
|
209
|
+
recommendations.append(
|
|
210
|
+
"Strengthen heading structure with a clear H1 and section headings."
|
|
211
|
+
)
|
|
212
|
+
if factors.get("semantic_html", 0) < 10:
|
|
213
|
+
recommendations.append(
|
|
214
|
+
"Use semantic HTML landmarks such as main, article, section, and nav."
|
|
215
|
+
)
|
|
216
|
+
if factors.get("structured_data", 0) == 0:
|
|
217
|
+
recommendations.append(
|
|
218
|
+
"Add schema.org JSON-LD where it reflects real page entities."
|
|
219
|
+
)
|
|
220
|
+
if factors.get("content_clarity", 0) < 10:
|
|
221
|
+
recommendations.append(
|
|
222
|
+
"Expose more crawlable text content instead of relying on sparse hero-only copy."
|
|
223
|
+
)
|
|
224
|
+
if not recommendations:
|
|
225
|
+
recommendations.append(
|
|
226
|
+
"This site already shows strong AI-oriented discoverability signals."
|
|
227
|
+
)
|
|
228
|
+
return recommendations
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _summary(score: int, visibility: str) -> str:
|
|
232
|
+
if visibility == "high":
|
|
233
|
+
return f"Strong baseline AI discoverability signals detected ({score}/100)."
|
|
234
|
+
if visibility == "medium":
|
|
235
|
+
return f"Mixed AI discoverability signals detected ({score}/100)."
|
|
236
|
+
return f"Weak AI discoverability signals detected ({score}/100)."
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _parse_json_object(raw_text: str) -> dict[str, Any]:
|
|
240
|
+
try:
|
|
241
|
+
parsed = json.loads(raw_text)
|
|
242
|
+
except json.JSONDecodeError:
|
|
243
|
+
match = re.search(r"\{[\s\S]*\}", raw_text)
|
|
244
|
+
if not match:
|
|
245
|
+
raise ValueError("OpenAI response did not contain valid JSON.")
|
|
246
|
+
parsed = json.loads(match.group(0))
|
|
247
|
+
if not isinstance(parsed, dict):
|
|
248
|
+
raise ValueError("OpenAI response must be a JSON object.")
|
|
249
|
+
return parsed
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _openai_chat_json(
|
|
253
|
+
*, system_prompt: str, user_prompt: str, temperature: float = 0.0
|
|
254
|
+
) -> dict[str, Any]:
|
|
255
|
+
api_key = os.getenv("OPENAI_API_KEY", "").strip()
|
|
256
|
+
if not api_key:
|
|
257
|
+
raise ValueError("OPENAI_API_KEY is required for this command.")
|
|
258
|
+
|
|
259
|
+
payload = {
|
|
260
|
+
"model": "gpt-4o-mini",
|
|
261
|
+
"messages": [
|
|
262
|
+
{"role": "system", "content": system_prompt},
|
|
263
|
+
{"role": "user", "content": user_prompt},
|
|
264
|
+
],
|
|
265
|
+
"temperature": temperature,
|
|
266
|
+
}
|
|
267
|
+
request = Request(
|
|
268
|
+
"https://api.openai.com/v1/chat/completions",
|
|
269
|
+
data=json.dumps(payload).encode("utf-8"),
|
|
270
|
+
headers={
|
|
271
|
+
"Content-Type": "application/json",
|
|
272
|
+
"Authorization": f"Bearer {api_key}",
|
|
273
|
+
},
|
|
274
|
+
method="POST",
|
|
275
|
+
)
|
|
276
|
+
with urlopen(request, timeout=30) as response:
|
|
277
|
+
raw = json.loads(response.read().decode("utf-8"))
|
|
278
|
+
content = raw["choices"][0]["message"]["content"]
|
|
279
|
+
return _parse_json_object(content)
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _openai_review(url: str, html: str, text_content: str) -> dict[str, Any]:
    """Ask the model for a short discoverability review of the page.

    Only *url* and the first 2000 characters of *text_content* are sent;
    *html* is accepted but not used.
    """
    del html
    sample = text_content[:2000]
    instructions = "".join(
        (
            "Analyze this website for AI discoverability. ",
            "Return only valid JSON with keys summary, reasons, recommendations. ",
            "Keep reasons and recommendations short.\n\n",
        )
    )
    user_message = f"{instructions}URL: {url}\nContent sample:\n{sample}"
    return _openai_chat_json(
        system_prompt="You are an expert reviewer. Return only valid JSON.",
        user_prompt=user_message,
        temperature=0.0,
    )
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _stable_payload_without_timestamp(payload: dict[str, Any]) -> str:
|
|
299
|
+
clone = json.loads(json.dumps(payload))
|
|
300
|
+
clone["generated_at"] = ""
|
|
301
|
+
return json.dumps(clone, sort_keys=True, separators=(",", ":"))
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def build_analysis_payload(url: str, *, use_ai: bool = False) -> dict[str, Any]:
    """Fetch *url* and build the AI-discoverability analysis payload.

    Args:
        url: Site URL; a missing scheme defaults to https.
        use_ai: When true, add an ``ai_review`` section obtained from
            ``_openai_review`` and report the mode as "ai-assisted".

    Returns:
        The payload dict, already passed through ``validate_analysis_payload``.

    Raises:
        ValueError: for an invalid URL (from ``_normalize_url``).
        Errors from ``_fetch_url`` for the main page are not caught here.
    """
    normalized_url = _normalize_url(url)
    fetch = _fetch_url(normalized_url)
    html = fetch.text
    text_content = _extract_text_content(html)

    def _site_file_present(filename: str) -> bool:
        # Look next to the fetched URL's path first.
        nested = urljoin(fetch.url.rstrip("/") + "/", filename)
        if _probe_url(nested):
            return True
        # For a deep page (e.g. /blog/post or /index.html) the location above
        # is under the page path, so also check the site root.
        root = urljoin(fetch.url, f"/{filename}")
        return root != nested and _probe_url(root)

    has_llms_txt = _site_file_present("llms.txt")
    has_sitemap = _site_file_present("sitemap.xml")
    factors, evidence = _heuristic_factors(
        html, text_content, has_llms_txt=has_llms_txt, has_sitemap=has_sitemap
    )
    score = sum(factors.values())
    visibility = _visibility(score)
    payload: dict[str, Any] = {
        "version": 1,
        "generated_by": "agentsgen",
        "generated_at": "",
        "url": normalized_url,
        "final_url": fetch.url,
        "mode": "ai-assisted" if use_ai else "heuristic",
        "score": score,
        "visibility": visibility,
        "summary": _summary(score, visibility),
        "factors": factors,
        "evidence": evidence,
        "recommendations": _recommendations(factors, evidence),
    }
    if use_ai:
        payload["ai_review"] = _openai_review(fetch.url, html, text_content)
    # Stamp last, once all content is in place.
    payload["generated_at"] = _utc_now_iso()
    validate_analysis_payload(payload)
    return payload
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def apply_analysis(
    root: Path,
    *,
    url: str,
    output_path: Path,
    use_ai: bool = False,
    dry_run: bool = False,
) -> tuple[list[FileResult], dict[str, Any]]:
    """Build the analysis for *url* and hand it to the JSON artifact writer.

    If *output_path* already holds an agentsgen-generated payload that is
    identical apart from ``generated_at``, the old timestamp is reused.

    Returns:
        ``([result], payload)``: the single result from
        ``handle_generated_json_artifact`` and the payload that was serialized.
    """
    # Output path is explicit; ``root`` only keeps the signature aligned with
    # the other apply_* helpers.
    del root
    payload = build_analysis_payload(url, use_ai=use_ai)

    previous = None
    if output_path.exists():
        try:
            previous = json.loads(output_path.read_text(encoding="utf-8"))
        except Exception:
            # An unreadable or malformed file is treated as "nothing to reuse".
            previous = None

    ours = (
        isinstance(previous, dict)
        and str(previous.get("generated_by", "")) == "agentsgen"
    )
    if ours:
        old_form = _stable_payload_without_timestamp(previous)
        new_form = _stable_payload_without_timestamp(payload)
        if old_form == new_form:
            payload["generated_at"] = str(previous.get("generated_at", "") or "")

    serialized = json.dumps(payload, indent=2) + "\n"
    result = handle_generated_json_artifact(
        output_path,
        serialized,
        dry_run=dry_run,
        print_diff=False,
    )
    return [result], payload
|
agentsgen/cli.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
|
|
7
|
+
from .cli_core import register_core_commands
|
|
8
|
+
from .cli_extra import register_extra_commands
|
|
9
|
+
from .cli_pack import register_pack_commands
|
|
10
|
+
from .cli_task import register_task_commands
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Root Typer application. With no arguments it prints help
# (``no_args_is_help=True``); shell-completion options are disabled.
app = typer.Typer(
    add_completion=False,
    help="Generate and safely update AGENTS.md/RUNBOOK.md",
    invoke_without_command=True,
    no_args_is_help=True,
)
# Sub-application mounted below as the ``task`` command group.
task_app = typer.Typer(
    add_completion=False,
    help="Manage proof-loop task artifacts under docs/ai/tasks/.",
)
app.add_typer(task_app, name="task")
# Each register_* helper attaches its commands to the app it is given.
register_core_commands(app)
register_pack_commands(app)
register_task_commands(task_app)
register_extra_commands(app)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def main(argv: list[str] | None = None) -> None:
    """Run the ``agentsgen`` command-line interface.

    Args:
        argv: Command-line arguments without the program name. When ``None``
            the underlying Click command reads ``sys.argv[1:]`` itself.
    """
    # Forward argv so programmatic callers can supply their own arguments.
    app(args=argv, prog_name="agentsgen")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# When executed directly, call main with the command-line arguments
# (program name removed).
if __name__ == "__main__":
    main(sys.argv[1:])
|