app-auditor 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app_auditor-0.1.0/.github/workflows/ci.yml +31 -0
- app_auditor-0.1.0/.gitignore +13 -0
- app_auditor-0.1.0/PKG-INFO +162 -0
- app_auditor-0.1.0/README.md +146 -0
- app_auditor-0.1.0/app_auditor/__init__.py +10 -0
- app_auditor-0.1.0/app_auditor/cli.py +80 -0
- app_auditor-0.1.0/app_auditor/github_auditor.py +173 -0
- app_auditor-0.1.0/app_auditor/website_auditor.py +115 -0
- app_auditor-0.1.0/examples/supabase-audit.txt +8 -0
- app_auditor-0.1.0/examples/vercel-audit.txt +7 -0
- app_auditor-0.1.0/pyproject.toml +31 -0
- app_auditor-0.1.0/src/app_auditor/__init__.py +10 -0
- app_auditor-0.1.0/src/app_auditor/cli.py +80 -0
- app_auditor-0.1.0/src/app_auditor/github_auditor.py +173 -0
- app_auditor-0.1.0/src/app_auditor/website_auditor.py +114 -0
- app_auditor-0.1.0/tests/__init__.py +0 -0
- app_auditor-0.1.0/tests/test_auditors.py +111 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Continuous-integration workflow: run the test suite and lint on every
# push to main and on every pull request.
name: CI

on:
  push:
    branches: [main]
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Test against every supported interpreter (pyproject requires >=3.10).
        python-version: ["3.10", "3.11", "3.12"]

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # Editable install with dev extras (pytest, pytest-cov, responses).
      - name: Install package
        run: pip install -e ".[dev]"

      - name: Run tests
        run: pytest tests/ -v

      # Ruff is installed ad hoc here rather than via the dev extra.
      - name: Lint
        run: |
          pip install ruff
          ruff check src/
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: app-auditor
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Detect tech stack and production-readiness issues from a live URL or GitHub repo. Zero config, one command.
|
|
5
|
+
Project-URL: Repository, https://github.com/nometria/app-auditor
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: audit,cli,devtools,github,production,stack-detection
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Requires-Dist: beautifulsoup4>=4.12.0
|
|
10
|
+
Requires-Dist: requests>=2.31.0
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
13
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
14
|
+
Requires-Dist: responses>=0.25.0; extra == 'dev'
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# app-auditor
|
|
18
|
+
|
|
19
|
+
> Detect tech stack and surface production-readiness issues from any live URL or GitHub repo. One command, zero config.
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Quick start
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Clone and install
|
|
27
|
+
git clone https://github.com/nometria/app-auditor
|
|
28
|
+
cd app-auditor
|
|
29
|
+
pip install -e .
|
|
30
|
+
|
|
31
|
+
# Audit a live URL
|
|
32
|
+
app-audit url https://vercel.com
|
|
33
|
+
|
|
34
|
+
# Audit a GitHub repo
|
|
35
|
+
app-audit repo myorg/myrepo
|
|
36
|
+
|
|
37
|
+
# JSON output
|
|
38
|
+
app-audit url https://myapp.com --format json
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Usage
|
|
44
|
+
|
|
45
|
+
### Audit a live URL
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
app-audit url https://myapp.com
|
|
49
|
+
|
|
50
|
+
# JSON output
|
|
51
|
+
app-audit url https://myapp.com --format json
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Output:
|
|
55
|
+
```
|
|
56
|
+
URL: https://myapp.com
|
|
57
|
+
Title: My App
|
|
58
|
+
Stack: nextjs, react, supabase
|
|
59
|
+
|
|
60
|
+
Risks:
|
|
61
|
+
• Supabase client: verify auth flow, RLS, and env key exposure in client.
|
|
62
|
+
• Hosting on Vercel/Netlify: ensure env vars and serverless limits are documented.
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Audit a GitHub repo
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
app-audit repo github.com/myorg/myrepo
|
|
69
|
+
# or shorthand
|
|
70
|
+
app-audit repo myorg/myrepo
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Output:
|
|
74
|
+
```
|
|
75
|
+
Repo: myorg/myrepo
|
|
76
|
+
Detected: vite, react, supabase
|
|
77
|
+
|
|
78
|
+
Missing:
|
|
79
|
+
⚠ No Dockerfile found — containerization recommended for production.
|
|
80
|
+
⚠ No GitHub Actions workflows — consider adding CI/CD.
|
|
81
|
+
|
|
82
|
+
Suggestions:
|
|
83
|
+
→ Vite SPA: add Dockerfile and ensure server rewrite rules for SPA routing.
|
|
84
|
+
→ Supabase: verify RLS, auth flow, and env key exposure in client.
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Set GitHub token to avoid rate limiting
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
export GITHUB_TOKEN=ghp_...
|
|
91
|
+
app-audit repo myorg/myrepo
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Use as a library
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from app_auditor import audit_url, analyze_repo_url
|
|
100
|
+
|
|
101
|
+
# Website audit
|
|
102
|
+
result = audit_url("https://myapp.com")
|
|
103
|
+
print(result["detected_stack"]) # {"nextjs": True, "react": True, ...}
|
|
104
|
+
print(result["risks"]) # ["Supabase client: verify RLS...", ...]
|
|
105
|
+
|
|
106
|
+
# GitHub repo audit
|
|
107
|
+
result = analyze_repo_url("https://github.com/vercel/next.js")
|
|
108
|
+
print(result["detected"]) # {"nextjs": True, "docker": False, ...}
|
|
109
|
+
print(result["missing"]) # ["No Dockerfile found..."]
|
|
110
|
+
print(result["suggestions"]) # ["Next.js: check output mode..."]
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Detected stack signals
|
|
116
|
+
|
|
117
|
+
| Signal | Detection method |
|
|
118
|
+
|--------|-----------------|
|
|
119
|
+
| Next.js | `__next` in HTML, `_next/` paths, `next.js` in server header |
|
|
120
|
+
| Vite | `/assets/` + `modulepreload` in HTML |
|
|
121
|
+
| React | `react` / `reactdom` in HTML or `package.json` |
|
|
122
|
+
| Vue | `v-bind` or `vue` in HTML |
|
|
123
|
+
| Supabase | `supabase` in HTML or repo file paths |
|
|
124
|
+
| Vercel | `vercel` in HTML, server header, or `vercel.json` |
|
|
125
|
+
| Netlify | `netlify` in HTML, header, or config files |
|
|
126
|
+
| Docker | `Dockerfile` in repo tree |
|
|
127
|
+
| GitHub Actions | `.github/workflows/` in repo tree |
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Commercial viability
|
|
132
|
+
|
|
133
|
+
- Free tier: CLI and library (open source)
|
|
134
|
+
- Paid: API with bulk auditing, team dashboards, Slack notifications
|
|
135
|
+
- Inbound funnel: developers debugging production issues → upgrade path to managed services
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## Example output
|
|
140
|
+
|
|
141
|
+
### `app-audit url https://vercel.com`
|
|
142
|
+
|
|
143
|
+
```
|
|
144
|
+
URL: https://vercel.com/
|
|
145
|
+
Title: Vercel: Build and deploy the best web experiences with the AI Cloud
|
|
146
|
+
Stack: nextjs, react, vercel
|
|
147
|
+
|
|
148
|
+
Risks:
|
|
149
|
+
• Hosting on Vercel/Netlify: ensure env vars and serverless limits are documented.
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### `app-audit url https://supabase.com`
|
|
153
|
+
|
|
154
|
+
```
|
|
155
|
+
URL: https://supabase.com/
|
|
156
|
+
Title: Supabase | The Postgres Development Platform.
|
|
157
|
+
Stack: nextjs, react, vue, supabase, vercel
|
|
158
|
+
|
|
159
|
+
Risks:
|
|
160
|
+
• Supabase client: verify auth flow, RLS, and env key exposure in client.
|
|
161
|
+
• Hosting on Vercel/Netlify: ensure env vars and serverless limits are documented.
|
|
162
|
+
```
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# app-auditor
|
|
2
|
+
|
|
3
|
+
> Detect tech stack and surface production-readiness issues from any live URL or GitHub repo. One command, zero config.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Quick start
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# Clone and install
|
|
11
|
+
git clone https://github.com/nometria/app-auditor
|
|
12
|
+
cd app-auditor
|
|
13
|
+
pip install -e .
|
|
14
|
+
|
|
15
|
+
# Audit a live URL
|
|
16
|
+
app-audit url https://vercel.com
|
|
17
|
+
|
|
18
|
+
# Audit a GitHub repo
|
|
19
|
+
app-audit repo myorg/myrepo
|
|
20
|
+
|
|
21
|
+
# JSON output
|
|
22
|
+
app-audit url https://myapp.com --format json
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
### Audit a live URL
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
app-audit url https://myapp.com
|
|
33
|
+
|
|
34
|
+
# JSON output
|
|
35
|
+
app-audit url https://myapp.com --format json
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Output:
|
|
39
|
+
```
|
|
40
|
+
URL: https://myapp.com
|
|
41
|
+
Title: My App
|
|
42
|
+
Stack: nextjs, react, supabase
|
|
43
|
+
|
|
44
|
+
Risks:
|
|
45
|
+
• Supabase client: verify auth flow, RLS, and env key exposure in client.
|
|
46
|
+
• Hosting on Vercel/Netlify: ensure env vars and serverless limits are documented.
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Audit a GitHub repo
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
app-audit repo github.com/myorg/myrepo
|
|
53
|
+
# or shorthand
|
|
54
|
+
app-audit repo myorg/myrepo
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Output:
|
|
58
|
+
```
|
|
59
|
+
Repo: myorg/myrepo
|
|
60
|
+
Detected: vite, react, supabase
|
|
61
|
+
|
|
62
|
+
Missing:
|
|
63
|
+
⚠ No Dockerfile found — containerization recommended for production.
|
|
64
|
+
⚠ No GitHub Actions workflows — consider adding CI/CD.
|
|
65
|
+
|
|
66
|
+
Suggestions:
|
|
67
|
+
→ Vite SPA: add Dockerfile and ensure server rewrite rules for SPA routing.
|
|
68
|
+
→ Supabase: verify RLS, auth flow, and env key exposure in client.
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Set GitHub token to avoid rate limiting
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
export GITHUB_TOKEN=ghp_...
|
|
75
|
+
app-audit repo myorg/myrepo
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Use as a library
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
from app_auditor import audit_url, analyze_repo_url
|
|
84
|
+
|
|
85
|
+
# Website audit
|
|
86
|
+
result = audit_url("https://myapp.com")
|
|
87
|
+
print(result["detected_stack"]) # {"nextjs": True, "react": True, ...}
|
|
88
|
+
print(result["risks"]) # ["Supabase client: verify RLS...", ...]
|
|
89
|
+
|
|
90
|
+
# GitHub repo audit
|
|
91
|
+
result = analyze_repo_url("https://github.com/vercel/next.js")
|
|
92
|
+
print(result["detected"]) # {"nextjs": True, "docker": False, ...}
|
|
93
|
+
print(result["missing"]) # ["No Dockerfile found..."]
|
|
94
|
+
print(result["suggestions"]) # ["Next.js: check output mode..."]
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
## Detected stack signals
|
|
100
|
+
|
|
101
|
+
| Signal | Detection method |
|
|
102
|
+
|--------|-----------------|
|
|
103
|
+
| Next.js | `__next` in HTML, `_next/` paths, `next.js` in server header |
|
|
104
|
+
| Vite | `/assets/` + `modulepreload` in HTML |
|
|
105
|
+
| React | `react` / `reactdom` in HTML or `package.json` |
|
|
106
|
+
| Vue | `v-bind` or `vue` in HTML |
|
|
107
|
+
| Supabase | `supabase` in HTML or repo file paths |
|
|
108
|
+
| Vercel | `vercel` in HTML, server header, or `vercel.json` |
|
|
109
|
+
| Netlify | `netlify` in HTML, header, or config files |
|
|
110
|
+
| Docker | `Dockerfile` in repo tree |
|
|
111
|
+
| GitHub Actions | `.github/workflows/` in repo tree |
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Commercial viability
|
|
116
|
+
|
|
117
|
+
- Free tier: CLI and library (open source)
|
|
118
|
+
- Paid: API with bulk auditing, team dashboards, Slack notifications
|
|
119
|
+
- Inbound funnel: developers debugging production issues → upgrade path to managed services
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## Example output
|
|
124
|
+
|
|
125
|
+
### `app-audit url https://vercel.com`
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
URL: https://vercel.com/
|
|
129
|
+
Title: Vercel: Build and deploy the best web experiences with the AI Cloud
|
|
130
|
+
Stack: nextjs, react, vercel
|
|
131
|
+
|
|
132
|
+
Risks:
|
|
133
|
+
• Hosting on Vercel/Netlify: ensure env vars and serverless limits are documented.
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### `app-audit url https://supabase.com`
|
|
137
|
+
|
|
138
|
+
```
|
|
139
|
+
URL: https://supabase.com/
|
|
140
|
+
Title: Supabase | The Postgres Development Platform.
|
|
141
|
+
Stack: nextjs, react, vue, supabase, vercel
|
|
142
|
+
|
|
143
|
+
Risks:
|
|
144
|
+
• Supabase client: verify auth flow, RLS, and env key exposure in client.
|
|
145
|
+
• Hosting on Vercel/Netlify: ensure env vars and serverless limits are documented.
|
|
146
|
+
```
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Public API for app_auditor.

Re-exports the website and GitHub auditing entry points so callers can
``from app_auditor import audit_url, analyze_repo_url``.
"""
from .website_auditor import audit_url, detect_stack_from_html
from .github_auditor import analyze_repo_url, analyze_repo, parse_repo_url

# Names exported by ``from app_auditor import *``.
__all__ = [
    "audit_url",
    "detect_stack_from_html",
    "analyze_repo_url",
    "analyze_repo",
    "parse_repo_url",
]
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
app-auditor CLI
|
|
4
|
+
|
|
5
|
+
Commands:
|
|
6
|
+
app-audit url <url> Audit a website URL (stack detection + production risks)
|
|
7
|
+
app-audit repo <url> Audit a GitHub repo (structure, missing files, suggestions)
|
|
8
|
+
"""
|
|
9
|
+
import argparse
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def main():
    """CLI entry point: parse args, run the chosen auditor, print results.

    Exits with status 1 when the audit fails, in both text and JSON modes.
    """
    parser = argparse.ArgumentParser(
        prog="app-audit",
        description="Detect tech stack and surface production-readiness issues.",
    )
    parser.add_argument(
        "command",
        choices=["url", "repo"],
        help="What to audit: a live URL or a GitHub repo",
    )
    parser.add_argument("target", help="URL or GitHub repo (owner/repo or full URL)")
    parser.add_argument(
        "--format",
        choices=["json", "text"],
        default="text",
        help="Output format (default: text)",
    )
    parser.add_argument("--verbose", "-v", action="store_true", help="Debug logging")

    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.WARNING)

    # Import lazily so each subcommand only pays for the auditor it uses.
    if args.command == "url":
        from .website_auditor import audit_url
        result = audit_url(args.target)
    else:
        from .github_auditor import analyze_repo_url
        result = analyze_repo_url(args.target)

    if args.format == "json":
        print(json.dumps(result, indent=2))
        # Bug fix: JSON mode previously always exited 0, even on failure,
        # while text mode exited 1 — keep the exit status consistent so
        # scripted callers can rely on it.
        sys.exit(0 if result.get("ok") else 1)

    # Human-readable text output
    if not result.get("ok"):
        print(f"ERROR: {result.get('error', 'Unknown error')}", file=sys.stderr)
        sys.exit(1)

    if args.command == "url":
        print(f"\nURL: {result['url']}")
        print(f"Title: {result.get('title') or '(none)'}")
        stack = [k for k, v in result.get("detected_stack", {}).items() if v]
        print(f"Stack: {', '.join(stack) or 'unknown'}")
        print("\nRisks:")
        for r in result.get("risks", []):
            print(f"  • {r}")
    else:
        print(f"\nRepo: {result.get('owner')}/{result.get('repo')}")
        detected = [k for k, v in result.get("detected", {}).items() if v]
        print(f"Detected: {', '.join(detected) or 'nothing notable'}")
        if result.get("missing"):
            print("\nMissing:")
            for m in result["missing"]:
                print(f"  ⚠ {m}")
        if result.get("suggestions"):
            print("\nSuggestions:")
            for s in result["suggestions"]:
                print(f"  → {s}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GitHub repo auditor: analyze a public repo's structure and report stack, missing files, and suggestions.
|
|
3
|
+
"""
|
|
4
|
+
import base64
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
# Timeouts and retries
|
|
15
|
+
GITHUB_TIMEOUT = 25
|
|
16
|
+
GITHUB_HEADERS: Optional[Dict[str, str]] = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _github_headers() -> Dict[str, str]:
|
|
20
|
+
global GITHUB_HEADERS
|
|
21
|
+
if GITHUB_HEADERS is not None:
|
|
22
|
+
return GITHUB_HEADERS
|
|
23
|
+
import os
|
|
24
|
+
token = os.getenv("GITHUB_TOKEN")
|
|
25
|
+
h = {"Accept": "application/vnd.github+json"}
|
|
26
|
+
if token:
|
|
27
|
+
h["Authorization"] = f"Bearer {token}"
|
|
28
|
+
GITHUB_HEADERS = h
|
|
29
|
+
return h
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def parse_repo_url(url: str) -> Optional[tuple[str, str]]:
    """Return (owner, repo) parsed from *url*, or None if invalid.

    Accepts full GitHub URLs (``https://github.com/owner/repo``, with or
    without scheme, ``www.``, ``.git`` suffix, or trailing slash) and the
    bare ``owner/repo`` shorthand.
    """
    url = url.strip().rstrip("/")
    # https://github.com/owner/repo or github.com/owner/repo
    m = re.match(r"(?:https?://)?(?:www\.)?github\.com/([^/]+)/([^/]+?)(?:\.git)?/?$", url, re.I)
    if m:
        return m.group(1), m.group(2)
    # owner/repo shorthand. Bug fix: require exactly one slash — neither
    # owner nor repo may themselves contain '/', so "a/b/c" is now rejected
    # instead of being mis-parsed as ("a", "b/c").
    if url.count("/") == 1 and " " not in url and len(url) < 100:
        owner, repo = url.split("/")
        if owner and repo:
            return owner, repo
    return None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_repo_tree(owner: str, repo: str) -> List[Dict[str, Any]]:
    """Fetch the recursive file tree of *owner*/*repo* via the git trees API.

    Raises requests.HTTPError on any non-2xx response.
    """
    endpoint = f"https://api.github.com/repos/{owner}/{repo}/git/trees/HEAD?recursive=1"
    response = requests.get(endpoint, headers=_github_headers(), timeout=GITHUB_TIMEOUT)
    response.raise_for_status()
    payload = response.json()
    return payload.get("tree") or []
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_repo_info(owner: str, repo: str) -> Dict[str, Any]:
    """Fetch repo metadata (description, default branch, etc.).

    Returns an empty dict on any non-200 response instead of raising.
    """
    response = requests.get(
        f"https://api.github.com/repos/{owner}/{repo}",
        headers=_github_headers(),
        timeout=GITHUB_TIMEOUT,
    )
    return response.json() if response.status_code == 200 else {}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_readme(owner: str, repo: str) -> Optional[str]:
    """Fetch up to the first 8000 characters of the repo README, or None."""
    # Override Accept so the API returns raw README text instead of JSON.
    raw_headers = {**_github_headers(), "Accept": "application/vnd.github.raw"}
    response = requests.get(
        f"https://api.github.com/repos/{owner}/{repo}/readme",
        headers=raw_headers,
        timeout=GITHUB_TIMEOUT,
    )
    if response.status_code != 200:
        return None
    return response.text[:8000]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def analyze_repo(owner: str, repo: str) -> Dict[str, Any]:
    """Analyze repo structure and return detected stack, missing items, and suggestions.

    Returns a dict with keys ``ok``/``owner``/``repo``/``detected``/
    ``missing``/``suggestions``/``repo_info``; on fetch failure returns
    ``ok=False`` with an ``error`` message instead of raising.
    """
    try:
        tree = get_repo_tree(owner, repo)
    except requests.RequestException as e:
        logger.warning("get_repo_tree failed: %s", e)
        return {
            "ok": False,
            "error": str(e),
            "detected": {},
            "missing": [],
            "suggestions": ["Could not fetch repo; check URL and token."],
        }

    # Robustness fix: tolerate tree entries without a "path" key instead of
    # raising KeyError on an unexpected API payload shape.
    paths = [item.get("path", "") for item in tree]
    detected = {
        "nextjs": any(
            p in paths for p in ("next.config.js", "next.config.mjs", "next.config.ts")
        ),
        "vite": any(
            p in paths for p in ("vite.config.ts", "vite.config.js", "vite.config.mts")
        ),
        "react": False,
        "docker": any(p in paths for p in ("Dockerfile", "Dockerfile.dev")),
        "github_actions": any(p.startswith(".github/workflows/") for p in paths),
        "supabase": any("supabase" in p.lower() for p in paths),
        "vercel": "vercel.json" in paths or any(p.startswith(".vercel") for p in paths),
        "env_example": ".env.example" in paths or "env.example" in paths,
    }

    # React has no canonical config file: check package.json contents first,
    # then fall back to a substring scan over the path list.
    if "package.json" in paths:
        try:
            content = _fetch_file_content(owner, repo, "package.json")
            if content and "react" in content.lower():
                detected["react"] = True
        except Exception:
            # Best-effort: a failed content fetch must not break the audit.
            pass
    if not detected["react"] and "react" in " ".join(paths).lower():
        detected["react"] = True

    missing = []
    if not detected["docker"]:
        missing.append("No Dockerfile found — containerization recommended for production.")
    if not detected["github_actions"]:
        missing.append("No GitHub Actions workflows — consider adding CI/CD.")
    if not detected["env_example"]:
        missing.append("No .env.example — document required env vars for deployment.")

    suggestions = []
    if detected["vite"] and not detected["docker"]:
        suggestions.append("Vite SPA: add Dockerfile and ensure server rewrite rules for SPA routing.")
    if detected["supabase"]:
        suggestions.append("Supabase: verify RLS, auth flow, and env key exposure in client.")
    if detected["nextjs"]:
        suggestions.append("Next.js: check output mode (standalone/docker) and env at build time.")

    return {
        "ok": True,
        "owner": owner,
        "repo": repo,
        "detected": detected,
        "missing": missing,
        "suggestions": suggestions or ["Review security and env handling before production."],
        "repo_info": get_repo_info(owner, repo),
    }
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _fetch_file_content(owner: str, repo: str, path: str) -> Optional[str]:
    """Fetch a file's text via the GitHub contents API; None if unavailable."""
    endpoint = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
    response = requests.get(endpoint, headers=_github_headers(), timeout=GITHUB_TIMEOUT)
    if response.status_code != 200:
        return None
    payload = response.json()
    # The contents API base64-encodes regular files; anything else
    # (e.g. very large files) is treated as unavailable.
    if payload.get("encoding") != "base64":
        return None
    raw = base64.b64decode(payload.get("content", ""))
    return raw.decode("utf-8", errors="ignore")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def analyze_repo_url(repo_url: str) -> Dict[str, Any]:
    """Convenience wrapper: parse *repo_url* and run analyze_repo on it."""
    parsed = parse_repo_url(repo_url)
    if parsed is None:
        # Keep the same result shape as analyze_repo's failure path.
        return {
            "ok": False,
            "error": "Invalid GitHub repo URL or owner/repo",
            "detected": {},
            "missing": [],
            "suggestions": [],
        }
    owner, repo = parsed
    return analyze_repo(owner, repo)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
if __name__ == "__main__":
    # Ad-hoc manual test entry point for the module.
    import json
    # Bug fix: ``sys`` is used below but was never imported anywhere in
    # this module, so running it as a script raised NameError.
    import sys

    logging.basicConfig(level=logging.INFO)
    url = sys.argv[1] if len(sys.argv) > 1 else "https://github.com/vercel/next.js"
    result = analyze_repo_url(url)
    print(json.dumps(result, indent=2))
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Website auditor: detect tech stack from HTML/headers and report production-readiness risks.
|
|
3
|
+
"""
|
|
4
|
+
import logging
|
|
5
|
+
import re
|
|
6
|
+
import sys
|
|
7
|
+
from typing import Any, Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
from bs4 import BeautifulSoup
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
# Per-request timeout (seconds) for page fetches.
REQUEST_TIMEOUT = 15
# Identify ourselves; some hosts block the default requests UA string.
USER_AGENT = "GrowthToolsAuditor/1.0 (compatible; +https://github.com)"


def fetch_page(url: str) -> tuple[Optional[str], Optional[str], Optional[Dict], int]:
    """Fetch *url*, following redirects.

    Returns (html, final_url, headers_dict, status_code). On any request
    failure returns (None, None, None, status) where status is the HTTP
    status of the failed response when one exists, else 0.
    """
    try:
        r = requests.get(
            url,
            timeout=REQUEST_TIMEOUT,
            headers={"User-Agent": USER_AGENT},
            allow_redirects=True,
        )
        r.raise_for_status()
        return r.text, r.url, dict(r.headers), r.status_code
    except requests.RequestException as e:
        logger.warning("fetch_page failed: %s", e)
        # Fix: the previous ``x and y or 0`` chain is the classic broken
        # conditional idiom — spelled out explicitly here.
        response = getattr(e, "response", None)
        status = getattr(response, "status_code", None) if response is not None else None
        return None, None, None, status or 0
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def detect_stack_from_html(html: str, headers: Optional[Dict[str, str]] = None) -> Dict[str, bool]:
    """Detect likely tech stack from HTML and optional response headers.

    Detection is heuristic substring matching over the lowercased page and
    the Server / X-Powered-By headers.
    """
    headers = headers or {}
    html_lower = html.lower() if html else ""
    # Server header can reveal platform
    server = (headers.get("Server") or headers.get("x-powered-by") or "").lower()

    detected = {
        # Bug fix: dropped the over-broad `"next" in html_lower[:2000]`
        # clause, which flagged any page merely containing the word "next"
        # (e.g. a "next page" link). The documented signals are __next
        # markup, _next/ asset paths, or next.js in the server header.
        "nextjs": "__next" in html_lower or "_next/" in html_lower or "next.js" in server,
        "vite": "/assets/" in html_lower and "modulepreload" in html_lower,
        "react": "react" in html_lower or "reactdom" in html_lower,
        "vue": "vue" in html_lower or "v-bind" in html_lower,
        "supabase": "supabase" in html_lower,
        "vercel": "vercel" in html_lower or "vercel" in server,
        "netlify": "netlify" in html_lower or "netlify" in server,
    }
    return detected
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def infer_risks(detected: Dict[str, bool]) -> List[str]:
    """Map detected stack flags to production-readiness risk notes."""
    # (triggered?, message) rules, evaluated in a fixed order.
    rules = [
        (detected.get("vite"),
         "Likely SPA (Vite): check rewrite rules and SEO/SSR if needed."),
        (detected.get("supabase"),
         "Supabase client: verify auth flow, RLS, and env key exposure in client."),
        (detected.get("react") and not detected.get("nextjs"),
         "Client-side React: consider SSR/SSG for SEO and first load."),
        (detected.get("vercel") or detected.get("netlify"),
         "Hosting on Vercel/Netlify: ensure env vars and serverless limits are documented."),
    ]
    risks = [message for triggered, message in rules if triggered]
    # Always give the caller at least one actionable item.
    return risks or ["Review security headers and CSP for production."]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def audit_url(url: str) -> Dict[str, Any]:
    """Full audit of a live URL.

    Fetches the page, detects the stack, and returns a result dict with
    ok/url/status_code/title/detected_stack/risks/next_step keys.
    """
    url = url.strip()
    # Default to HTTPS when the caller omitted a scheme.
    if not url.startswith(("http://", "https://")):
        url = "https://" + url

    html, final_url, headers, status = fetch_page(url)
    if html is None:
        return {
            "ok": False,
            "url": url,
            "error": "Could not fetch URL (timeout or HTTP error)",
            "status_code": status,
            "title": None,
            "detected_stack": {},
            "risks": [],
            "next_step": "Check URL and try again.",
        }

    # Pull the <title> text, capped to a sane length.
    title = None
    page = BeautifulSoup(html, "html.parser")
    if page.title and page.title.string:
        title = page.title.string.strip()[:500]

    stack_flags = detect_stack_from_html(html, headers)
    return {
        "ok": True,
        "url": final_url or url,
        "status_code": status,
        "title": title,
        "detected_stack": stack_flags,
        "risks": infer_risks(stack_flags),
        "next_step": "Connect GitHub repo for a deeper production audit and migration checklist.",
    }
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
if __name__ == "__main__":
    # Ad-hoc manual test entry point for the module.
    import json

    logging.basicConfig(level=logging.INFO)
    u = sys.argv[1] if len(sys.argv) > 1 else "https://example.com"
    print(json.dumps(audit_url(u), indent=2))
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
|
|
2
|
+
URL: https://supabase.com/
|
|
3
|
+
Title: Supabase | The Postgres Development Platform.
|
|
4
|
+
Stack: nextjs, react, vue, supabase, vercel
|
|
5
|
+
|
|
6
|
+
Risks:
|
|
7
|
+
• Supabase client: verify auth flow, RLS, and env key exposure in client.
|
|
8
|
+
• Hosting on Vercel/Netlify: ensure env vars and serverless limits are documented.
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Packaging configuration: hatchling backend, src/ layout.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "app-auditor"
version = "0.1.0"
description = "Detect tech stack and production-readiness issues from a live URL or GitHub repo. Zero config, one command."
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.10"
keywords = ["audit", "stack-detection", "github", "production", "devtools", "cli"]
dependencies = [
    "requests>=2.31.0",
    "beautifulsoup4>=4.12.0",
]

[project.optional-dependencies]
dev = ["pytest>=8.0", "pytest-cov", "responses>=0.25.0"]

# Console entry point: `app-audit` -> app_auditor.cli:main
[project.scripts]
app-audit = "app_auditor.cli:main"

# NOTE(review): only src/app_auditor is packaged into the wheel; the
# duplicate top-level app_auditor/ tree present in the sdist is not
# shipped — confirm whether it should be removed from the repository.
[tool.hatch.build.targets.wheel]
packages = ["src/app_auditor"]

[tool.pytest.ini_options]
testpaths = ["tests"]

[project.urls]
Repository = "https://github.com/nometria/app-auditor"
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Public API for app_auditor.

Re-exports the website and GitHub auditing entry points so callers can
``from app_auditor import audit_url, analyze_repo_url``.
"""
from .website_auditor import audit_url, detect_stack_from_html
from .github_auditor import analyze_repo_url, analyze_repo, parse_repo_url

# Names exported by ``from app_auditor import *``.
__all__ = [
    "audit_url",
    "detect_stack_from_html",
    "analyze_repo_url",
    "analyze_repo",
    "parse_repo_url",
]
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
app-auditor CLI
|
|
4
|
+
|
|
5
|
+
Commands:
|
|
6
|
+
app-audit url <url> Audit a website URL (stack detection + production risks)
|
|
7
|
+
app-audit repo <url> Audit a GitHub repo (structure, missing files, suggestions)
|
|
8
|
+
"""
|
|
9
|
+
import argparse
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def main():
    """CLI entry point: parse args, run the chosen auditor, print results.

    Exits with status 1 when the audit fails, in both text and JSON modes.
    """
    parser = argparse.ArgumentParser(
        prog="app-audit",
        description="Detect tech stack and surface production-readiness issues.",
    )
    parser.add_argument(
        "command",
        choices=["url", "repo"],
        help="What to audit: a live URL or a GitHub repo",
    )
    parser.add_argument("target", help="URL or GitHub repo (owner/repo or full URL)")
    parser.add_argument(
        "--format",
        choices=["json", "text"],
        default="text",
        help="Output format (default: text)",
    )
    parser.add_argument("--verbose", "-v", action="store_true", help="Debug logging")

    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.WARNING)

    # Import lazily so each subcommand only pays for the auditor it uses.
    if args.command == "url":
        from .website_auditor import audit_url
        result = audit_url(args.target)
    else:
        from .github_auditor import analyze_repo_url
        result = analyze_repo_url(args.target)

    if args.format == "json":
        print(json.dumps(result, indent=2))
        # Bug fix: JSON mode previously always exited 0, even on failure,
        # while text mode exited 1 — keep the exit status consistent so
        # scripted callers can rely on it.
        sys.exit(0 if result.get("ok") else 1)

    # Human-readable text output
    if not result.get("ok"):
        print(f"ERROR: {result.get('error', 'Unknown error')}", file=sys.stderr)
        sys.exit(1)

    if args.command == "url":
        print(f"\nURL: {result['url']}")
        print(f"Title: {result.get('title') or '(none)'}")
        stack = [k for k, v in result.get("detected_stack", {}).items() if v]
        print(f"Stack: {', '.join(stack) or 'unknown'}")
        print("\nRisks:")
        for r in result.get("risks", []):
            print(f"  • {r}")
    else:
        print(f"\nRepo: {result.get('owner')}/{result.get('repo')}")
        detected = [k for k, v in result.get("detected", {}).items() if v]
        print(f"Detected: {', '.join(detected) or 'nothing notable'}")
        if result.get("missing"):
            print("\nMissing:")
            for m in result["missing"]:
                print(f"  ⚠ {m}")
        if result.get("suggestions"):
            print("\nSuggestions:")
            for s in result["suggestions"]:
                print(f"  → {s}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GitHub repo auditor: analyze a public repo's structure and report stack, missing files, and suggestions.
|
|
3
|
+
"""
|
|
4
|
+
import base64
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
# Timeouts and retries
|
|
15
|
+
GITHUB_TIMEOUT = 25
|
|
16
|
+
GITHUB_HEADERS: Optional[Dict[str, str]] = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _github_headers() -> Dict[str, str]:
|
|
20
|
+
global GITHUB_HEADERS
|
|
21
|
+
if GITHUB_HEADERS is not None:
|
|
22
|
+
return GITHUB_HEADERS
|
|
23
|
+
token = os.getenv("GITHUB_TOKEN")
|
|
24
|
+
h = {"Accept": "application/vnd.github+json"}
|
|
25
|
+
if token:
|
|
26
|
+
h["Authorization"] = f"Bearer {token}"
|
|
27
|
+
GITHUB_HEADERS = h
|
|
28
|
+
return h
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def parse_repo_url(url: str) -> Optional[tuple[str, str]]:
    """Return (owner, repo) parsed from a GitHub URL or 'owner/repo' shorthand.

    Accepts full URLs (optional scheme, optional 'www.', trailing '.git' or
    '/') and bare 'owner/repo' strings.  Returns None for anything else.
    """
    url = url.strip().rstrip("/")
    # https://github.com/owner/repo or github.com/owner/repo
    m = re.match(
        r"(?:https?://)?(?:www\.)?github\.com/([^/]+)/([^/]+?)(?:\.git)?/?$",
        url,
        re.I,
    )
    if m:
        return m.group(1), m.group(2)
    # Shorthand owner/repo: exactly one slash with GitHub-name characters on
    # both sides.  (The previous check accepted any string containing "/",
    # so e.g. "https://gitlab.com/a/b" parsed as owner "https:".)
    if len(url) < 100:
        m = re.fullmatch(r"([A-Za-z0-9_.-]+)/([A-Za-z0-9_.-]+)", url)
        if m:
            return m.group(1), m.group(2)
    return None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_repo_tree(owner: str, repo: str) -> List[Dict[str, Any]]:
    """Return the recursive file tree of owner/repo.

    Raises requests.HTTPError (via raise_for_status) on API failures.
    """
    endpoint = f"https://api.github.com/repos/{owner}/{repo}/git/trees/HEAD?recursive=1"
    resp = requests.get(endpoint, headers=_github_headers(), timeout=GITHUB_TIMEOUT)
    resp.raise_for_status()
    return resp.json().get("tree") or []
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_repo_info(owner: str, repo: str) -> Dict[str, Any]:
    """Return repo metadata (description, default branch, ...), or {} on any
    non-200 response."""
    resp = requests.get(
        f"https://api.github.com/repos/{owner}/{repo}",
        headers=_github_headers(),
        timeout=GITHUB_TIMEOUT,
    )
    return resp.json() if resp.status_code == 200 else {}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_readme(owner: str, repo: str) -> Optional[str]:
    """Return up to the first 8000 chars of the repo README, or None if absent."""
    raw_headers = dict(_github_headers())
    # Ask for the raw file body rather than the JSON/base64 envelope.
    raw_headers["Accept"] = "application/vnd.github.raw"
    resp = requests.get(
        f"https://api.github.com/repos/{owner}/{repo}/readme",
        headers=raw_headers,
        timeout=GITHUB_TIMEOUT,
    )
    if resp.status_code == 200:
        return resp.text[:8000]
    return None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def analyze_repo(owner: str, repo: str) -> Dict[str, Any]:
    """Analyze a repo's file tree and report stack, gaps, and suggestions.

    Returns a dict with keys: ok, owner, repo, detected, missing,
    suggestions, repo_info — or, when the tree fetch fails, a dict with
    ok=False and an "error" message.
    """
    try:
        tree = get_repo_tree(owner, repo)
    except requests.RequestException as e:
        logger.warning("get_repo_tree failed: %s", e)
        return {
            "ok": False,
            "error": str(e),
            "detected": {},
            "missing": [],
            "suggestions": ["Could not fetch repo; check URL and token."],
        }

    # Tolerate malformed tree entries (missing "path") instead of raising
    # KeyError on unexpected API payloads.
    paths = [item.get("path", "") for item in tree]
    detected = {
        "nextjs": any(
            p in paths for p in ("next.config.js", "next.config.mjs", "next.config.ts")
        ),
        "vite": any(
            p in paths for p in ("vite.config.ts", "vite.config.js", "vite.config.mts")
        ),
        "react": False,  # refined below from package.json / path names
        "docker": any(p in paths for p in ("Dockerfile", "Dockerfile.dev")),
        "github_actions": any(p.startswith(".github/workflows/") for p in paths),
        "supabase": any("supabase" in p.lower() for p in paths),
        "vercel": "vercel.json" in paths or any(p.startswith(".vercel") for p in paths),
        "env_example": ".env.example" in paths or "env.example" in paths,
    }

    # package.json content is the strongest react signal; path names are a
    # weaker fallback when the fetch fails or the file is absent.
    if "package.json" in paths:
        try:
            content = _fetch_file_content(owner, repo, "package.json")
            if content and "react" in content.lower():
                detected["react"] = True
        except Exception:
            # Best-effort: a failed content fetch must not fail the audit.
            pass
    if not detected["react"] and "react" in " ".join(paths).lower():
        detected["react"] = True

    missing = []
    if not detected["docker"]:
        missing.append("No Dockerfile found — containerization recommended for production.")
    if not detected["github_actions"]:
        missing.append("No GitHub Actions workflows — consider adding CI/CD.")
    if not detected["env_example"]:
        missing.append("No .env.example — document required env vars for deployment.")

    suggestions = []
    if detected["vite"] and not detected["docker"]:
        suggestions.append("Vite SPA: add Dockerfile and ensure server rewrite rules for SPA routing.")
    if detected["supabase"]:
        suggestions.append("Supabase: verify RLS, auth flow, and env key exposure in client.")
    if detected["nextjs"]:
        suggestions.append("Next.js: check output mode (standalone/docker) and env at build time.")

    return {
        "ok": True,
        "owner": owner,
        "repo": repo,
        "detected": detected,
        "missing": missing,
        "suggestions": suggestions or ["Review security and env handling before production."],
        "repo_info": get_repo_info(owner, repo),
    }
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _fetch_file_content(owner: str, repo: str, path: str) -> Optional[str]:
    """Return the decoded text of *path* in the repo, or None if unavailable
    or not base64-encoded by the contents API."""
    resp = requests.get(
        f"https://api.github.com/repos/{owner}/{repo}/contents/{path}",
        headers=_github_headers(),
        timeout=GITHUB_TIMEOUT,
    )
    if resp.status_code != 200:
        return None
    payload = resp.json()
    if payload.get("encoding") != "base64":
        return None
    raw = base64.b64decode(payload.get("content", ""))
    return raw.decode("utf-8", errors="ignore")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def analyze_repo_url(repo_url: str) -> Dict[str, Any]:
    """Parse *repo_url* and delegate to analyze_repo.

    Returns an ok=False error dict when the URL cannot be parsed.
    """
    parsed = parse_repo_url(repo_url)
    if parsed is None:
        return {
            "ok": False,
            "error": "Invalid GitHub repo URL or owner/repo",
            "detected": {},
            "missing": [],
            "suggestions": [],
        }
    owner, repo = parsed
    return analyze_repo(owner, repo)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
if __name__ == "__main__":
    # Ad-hoc manual run: `python github_auditor.py [repo-url]`.
    import json
    import sys

    logging.basicConfig(level=logging.INFO)
    target = sys.argv[1] if len(sys.argv) > 1 else "https://github.com/vercel/next.js"
    print(json.dumps(analyze_repo_url(target), indent=2))
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Website auditor: detect tech stack from HTML/headers and report production-readiness risks.
|
|
3
|
+
"""
|
|
4
|
+
import logging
|
|
5
|
+
import sys
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
import requests
|
|
9
|
+
from bs4 import BeautifulSoup
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
REQUEST_TIMEOUT = 15
|
|
14
|
+
USER_AGENT = "GrowthToolsAuditor/1.0 (compatible; +https://github.com)"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def fetch_page(url: str) -> tuple[Optional[str], Optional[str], Optional[Dict], int]:
    """Fetch *url*, following redirects, with the auditor User-Agent.

    Returns (html, final_url, headers_dict, status_code).  On any request
    failure returns (None, None, None, status) where *status* is the HTTP
    status of the error response when one exists (HTTP errors), else 0
    (timeouts, DNS/connection failures).
    """
    try:
        r = requests.get(
            url,
            timeout=REQUEST_TIMEOUT,
            headers={"User-Agent": USER_AGENT},
            allow_redirects=True,
        )
        r.raise_for_status()
        return r.text, r.url, dict(r.headers), r.status_code
    except requests.RequestException as e:
        logger.warning("fetch_page failed: %s", e)
        # e.response is None for connection errors/timeouts; only HTTP
        # errors carry a response object.  The original `A and B or 0`
        # expression was equivalent but fragile and hard to read.
        status = 0
        response = getattr(e, "response", None)
        if response is not None and getattr(response, "status_code", None):
            status = response.status_code
        return None, None, None, status
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def detect_stack_from_html(html: str, headers: Optional[Dict[str, str]] = None) -> Dict[str, bool]:
    """Detect likely tech stack from HTML and optional response headers.

    Returns a dict of framework/platform name -> bool.  These are substring
    heuristics: false negatives are possible on minified or server-rendered
    pages.
    """
    # Normalize header names: real servers vary casing, and a plain dict
    # (unlike requests' CaseInsensitiveDict) is case-sensitive, so the old
    # lookup of exactly "Server" / "x-powered-by" could miss both.
    hdrs = {k.lower(): v for k, v in (headers or {}).items()}
    server = (hdrs.get("server") or hdrs.get("x-powered-by") or "").lower()
    html_lower = html.lower() if html else ""

    return {
        # Only concrete Next.js markers: the previous extra clause
        # `"next" in html_lower[:2000]` matched ordinary prose such as
        # "next page" and flagged unrelated sites as Next.js.
        "nextjs": "__next" in html_lower or "_next/" in html_lower or "next.js" in server,
        "vite": "/assets/" in html_lower and "modulepreload" in html_lower,
        "react": "react" in html_lower or "reactdom" in html_lower,
        "vue": "vue" in html_lower or "v-bind" in html_lower,
        "supabase": "supabase" in html_lower,
        "vercel": "vercel" in html_lower or "vercel" in server,
        "netlify": "netlify" in html_lower or "netlify" in server,
    }
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def infer_risks(detected: Dict[str, bool]) -> List[str]:
    """Map detected stack flags to production-readiness risk messages.

    Always returns at least one message (a generic security reminder when
    nothing specific applies).
    """
    risks: List[str] = []
    if detected.get("vite"):
        risks.append("Likely SPA (Vite): check rewrite rules and SEO/SSR if needed.")
    if detected.get("supabase"):
        risks.append("Supabase client: verify auth flow, RLS, and env key exposure in client.")
    # React without Next.js implies purely client-side rendering.
    if detected.get("react") and not detected.get("nextjs"):
        risks.append("Client-side React: consider SSR/SSG for SEO and first load.")
    if detected.get("vercel") or detected.get("netlify"):
        risks.append("Hosting on Vercel/Netlify: ensure env vars and serverless limits are documented.")
    return risks or ["Review security headers and CSP for production."]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def audit_url(url: str) -> Dict[str, Any]:
    """Fetch *url*, detect its tech stack, and return a full audit report.

    The report dict contains: ok, url, status_code, title, detected_stack,
    risks, next_step (plus "error" when the fetch fails).
    """
    target = url.strip()
    # Default to https when no scheme was given.
    if not target.startswith(("http://", "https://")):
        target = "https://" + target

    html, final_url, headers, status = fetch_page(target)
    if html is None:
        return {
            "ok": False,
            "url": target,
            "error": "Could not fetch URL (timeout or HTTP error)",
            "status_code": status,
            "title": None,
            "detected_stack": {},
            "risks": [],
            "next_step": "Check URL and try again.",
        }

    soup = BeautifulSoup(html, "html.parser")
    raw_title = soup.title.string if soup.title else None
    title = raw_title.strip()[:500] if raw_title else None

    detected = detect_stack_from_html(html, headers)
    return {
        "ok": True,
        "url": final_url or target,
        "status_code": status,
        "title": title,
        "detected_stack": detected,
        "risks": infer_risks(detected),
        "next_step": "Connect GitHub repo for a deeper production audit and migration checklist.",
    }
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
if __name__ == "__main__":
    # Manual smoke run: `python website_auditor.py [url]`.
    import json

    logging.basicConfig(level=logging.INFO)
    target = sys.argv[1] if len(sys.argv) > 1 else "https://example.com"
    print(json.dumps(audit_url(target), indent=2))
|
|
File without changes
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Tests for app-auditor — no live HTTP required (uses mocking)."""
import sys
import os
import pytest

# Make the src/ layout importable without installing the package, so tests
# can do `from website_auditor import ...` directly.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# ---------------------------------------------------------------------------
|
|
10
|
+
# website_auditor tests
|
|
11
|
+
# ---------------------------------------------------------------------------
|
|
12
|
+
|
|
13
|
+
def test_detect_nextjs_from_html():
    from website_auditor import detect_stack_from_html
    # The "__next" root div marks a Next.js-rendered page.
    markup = '<html><head></head><body><div id="__next"></div></body></html>'
    assert detect_stack_from_html(markup)["nextjs"] is True
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_detect_vite_from_html():
    from website_auditor import detect_stack_from_html
    # Vite builds emit hashed /assets/ bundles with modulepreload hints.
    markup = '<html><head><link rel="modulepreload" href="/assets/index-abc.js"></head></html>'
    assert detect_stack_from_html(markup)["vite"] is True
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_detect_supabase_from_html():
    from website_auditor import detect_stack_from_html
    markup = '<script src="https://cdn.supabase.co/supabase.js"></script>'
    assert detect_stack_from_html(markup)["supabase"] is True
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_empty_html_returns_all_false():
    from website_auditor import detect_stack_from_html
    # No markup at all -> nothing should be detected.
    for flag in detect_stack_from_html("").values():
        assert flag is False
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_infer_risks_vite():
    from website_auditor import infer_risks
    flags = {"vite": True, "supabase": False, "react": False, "nextjs": False, "vercel": False, "netlify": False, "vue": False}
    messages = infer_risks(flags)
    assert any("vite" in msg.lower() or "spa" in msg.lower() for msg in messages)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_infer_risks_supabase():
    from website_auditor import infer_risks
    flags = {"vite": False, "supabase": True, "react": False, "nextjs": False, "vercel": False, "netlify": False, "vue": False}
    messages = infer_risks(flags)
    assert any("supabase" in msg.lower() or "rls" in msg.lower() for msg in messages)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_audit_url_unreachable_returns_ok_false():
    # NOTE(review): despite the module docstring ("no live HTTP required"),
    # this test does attempt a real DNS lookup for a .invalid hostname and
    # relies on resolution failing fast — consider mocking fetch_page instead.
    from website_auditor import audit_url
    result = audit_url("https://this-url-does-not-exist-xyz-12345.invalid")
    assert result["ok"] is False
    assert "error" in result
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
# github_auditor tests
|
|
61
|
+
# ---------------------------------------------------------------------------
|
|
62
|
+
|
|
63
|
+
def test_parse_repo_url_full():
    from github_auditor import parse_repo_url
    owner, repo = parse_repo_url("https://github.com/vercel/next.js")
    assert (owner, repo) == ("vercel", "next.js")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_parse_repo_url_shorthand():
    from github_auditor import parse_repo_url
    parsed = parse_repo_url("facebook/react")
    assert parsed == ("facebook", "react")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_parse_repo_url_invalid():
    from github_auditor import parse_repo_url
    for bad_input in ("not-a-url", ""):
        assert parse_repo_url(bad_input) is None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_analyze_repo_url_invalid_returns_error():
    from github_auditor import analyze_repo_url
    outcome = analyze_repo_url("not-a-github-url")
    assert outcome["ok"] is False
    assert "error" in outcome
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_analyze_repo_detects_docker(monkeypatch):
    """Mock GitHub API to return a tree with Dockerfile."""
    import github_auditor

    fake_tree = [
        {"path": "Dockerfile"},
        {"path": "package.json"},
        {"path": ".github/workflows/ci.yml"},
        {"path": ".env.example"},
    ]
    monkeypatch.setattr(github_auditor, "get_repo_tree", lambda owner, repo: fake_tree)
    monkeypatch.setattr(
        github_auditor,
        "get_repo_info",
        lambda owner, repo: {"full_name": f"{owner}/{repo}", "default_branch": "main"},
    )
    monkeypatch.setattr(github_auditor, "_fetch_file_content", lambda *a: None)

    result = github_auditor.analyze_repo("myorg", "myrepo")
    assert result["ok"] is True
    for key in ("docker", "github_actions", "env_example"):
        assert result["detected"][key] is True
    # No Dockerfile missing warning since it's present
    assert not any("dockerfile" in m.lower() for m in result["missing"])
|