@opendirectory.dev/skills 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/registry.json +8 -0
- package/skills/pricing-page-psychology-audit/.env.example +26 -0
- package/skills/pricing-page-psychology-audit/README.md +166 -0
- package/skills/pricing-page-psychology-audit/SKILL.md +154 -0
- package/skills/pricing-page-psychology-audit/scripts/scrape_pricing.py +318 -0
package/package.json
CHANGED
package/registry.json
CHANGED
|
@@ -159,6 +159,14 @@
|
|
|
159
159
|
"version": "1.0.0",
|
|
160
160
|
"path": "skills/pr-description-writer"
|
|
161
161
|
},
|
|
162
|
+
{
|
|
163
|
+
"name": "pricing-page-psychology-audit",
|
|
164
|
+
"description": "Audits any SaaS pricing page URL against 12 pricing psychology principles and outputs a ranked improvement report with specific rewrite suggestions...",
|
|
165
|
+
"tags": [],
|
|
166
|
+
"author": "ajaycodesitbetter",
|
|
167
|
+
"version": "1.0.0",
|
|
168
|
+
"path": "skills/pricing-page-psychology-audit"
|
|
169
|
+
},
|
|
162
170
|
{
|
|
163
171
|
"name": "producthunt-launch-kit",
|
|
164
172
|
"description": "Generate every asset you need for a Product Hunt launch: listing copy, maker comment, and day-one social posts.",
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# ─────────────────────────────────────────────────────────────
|
|
2
|
+
# pricing-page-psychology-audit — Environment Variables
|
|
3
|
+
# ─────────────────────────────────────────────────────────────
|
|
4
|
+
#
|
|
5
|
+
# NO API KEYS REQUIRED.
|
|
6
|
+
# This skill uses only requests + BeautifulSoup4 to scrape.
|
|
7
|
+
# All analysis is done by the AI agent using scraped text.
|
|
8
|
+
#
|
|
9
|
+
# ─────────────────────────────────────────────────────────────
|
|
10
|
+
# OPTIONAL: Proxy support (use if target site blocks direct requests)
|
|
11
|
+
# ─────────────────────────────────────────────────────────────
|
|
12
|
+
|
|
13
|
+
# HTTP_PROXY=http://your-proxy-host:port
|
|
14
|
+
# HTTPS_PROXY=http://your-proxy-host:port
|
|
15
|
+
|
|
16
|
+
# ─────────────────────────────────────────────────────────────
|
|
17
|
+
# OPTIONAL: Custom request timeout in seconds (default: 15)
|
|
18
|
+
# ─────────────────────────────────────────────────────────────
|
|
19
|
+
|
|
20
|
+
# SCRAPE_TIMEOUT=15
|
|
21
|
+
|
|
22
|
+
# ─────────────────────────────────────────────────────────────
|
|
23
|
+
# OPTIONAL: Custom User-Agent string (override default browser UA)
|
|
24
|
+
# ─────────────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
# SCRAPE_USER_AGENT=Mozilla/5.0 ...
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
<img src="https://images.unsplash.com/photo-1551288049-bebda4e38f71?w=1280&h=640&fit=crop&q=80" width="100%" alt="cover" />
|
|
2
|
+
|
|
3
|
+
# pricing-page-psychology-audit
|
|
4
|
+
|
|
5
|
+
> Paste any SaaS pricing page URL. Get a full audit against 12 pricing
|
|
6
|
+
> psychology principles — with scores, specific rewrites, and your Top 3
|
|
7
|
+
> Quick Wins ranked by impact.
|
|
8
|
+
|
|
9
|
+
[](https://opendirectory.dev)
|
|
10
|
+
[](https://github.com/Varnan-Tech/opendirectory)
|
|
11
|
+
[](https://opensource.org/licenses/MIT)
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## What It Does
|
|
16
|
+
|
|
17
|
+
Most SaaS pricing pages leave money on the table — not because of bad pricing,
|
|
18
|
+
but because of bad psychology. This skill scrapes any pricing page and audits it
|
|
19
|
+
against 12 proven pricing psychology principles used by companies like Notion,
|
|
20
|
+
Linear, and Vercel.
|
|
21
|
+
|
|
22
|
+
**12 Principles Audited:**
|
|
23
|
+
|
|
24
|
+
| # | Principle | What It Checks |
|
|
25
|
+
|---|-----------|----------------|
|
|
26
|
+
| 1 | Anchoring | Is the priciest plan shown first to anchor perception? |
|
|
27
|
+
| 2 | Decoy Effect | Is there a tier that makes the top plan look like great value? |
|
|
28
|
+
| 3 | Loss Aversion Framing | Does copy use "don't lose access" vs purely gain language? |
|
|
29
|
+
| 4 | Feature-vs-Value Naming | Do tiers sell outcomes or just list features? |
|
|
30
|
+
| 5 | Social Proof Placement | Are testimonials/logos visible near the pricing tiers? |
|
|
31
|
+
| 6 | Urgency / Scarcity | Are there "limited time" signals or countdown elements? |
|
|
32
|
+
| 7 | Plan Naming Psychology | Are names aspirational (Growth, Scale) vs generic (Pro, Basic)? |
|
|
33
|
+
| 8 | CTA Button Copy | Do CTAs say "Start closing more deals" vs "Sign up"? |
|
|
34
|
+
| 9 | Free Trial vs Freemium | Is the free offer framed clearly without confusion? |
|
|
35
|
+
| 10 | Price Ending Tactics | Do prices end in 9 ($49) or round ($50)? |
|
|
36
|
+
| 11 | Visual Hierarchy | Is the recommended tier visually distinct (badge, highlight)? |
|
|
37
|
+
| 12 | Guarantee / Trust Signals | Is there a money-back guarantee near the CTA? |
|
|
38
|
+
|
|
39
|
+
**Output includes:**
|
|
40
|
+
- ✅ / ⚠️ / ❌ score per principle
|
|
41
|
+
- Specific rewrite suggestions per tier
|
|
42
|
+
- **Top 3 Quick Wins** — highest-leverage changes, prioritized by impact vs effort
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## How It Works
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
User provides URL
|
|
50
|
+
↓
|
|
51
|
+
scripts/scrape_pricing.py fetches and extracts:
|
|
52
|
+
- Plan names & prices
|
|
53
|
+
- CTA button copy
|
|
54
|
+
- Feature list items
|
|
55
|
+
- Full visible text
|
|
56
|
+
↓
|
|
57
|
+
AI evaluates scraped content against 12 psychology principles
|
|
58
|
+
↓
|
|
59
|
+
Structured Markdown audit report output
|
|
60
|
+
+ Top 3 Quick Wins
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Prerequisites
|
|
66
|
+
|
|
67
|
+
- Python 3.10+
|
|
68
|
+
- pip packages:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install requests beautifulsoup4
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
- Works with: **Claude Code · Gemini CLI · Cursor · Antigravity**
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Install
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
npx @opendirectory.dev/skills install pricing-page-psychology-audit
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Usage
|
|
87
|
+
|
|
88
|
+
### Basic audit:
|
|
89
|
+
```
|
|
90
|
+
"Use pricing-page-psychology-audit to audit https://linear.app/pricing"
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### More examples:
|
|
94
|
+
```
|
|
95
|
+
"Audit the pricing page at https://notion.so/pricing"
|
|
96
|
+
"Run a psychology audit on https://vercel.com/pricing"
|
|
97
|
+
"What's wrong with https://stripe.com/pricing from a psychology perspective?"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Example Output
|
|
103
|
+
|
|
104
|
+
```markdown
|
|
105
|
+
# Pricing Page Psychology Audit
|
|
106
|
+
**URL:** https://linear.app/pricing
|
|
107
|
+
**Audited on:** 2026-04-18
|
|
108
|
+
**Overall Score:** 9/12 principles passing
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Audit Results
|
|
113
|
+
|
|
114
|
+
### 1. Anchoring — ✅ Pass
|
|
115
|
+
**What we found:** Enterprise plan is listed last but priced highest at
|
|
116
|
+
custom pricing, creating an anchor that makes the $16/seat Business plan
|
|
117
|
+
feel accessible.
|
|
118
|
+
**Suggestion:** Consider moving Enterprise to first position for stronger
|
|
119
|
+
anchoring effect.
|
|
120
|
+
|
|
121
|
+
### 2. Decoy Effect — ⚠️ Needs Work
|
|
122
|
+
**What we found:** The Business tier exists between Free and Enterprise
|
|
123
|
+
but is not clearly positioned as the "sweet spot."
|
|
124
|
+
**Suggestion:** Add a "Most Popular" badge to Business and increase visual
|
|
125
|
+
size to activate the decoy effect.
|
|
126
|
+
|
|
127
|
+
[... 10 more principles ...]
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## 🏆 Top 3 Quick Wins
|
|
132
|
+
|
|
133
|
+
**Quick Win #1 — CTA Button Copy**
|
|
134
|
+
Current: "Get started"
|
|
135
|
+
Rewrite to: "Start shipping faster — free"
|
|
136
|
+
Why: Action-outcome CTAs convert 14% higher than generic "Get started" copy.
|
|
137
|
+
|
|
138
|
+
**Quick Win #2 — Social Proof Placement**
|
|
139
|
+
Current: Logos shown on a separate /customers page
|
|
140
|
+
Rewrite to: Add 3 customer logos directly below the pricing tiers
|
|
141
|
+
Why: Social proof near the decision point reduces purchase anxiety.
|
|
142
|
+
|
|
143
|
+
**Quick Win #3 — Guarantee / Trust Signal**
|
|
144
|
+
Current: No guarantee mentioned on pricing page
|
|
145
|
+
Rewrite to: Add "30-day money-back guarantee. No questions asked." below CTAs
|
|
146
|
+
Why: Guarantees have been shown to increase conversion by up to 21%.
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Project Structure
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
pricing-page-psychology-audit/
|
|
155
|
+
├── SKILL.md ← AI instructions (the brain)
|
|
156
|
+
├── README.md ← This file
|
|
157
|
+
├── .env.example ← No API keys required
|
|
158
|
+
└── scripts/
|
|
159
|
+
└── scrape_pricing.py ← Python scraper (requests + BeautifulSoup)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## License
|
|
165
|
+
|
|
166
|
+
MIT — Built by [@ajaycodesitbetter](https://github.com/ajaycodesitbetter)
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: pricing-page-psychology-audit
|
|
3
|
+
description: Audits any SaaS pricing page URL against 12 pricing psychology principles and outputs a ranked improvement report with specific rewrite suggestions and quick wins.
|
|
4
|
+
author: ajaycodesitbetter
|
|
5
|
+
version: 1.0.0
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Pricing Page Psychology Audit
|
|
9
|
+
|
|
10
|
+
Scrape any SaaS pricing page and audit it against 12 proven pricing psychology
|
|
11
|
+
principles. Get a scored Markdown report with specific rewrite suggestions per
|
|
12
|
+
tier and a "Top 3 Quick Wins" section.
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Step 1: Get the Target URL
|
|
17
|
+
|
|
18
|
+
Ask the user:
|
|
19
|
+
"Which SaaS pricing page should I audit? Share the full URL
|
|
20
|
+
(e.g. https://linear.app/pricing)"
|
|
21
|
+
|
|
22
|
+
If no URL is provided, stop and ask. Do not proceed without a valid URL
|
|
23
|
+
starting with http:// or https://.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Step 2: Run the Scraper
|
|
28
|
+
|
|
29
|
+
Run the scraper script with the URL:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
python scripts/scrape_pricing.py "URL_HERE"
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
The script outputs structured text to stdout. Capture the output — it contains:
|
|
36
|
+
- Page title
|
|
37
|
+
- All visible text content
|
|
38
|
+
- Button labels (CTAs)
|
|
39
|
+
- Plan names and prices
|
|
40
|
+
- Feature list items
|
|
41
|
+
|
|
42
|
+
If the script fails (timeout, blocked, invalid URL), tell the user:
|
|
43
|
+
"The page could not be scraped: [error]. Try a different URL or check
|
|
44
|
+
if the site blocks bots."
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Step 3: Evaluate Against 12 Psychology Principles
|
|
49
|
+
|
|
50
|
+
Analyze the scraped content against each principle. For each, assign:
|
|
51
|
+
- ✅ Pass — clearly present and well-executed
|
|
52
|
+
- ⚠️ Needs Work — present but weak or could be improved
|
|
53
|
+
- ❌ Missing — not present at all
|
|
54
|
+
|
|
55
|
+
### The 12 Principles:
|
|
56
|
+
|
|
57
|
+
1. **Anchoring** — Is there a high-priced plan shown first or prominently to
|
|
58
|
+
make others feel cheaper?
|
|
59
|
+
|
|
60
|
+
2. **Decoy Effect** — Is there a middle-tier plan designed to make the top
|
|
61
|
+
tier look like better value?
|
|
62
|
+
|
|
63
|
+
3. **Loss Aversion Framing** — Does copy use "don't miss out", "limited",
|
|
64
|
+
"you'll lose access" rather than purely gain language?
|
|
65
|
+
|
|
66
|
+
4. **Feature-vs-Value Naming** — Do plan names/descriptions highlight
|
|
67
|
+
outcomes ("Close more deals") vs just features ("10 seats")?
|
|
68
|
+
|
|
69
|
+
5. **Social Proof Placement** — Are testimonials, logos, or user counts
|
|
70
|
+
shown near pricing tiers (not just on a separate page)?
|
|
71
|
+
|
|
72
|
+
6. **Urgency / Scarcity Signals** — Is there a countdown timer, limited
|
|
73
|
+
spots badge, or "offer ends" language?
|
|
74
|
+
|
|
75
|
+
7. **Plan Naming Psychology** — Are plan names aspirational
|
|
76
|
+
(Starter/Growth/Scale) vs generic (Basic/Pro/Enterprise)?
|
|
77
|
+
|
|
78
|
+
8. **CTA Button Copy** — Do CTAs say action-outcome ("Start growing free")
|
|
79
|
+
vs generic ("Sign up" or "Get started")?
|
|
80
|
+
|
|
81
|
+
9. **Free Trial vs Freemium Framing** — Is the free offer framed clearly?
|
|
82
|
+
Does it reduce friction or create confusion?
|
|
83
|
+
|
|
84
|
+
10. **Price Ending Tactics** — Do prices end in 9 ($49, $99) for perceived
|
|
85
|
+
value, or round numbers ($50, $100) for premium feel?
|
|
86
|
+
|
|
87
|
+
11. **Visual Hierarchy of Tiers** — Is the recommended/popular plan visually
|
|
88
|
+
highlighted (badge, border, size difference)?
|
|
89
|
+
|
|
90
|
+
12. **Guarantee / Trust Signal Presence** — Is there a money-back guarantee,
|
|
91
|
+
"no credit card required", or security badge near the CTA?
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Step 4: Generate the Audit Report
|
|
96
|
+
|
|
97
|
+
Output the report in this exact Markdown structure:
|
|
98
|
+
|
|
99
|
+
```
|
|
100
|
+
# Pricing Page Psychology Audit
|
|
101
|
+
**URL:** [URL]
|
|
102
|
+
**Audited on:** [today's date]
|
|
103
|
+
**Overall Score:** X/12 principles passing
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## Audit Results
|
|
108
|
+
|
|
109
|
+
### 1. Anchoring — ✅ Pass / ⚠️ Needs Work / ❌ Missing
|
|
110
|
+
**What we found:** [1-2 sentences from the page]
|
|
111
|
+
**Suggestion:** [Specific rewrite or change to make]
|
|
112
|
+
|
|
113
|
+
[Repeat for all 12 principles]
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## 🏆 Top 3 Quick Wins
|
|
118
|
+
|
|
119
|
+
These are your highest-leverage changes, prioritized by impact vs effort:
|
|
120
|
+
|
|
121
|
+
**Quick Win #1 — [Principle name]**
|
|
122
|
+
Current: "[exact copy from page]"
|
|
123
|
+
Rewrite to: "[your improved version]"
|
|
124
|
+
Why: [1 sentence on the psychological mechanism]
|
|
125
|
+
|
|
126
|
+
**Quick Win #2 — [Principle name]**
|
|
127
|
+
...
|
|
128
|
+
|
|
129
|
+
**Quick Win #3 — [Principle name]**
|
|
130
|
+
...
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## Step 5: Self-QA Before Output
|
|
136
|
+
|
|
137
|
+
Check before presenting the report:
|
|
138
|
+
- [ ] All 12 principles are scored (none skipped)
|
|
139
|
+
- [ ] Each "Suggestion" is specific — no generic advice like "add social proof"
|
|
140
|
+
- [ ] Quick Wins cite actual copy from the page (not invented)
|
|
141
|
+
- [ ] Scores reflect what is literally present in the scraped content
|
|
142
|
+
- [ ] Date is today's actual date
|
|
143
|
+
|
|
144
|
+
Fix any violation before output.
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## Step 6: Offer Follow-ups
|
|
149
|
+
|
|
150
|
+
After presenting the report, offer:
|
|
151
|
+
1. "Export this as a PDF-ready Markdown file"
|
|
152
|
+
2. "Generate rewrite copy for all CTAs on this page"
|
|
153
|
+
3. "Compare against a competitor's pricing page"
|
|
154
|
+
4. "Build a prioritized action plan for the dev team"
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
scrape_pricing.py — Pricing Page Scraper
|
|
4
|
+
Part of: pricing-page-psychology-audit skill
|
|
5
|
+
Author: ajaycodesitbetter
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python scripts/scrape_pricing.py "https://linear.app/pricing"
|
|
9
|
+
|
|
10
|
+
Output:
|
|
11
|
+
Structured plain-text to stdout for AI analysis.
|
|
12
|
+
Errors are printed to stderr so stdout stays clean.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import sys
|
|
17
|
+
import re
|
|
18
|
+
import requests
|
|
19
|
+
from bs4 import BeautifulSoup
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# ── Constants ────────────────────────────────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
TIMEOUT_SECONDS = int(os.environ.get("SCRAPE_TIMEOUT", 15))
|
|
25
|
+
|
|
26
|
+
# Browser-like headers to reduce bot-blocking (no API key needed)
|
|
27
|
+
HEADERS = {
|
|
28
|
+
"User-Agent": (
|
|
29
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
|
30
|
+
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
31
|
+
"Chrome/124.0.0.0 Safari/537.36"
|
|
32
|
+
),
|
|
33
|
+
"Accept-Language": "en-US,en;q=0.9",
|
|
34
|
+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
# Allow env var override of User-Agent (e.g. for custom scraping setups)
|
|
38
|
+
_custom_ua = os.environ.get("SCRAPE_USER_AGENT")
|
|
39
|
+
if _custom_ua:
|
|
40
|
+
HEADERS["User-Agent"] = _custom_ua
|
|
41
|
+
|
|
42
|
+
# HTML tags that carry pricing-relevant content
|
|
43
|
+
CONTENT_TAGS = ["h1", "h2", "h3", "h4", "p", "li", "span", "button", "a"]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ── Helpers ──────────────────────────────────────────────────────────────────
|
|
47
|
+
|
|
48
|
+
def clean(text: str) -> str:
    """Collapse every run of whitespace to a single space and trim the ends."""
    return " ".join(text.split())
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def validate_url(url: str) -> bool:
    """Return True when *url* carries an explicit http:// or https:// scheme."""
    return url.startswith(("http://", "https://"))
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def fetch_page(url: str) -> str:
    """
    Fetch the raw HTML of *url* using browser-like HEADERS and the
    configured TIMEOUT_SECONDS.

    Returns:
        The response body as text on success.

    Raises:
        RuntimeError: with a user-friendly message on any request failure.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=TIMEOUT_SECONDS)
        response.raise_for_status()
        return response.text

    except requests.exceptions.MissingSchema as e:
        raise RuntimeError(
            f"Invalid URL format: '{url}'. Include http:// or https://"
        ) from e

    # NOTE: Timeout must be handled BEFORE ConnectionError —
    # requests.exceptions.ConnectTimeout subclasses both, so with the
    # opposite order a connect timeout is misreported as a plain
    # connection failure.
    except requests.exceptions.Timeout as e:
        raise RuntimeError(
            f"Request timed out after {TIMEOUT_SECONDS}s for '{url}'."
        ) from e

    except requests.exceptions.ConnectionError as e:
        raise RuntimeError(
            f"Could not connect to '{url}'. Check the URL or your internet."
        ) from e

    except requests.exceptions.HTTPError as e:
        # raise_for_status() attaches the response, but guard defensively.
        code = e.response.status_code if e.response is not None else "unknown"
        if code == 403:
            raise RuntimeError(
                f"Access blocked (HTTP 403) — '{url}' may have bot protection. "
                "Try opening it in a browser and using the page source manually."
            ) from e
        raise RuntimeError(f"HTTP error {code} for '{url}': {e}") from e

    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"Unexpected request error for '{url}': {e}") from e
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# ── Extraction ────────────────────────────────────────────────────────────────
|
|
91
|
+
|
|
92
|
+
def extract_page_title(soup: BeautifulSoup) -> str:
    """Return the cleaned <title> text, or a placeholder when no title exists."""
    title_tag = soup.find("title")
    if not title_tag:
        return "No title found"
    return clean(title_tag.get_text())
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def extract_buttons(soup: BeautifulSoup) -> list:
    """
    Collect CTA text from the page.

    Sources: every <button> element, plus any <a href=...> whose class
    list contains a common call-to-action keyword. Results keep document
    order and are deduplicated case-insensitively.
    """
    cta_keywords = ["btn", "button", "cta", "action", "signup", "start", "trial"]
    labels = []

    # Plain <button> elements first.
    for element in soup.find_all("button"):
        label = clean(element.get_text())
        if label:
            labels.append(label)

    # Then links styled like CTAs (detected via class-name keywords).
    for anchor in soup.find_all("a", href=True):
        class_attr = " ".join(anchor.get("class", [])).lower()
        if any(keyword in class_attr for keyword in cta_keywords):
            label = clean(anchor.get_text())
            if label:
                labels.append(label)

    # Order-preserving, case-insensitive dedup.
    seen = set()
    deduped = []
    for label in labels:
        key = label.lower()
        if key not in seen:
            seen.add(key)
            deduped.append(label)

    return deduped
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def extract_prices(soup: BeautifulSoup) -> list:
|
|
132
|
+
"""
|
|
133
|
+
Extract price strings using regex on page text.
|
|
134
|
+
Catches formats like: $49, $49/mo, $49/month, EUR99, GBP19.99, Free
|
|
135
|
+
"""
|
|
136
|
+
text = soup.get_text(" ", strip=True)
|
|
137
|
+
price_pattern = re.compile(
|
|
138
|
+
r"(Free|free|\$[\d,.]+(?:/\w+)?|€[\d,.]+(?:/\w+)?|£[\d,.]+(?:/\w+)?|"
|
|
139
|
+
r"[\d,.]+\s*(?:USD|EUR|GBP)(?:/\w+)?)"
|
|
140
|
+
)
|
|
141
|
+
matches = price_pattern.findall(text)
|
|
142
|
+
|
|
143
|
+
# Deduplicate while preserving order
|
|
144
|
+
seen = set()
|
|
145
|
+
unique = []
|
|
146
|
+
for m in matches:
|
|
147
|
+
val = m.strip()
|
|
148
|
+
if val.lower() not in seen:
|
|
149
|
+
seen.add(val.lower())
|
|
150
|
+
unique.append(val)
|
|
151
|
+
|
|
152
|
+
return unique
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def extract_plan_names(soup: BeautifulSoup) -> list:
    """
    Guess plan/tier names from two signals:

    1. h2-h4 headings whose direct parent carries a pricing-related
       class name (plan/tier/pricing/package).
    2. Elements whose entire cleaned text is a well-known tier word
       (Free, Pro, Enterprise, ...).

    Results keep document order and are deduplicated case-insensitively.
    """
    section_keywords = ["plan", "tier", "pricing", "package"]
    tier_name_re = re.compile(
        r"^(free|starter|basic|pro|growth|scale|business|enterprise|team|"
        r"plus|premium|advanced|essentials|standard)$",
        re.IGNORECASE,
    )

    found = []

    # Signal 1: headings nested in pricing-ish containers.
    for heading in soup.find_all(["h2", "h3", "h4"]):
        container = heading.find_parent()
        container_classes = ""
        if container:
            container_classes = " ".join(container.get("class", [])).lower()
        if any(kw in container_classes for kw in section_keywords):
            label = clean(heading.get_text())
            if label and len(label) < 50:  # real plan names are short
                found.append(label)

    # Signal 2: standalone elements that ARE a tier word.
    for node in soup.find_all(["h2", "h3", "h4", "span", "p"]):
        label = clean(node.get_text())
        if tier_name_re.match(label):
            found.append(label)

    # Order-preserving, case-insensitive dedup.
    seen = set()
    names = []
    for label in found:
        key = label.lower()
        if key not in seen:
            seen.add(key)
            names.append(label)

    return names
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def extract_features(soup: BeautifulSoup) -> list:
    """
    Collect likely feature bullets: <li> items whose cleaned text is a
    short single line (4-119 characters). Results keep document order,
    are deduplicated case-insensitively, and are capped at 60 entries
    to keep the output focused.
    """
    seen = set()
    features = []
    for item in soup.find_all("li"):
        text = clean(item.get_text())
        # Feature items are usually one short line.
        if not text or not (3 < len(text) < 120):
            continue
        key = text.lower()
        if key not in seen:
            seen.add(key)
            features.append(text)

    return features[:60]
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def extract_all_text(soup: BeautifulSoup) -> str:
    """
    Return all visible text in document order, one element per line.

    Side effect: decomposes <script>/<style>/<nav>/<footer>/<noscript>/
    <meta> nodes, mutating the soup object passed in. Consecutive
    duplicate lines (common in SPA markup) are collapsed to one.
    """
    # Strip noisy, non-content tags in place.
    for noisy in soup(["script", "style", "nav", "footer", "noscript", "meta"]):
        noisy.decompose()

    collected = []
    for node in soup.find_all(CONTENT_TAGS):
        text = clean(node.get_text())
        if text and len(text) > 2:
            # Skip an immediate repeat of the previously kept line.
            if collected and collected[-1] == text:
                continue
            collected.append(text)

    return "\n".join(collected)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
# ── Main ──────────────────────────────────────────────────────────────────────
|
|
244
|
+
|
|
245
|
+
def main():
    """CLI entry point: validate argv, scrape the URL, print the report."""

    def fail(*messages):
        # Errors go to stderr so stdout stays clean for the AI consumer.
        for msg in messages:
            print(msg, file=sys.stderr)
        sys.exit(1)

    def section(header, items, empty_note):
        # One "## HEADER" block: bullet per item, or a parenthesized note.
        print(f"\n## {header}")
        if items:
            for item in items:
                print(f" - {item}")
        else:
            print(f" ({empty_note})")

    # Validate arguments.
    if len(sys.argv) < 2:
        fail("ERROR: No URL provided.",
             "Usage: python scripts/scrape_pricing.py <URL>")

    url = sys.argv[1].strip()
    if not validate_url(url):
        fail(f"ERROR: Invalid URL '{url}'. Must start with http:// or https://")

    # Fetch the page.
    try:
        html = fetch_page(url)
    except RuntimeError as e:
        fail(f"ERROR: {e}")

    # Parse HTML and extract structured data.
    soup = BeautifulSoup(html, "html.parser")
    title = extract_page_title(soup)
    buttons = extract_buttons(soup)
    prices = extract_prices(soup)
    plan_names = extract_plan_names(soup)
    features = extract_features(soup)
    all_text = extract_all_text(soup)

    # ── Output to stdout (clean structured text for AI) ──────────────────────
    separator = "\u2500" * 60

    print(f"PAGE TITLE: {title}")
    print(f"URL: {url}")
    print(separator)

    section("PLAN NAMES DETECTED", plan_names,
            "none detected — check page structure")
    section("PRICES DETECTED", prices,
            "none detected — page may use dynamic pricing")
    section("CTA BUTTON TEXT", buttons, "none detected")
    # Show only the top 30 features for readability.
    section("FEATURE LIST ITEMS", features[:30], "none detected")

    print(f"\n{separator}")
    print("## FULL PAGE TEXT (for AI analysis)")
    print(separator)
    print(all_text)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
if __name__ == "__main__":
|
|
318
|
+
main()
|