create-claudeportal 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.html CHANGED
@@ -4,7 +4,7 @@
4
4
  <meta charset="UTF-8" />
5
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
6
  <title>Claude Portal</title>
7
- <script type="module" crossorigin src="/assets/index-ChGdxmzn.js"></script>
7
+ <script type="module" crossorigin src="/assets/index-cqtKQEpE.js"></script>
8
8
  <link rel="stylesheet" crossorigin href="/assets/index-BG0yZd9Y.css">
9
9
  </head>
10
10
  <body>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "create-claudeportal",
3
- "version": "0.2.2",
3
+ "version": "0.3.0",
4
4
  "description": "Get from npx to a working app in under 5 minutes — Claude Code setup wizard",
5
5
  "bin": {
6
6
  "create-claudeportal": "bin/cli.js"
package/server/index.js CHANGED
@@ -19,6 +19,7 @@ const { validateProjectPath } = require('./lib/validate-path')
19
19
  const folderRoutes = require('./routes/folder')
20
20
  const { createDocEventsRouter } = require('./routes/doc-events')
21
21
  const { createPreviewProxy } = require('./routes/preview-proxy')
22
+ const brainRoutes = require('./routes/brain')
22
23
 
23
24
  function startServer(port) {
24
25
  return new Promise((resolve) => {
@@ -112,6 +113,7 @@ function startServer(port) {
112
113
  app.use('/api', createEventsRouter(sseManager, fileWatcher))
113
114
  app.use('/api', folderRoutes)
114
115
  app.use('/api', createDocEventsRouter(sseManager))
116
+ app.use('/api', brainRoutes)
115
117
 
116
118
  // Preview proxy — captures browser console errors and feeds them to terminal
117
119
  app.use('/api', createPreviewProxy(() => activePty))
@@ -0,0 +1,180 @@
1
+ const fs = require('fs')
2
+ const path = require('path')
3
+ const os = require('os')
4
+
5
+ let DEFAULT_BRAIN_DIR = path.join(os.homedir(), 'Claude', 'brain')
6
+
7
+ const CATEGORY_TITLES = {
8
+ business: 'Business Context',
9
+ industry: 'Industry Context',
10
+ voice: 'Voice Context',
11
+ competitors: 'Competitors Context',
12
+ }
13
+
14
+ const EXPECTED_FIELDS = {
15
+ business: ['Name', 'Type', 'Type Label', 'Description', 'Location', 'Website', 'Email List Detected', 'Social Profiles', 'Reviews', 'Dream Outcome', 'Pricing', 'Value Stack', 'Bonuses', 'Guarantee', 'Urgency', 'Revenue Baseline', 'Team Size'],
16
+ industry: ['Sector', 'Target Audience', 'Avg Customer Value', 'Seasonal Patterns', 'Market Trends', 'Common Objections', 'Buying Triggers', 'ICP Segment 1', 'ICP Segment 2', 'ICP Segment 3'],
17
+ voice: ['Template', 'Tone', 'Words To Avoid', 'Preferred Length', 'Social Tone', 'Email Tone', 'Visual Style', 'Energy Level'],
18
+ competitors: ['Identified', 'Their Pricing', 'Their Offers', 'Their Strengths', 'Their Weaknesses', 'Differentiator', 'AI Visibility Score', 'Competitor Ad Angles', 'Longest Running Ads'],
19
+ }
20
+
21
/**
 * Parse `- **Key:** value` bullet lines out of a brain markdown document.
 *
 * @param {string} content - Markdown text to scan.
 * @returns {Object<string, string>} Field name mapped to its trimmed value.
 *   Lines that do not match the bullet pattern are ignored.
 */
function parseMdFields(content) {
  const fields = {}
  // Accept CRLF as well as LF: `.` never matches `\r`, so a stray carriage
  // return at the end of a line would stop the pattern from reaching `$`.
  for (const line of content.split(/\r?\n/)) {
    const match = line.match(/^- \*\*(.+?):\*\*\s*(.+)$/)
    if (match) {
      fields[match[1]] = match[2].trim()
    }
  }
  return fields
}
32
+
33
/**
 * Render a category's fields as a markdown document.
 *
 * @param {string} category - Category key; looked up in CATEGORY_TITLES, falling
 *   back to the key itself for the heading.
 * @param {Object<string, string>} fields - Field name to value.
 * @returns {string} `# Title` heading followed by one `- **Key:** value` bullet
 *   per field whose value is non-blank.
 */
function fieldsToMd(category, fields) {
  const heading = CATEGORY_TITLES[category] || category
  const bullets = Object.entries(fields)
    .filter(([, value]) => value && value.trim())
    .map(([key, value]) => `- **${key}:** ${value.trim()}\n`)
  return `# ${heading}\n\n` + bullets.join('')
}
43
+
44
/**
 * Merge new field values into `<brainDir>/<category>.md`, creating the
 * directory and file when needed.
 *
 * Existing structured fields are kept unless overwritten by a non-blank value
 * in `fields`. A trailing "## Raw Context" section is preserved verbatim.
 *
 * @param {string} category - Category key, used as the file name stem.
 * @param {Object<string, string>} fields - Field name to new value. Blank and
 *   non-string values are ignored.
 * @param {string} [brainDir=DEFAULT_BRAIN_DIR] - Directory holding the brain files.
 * @returns {void}
 */
function saveBrainCategory(category, fields, brainDir = DEFAULT_BRAIN_DIR) {
  fs.mkdirSync(brainDir, { recursive: true })
  const filePath = path.join(brainDir, `${category}.md`)

  let existing = {}
  let rawSection = ''
  if (fs.existsSync(filePath)) {
    const content = fs.readFileSync(filePath, 'utf8')
    const rawMatch = content.match(/\n## Raw Context\n([\s\S]*)$/)
    if (rawMatch) rawSection = rawMatch[0]
    // Only the part before "## Raw Context" holds structured fields. Parsing the
    // whole file would copy bullet-shaped lines from pasted raw text into the
    // structured section on every save.
    const structured = rawMatch ? content.slice(0, rawMatch.index) : content
    existing = parseMdFields(structured)
  }

  const merged = { ...existing }
  for (const [key, value] of Object.entries(fields)) {
    // Skip non-strings instead of crashing on `.trim()`.
    if (typeof value !== 'string') continue
    const trimmed = value.trim()
    if (trimmed) merged[key] = trimmed
  }

  fs.writeFileSync(filePath, fieldsToMd(category, merged) + rawSection)
}
66
+
67
/**
 * Append free-form text to the "## Raw Context" section of a category file.
 *
 * The directory and file are created when missing; a new file starts with the
 * category heading. The "## Raw Context" heading is added only when the file
 * does not already contain it.
 *
 * @param {string} category - Category key, used as the file name stem.
 * @param {string} text - Text to append (trimmed, followed by a blank line).
 * @param {string} [brainDir=DEFAULT_BRAIN_DIR] - Directory holding the brain files.
 * @returns {void}
 */
function appendRawContext(category, text, brainDir = DEFAULT_BRAIN_DIR) {
  fs.mkdirSync(brainDir, { recursive: true })
  const target = path.join(brainDir, `${category}.md`)

  const current = fs.existsSync(target)
    ? fs.readFileSync(target, 'utf8')
    : `# ${CATEGORY_TITLES[category] || category}\n\n`

  const heading = current.includes('## Raw Context') ? '' : '\n## Raw Context\n\n'
  const updated = current + heading + text.trim() + '\n\n'

  fs.writeFileSync(target, updated)
}
86
+
87
/**
 * Count how many of the expected fields have a non-blank value in `content`.
 *
 * @param {string} content - Markdown text containing `- **Key:** value` bullets.
 * @param {string[]} expectedFields - Field names to look for.
 * @returns {number} Number of expected fields present with a non-blank value.
 */
function countFields(content, expectedFields) {
  let filled = 0
  for (const field of expectedFields) {
    const escaped = field.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    // Only spaces/tabs may separate the label from its value. `\s*` would also
    // consume the newline, letting an empty field borrow the next line as its
    // "value" and be counted as filled.
    const regex = new RegExp(`^- \\*\\*${escaped}:\\*\\*[ \\t]*(.+)$`, 'm')
    const match = content.match(regex)
    if (match && match[1].trim().length > 0) filled++
  }
  return filled
}
96
+
97
/**
 * Summarise how complete the brain files in `brainDir` are.
 *
 * @param {string} [brainDir=DEFAULT_BRAIN_DIR] - Directory holding the brain files.
 * @returns {{exists: boolean, businessName: (string|null), completeness: number,
 *   categories: Object<string, {filled: number, total: number, percent: number}>}}
 *   `exists` reflects whether `brainDir` is present; per-category totals come
 *   from EXPECTED_FIELDS; percentages are rounded to whole numbers.
 */
function getBrainStatus(brainDir = DEFAULT_BRAIN_DIR) {
  const exists = fs.existsSync(brainDir)

  const categories = {}
  let totalFilled = 0
  let totalFields = 0
  let businessName = null

  // One loop covers both cases. When the directory is missing every category
  // reports zero, with totals still derived from EXPECTED_FIELDS rather than
  // from hard-coded numbers that could drift out of sync.
  for (const [cat, fields] of Object.entries(EXPECTED_FIELDS)) {
    const filePath = path.join(brainDir, `${cat}.md`)
    let filled = 0
    if (exists && fs.existsSync(filePath)) {
      const content = fs.readFileSync(filePath, 'utf8')
      filled = countFields(content, fields)
      if (cat === 'business') {
        // `[ \t]*` keeps the match on the Name line; `\s*` would run past an
        // empty Name and report the following line as the business name.
        const nameMatch = content.match(/^- \*\*Name:\*\*[ \t]*(.+)$/m)
        const name = nameMatch ? nameMatch[1].trim() : ''
        if (name) businessName = name
      }
    }
    categories[cat] = { filled, total: fields.length, percent: Math.round((filled / fields.length) * 100) }
    totalFilled += filled
    totalFields += fields.length
  }

  return {
    exists,
    businessName,
    completeness: totalFields > 0 ? Math.round((totalFilled / totalFields) * 100) : 0,
    categories,
  }
}
140
+
141
/**
 * Make sure `~/.claude/CLAUDE.md` contains a "## Business Context" section
 * pointing at the brain files.
 *
 * An exclusive-create lock file (`CLAUDE.md.brain-lock`) guards the
 * read-check-append sequence and is removed afterwards.
 *
 * @param {string} [brainDir=DEFAULT_BRAIN_DIR] - Directory written into the section.
 * @returns {boolean} true when the section was appended; false when the lock
 *   file could not be created or the section is already present.
 */
function ensureClaudeMdReference(brainDir = DEFAULT_BRAIN_DIR) {
  const claudeDir = path.join(os.homedir(), '.claude')
  const claudeMdPath = path.join(os.homedir(), '.claude', 'CLAUDE.md')
  const lockPath = claudeMdPath + '.brain-lock'

  fs.mkdirSync(claudeDir, { recursive: true })

  // 'wx' fails when the lock already exists; any failure here means "not done".
  try {
    fs.writeFileSync(lockPath, String(process.pid), { flag: 'wx' })
  } catch {
    return false
  }

  try {
    const current = fs.existsSync(claudeMdPath) ? fs.readFileSync(claudeMdPath, 'utf8') : ''
    if (current.includes('## Business Context')) {
      return false
    }

    const section = [
      '',
      '## Business Context',
      'For business-specific context about this user, read:',
      `- ${brainDir}/business.md — identity, offer, pricing, operations`,
      `- ${brainDir}/industry.md — audience, market, ICP segments`,
      `- ${brainDir}/voice.md — communication style, tone preferences`,
      `- ${brainDir}/competitors.md — competitive landscape`,
      `Reference documents: ${brainDir}/documents/`,
      '',
    ].join('\n')

    fs.appendFileSync(claudeMdPath, section)
    return true
  } finally {
    try { fs.unlinkSync(lockPath) } catch {}
  }
}
170
+
171
+ module.exports = {
172
+ saveBrainCategory,
173
+ appendRawContext,
174
+ getBrainStatus,
175
+ ensureClaudeMdReference,
176
+ parseMdFields,
177
+ EXPECTED_FIELDS,
178
+ get DEFAULT_BRAIN_DIR() { return DEFAULT_BRAIN_DIR },
179
+ set DEFAULT_BRAIN_DIR(v) { DEFAULT_BRAIN_DIR = v },
180
+ }
@@ -0,0 +1,59 @@
1
/**
 * Split pasted text into the four brain categories.
 *
 * Three header styles are tried in order, and the first style that matches at
 * least one line wins:
 *   1. Markdown headings   (`# Business`, `## Voice`, ...)
 *   2. Bold labels         (`**Business**`, ...)
 *   3. Plain `Label:` lines (`Business: ...`)
 * Non-blank lines after a header are added to that header's section; lines
 * before the first header are dropped. If no style matches, the whole text is
 * returned as the `business` section.
 *
 * @param {string} text - Raw pasted or imported text.
 * @returns {{business: string, industry: string, voice: string, competitors: string}}
 */
function parseImport(text) {
  const lines = text.split('\n')

  // `inline` marks styles where content may follow the header on the same line.
  const headerStyles = [
    {
      inline: false,
      headers: [
        ['business', /^#{1,3}\s*business/],
        ['industry', /^#{1,3}\s*industry/],
        ['voice', /^#{1,3}\s*voice/],
        ['competitors', /^#{1,3}\s*competitor/],
      ],
    },
    {
      inline: false,
      headers: [
        ['business', /^\*{2}business\*{2}/],
        ['industry', /^\*{2}industry\*{2}/],
        ['voice', /^\*{2}voice\*{2}/],
        ['competitors', /^\*{2}competitor/],
      ],
    },
    {
      inline: true,
      headers: [
        ['business', /^business\s*:/],
        ['industry', /^industry\s*:/],
        ['voice', /^voice\s*:/],
        ['competitors', /^competitor[^:]*:/],
      ],
    },
  ]

  for (const { inline, headers } of headerStyles) {
    const sections = { business: '', industry: '', voice: '', competitors: '' }
    let current = null
    let matched = false

    for (const line of lines) {
      const trimmed = line.trim()
      const lower = trimmed.toLowerCase()
      const header = headers.find(([, pattern]) => pattern.test(lower))

      if (header) {
        current = header[0]
        matched = true
        if (inline) {
          // "Business: Acme Co" carries content on the header line itself;
          // keep what follows the first colon instead of discarding it.
          const rest = trimmed.slice(trimmed.indexOf(':') + 1).trim()
          if (rest) sections[current] += rest + '\n'
        }
        continue
      }

      if (current && trimmed) sections[current] += trimmed + '\n'
    }

    if (matched) return trimSections(sections)
  }

  return { business: text.trim(), industry: '', voice: '', competitors: '' }
}
41
+
42
/**
 * Return a copy of the four category sections with surrounding whitespace removed.
 *
 * @param {{business: string, industry: string, voice: string, competitors: string}} sections
 * @returns {{business: string, industry: string, voice: string, competitors: string}}
 */
function trimSections(sections) {
  const { business, industry, voice, competitors } = sections
  const tidy = (value) => value.trim()
  return {
    business: tidy(business),
    industry: tidy(industry),
    voice: tidy(voice),
    competitors: tidy(competitors),
  }
}
50
+
51
/**
 * Guess which brain category a file belongs to from keywords in its name.
 *
 * Rules are checked in order (voice, competitors, industry); the first keyword
 * hit wins and anything else is treated as `business`.
 *
 * @param {string} filename - File name to inspect.
 * @returns {'voice'|'competitors'|'industry'|'business'}
 */
function guessCategoryFromFilename(filename) {
  const name = filename.toLowerCase()
  const rules = [
    ['voice', /brand|voice|tone|style|messaging/i],
    ['competitors', /competitor|competition|rival|versus|vs/i],
    ['industry', /market|audience|customer|icp|segment|industry/i],
  ]
  const hit = rules.find(([, keywords]) => keywords.test(name))
  return hit ? hit[0] : 'business'
}
58
+
59
+ module.exports = { parseImport, guessCategoryFromFilename }
@@ -88,6 +88,23 @@ const RECOMMENDED_MCPS = [
88
88
  usefulFor: ['draft', 'extract', 'transform'],
89
89
  category: 'automation',
90
90
  },
91
+ {
92
+ id: 'composio',
93
+ name: 'Composio',
94
+ description: '250+ app integrations — CRM, email, social, databases',
95
+ installHint: 'claude mcp add composio -- npx -y composio-core mcp',
96
+ usefulFor: ['draft', 'extract', 'analyse', 'transform', 'summarize'],
97
+ category: 'automation',
98
+ },
99
+ // Social Media
100
+ {
101
+ id: 'postiz',
102
+ name: 'Postiz',
103
+ description: 'Schedule and manage social media posts across platforms',
104
+ installHint: 'claude mcp add postiz -- npx -y @postiz/mcp-server',
105
+ usefulFor: ['draft', 'transform'],
106
+ category: 'social',
107
+ },
91
108
  // Meetings & Notes
92
109
  {
93
110
  id: 'fireflies',
@@ -155,6 +172,7 @@ const CATEGORY_LABELS = {
155
172
  design: 'Design & Build',
156
173
  workspace: 'Workspace',
157
174
  automation: 'Automation',
175
+ social: 'Social Media',
158
176
  meetings: 'Meetings',
159
177
  projects: 'Project Management',
160
178
  data: 'Data',
@@ -0,0 +1,367 @@
1
+ /**
2
+ * Website scraper for Claude Portal Business Brain.
3
+ *
4
+ * Fetches a website, extracts business signals, and classifies the business type.
5
+ * Adapted from AI Team Hub scraper — standalone, no LLM dependency for MVP.
6
+ */
7
+
8
+ const SCRAPE_TIMEOUT = 15000
9
+ const MAX_BODY_CHARS = 5000
10
+
11
+ // ─── Helpers ───────────────────────────────────────────────────────
12
+
13
/**
 * Decode the HTML entities commonly found in titles and meta descriptions:
 * `&amp;`, `&lt;`, `&gt;`, `&quot;` and numeric references in decimal
 * (`&#039;`) or hexadecimal (`&#x27;`) form.
 *
 * Decoding happens in a single pass, so text produced by one entity is never
 * decoded again (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param {string} str - Text possibly containing entities.
 * @returns {string} Decoded text. Numeric references outside the Unicode range
 *   are left untouched.
 */
function decodeEntities(str) {
  const named = { amp: '&', lt: '<', gt: '>', quot: '"' }
  return str.replace(/&(?:#x([0-9a-f]+)|#(\d+)|(amp|lt|gt|quot));/gi, (match, hex, dec, name) => {
    if (name) return named[name.toLowerCase()]
    const code = hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10)
    // fromCodePoint handles astral characters (emoji etc.) that fromCharCode
    // would truncate to 16 bits; it throws above 0x10FFFF, so guard first.
    return code <= 0x10ffff ? String.fromCodePoint(code) : match
  })
}
23
+
24
/**
 * Trim a user-supplied URL and default it to HTTPS when no http(s) scheme is present.
 *
 * @param {string} raw - URL as typed by the user.
 * @returns {string} Trimmed URL beginning with `http://` or `https://`.
 */
function normalizeUrl(raw) {
  const trimmed = raw.trim()
  const hasScheme = /^https?:\/\//i.test(trimmed)
  return hasScheme ? trimmed : 'https://' + trimmed
}
29
+
30
/**
 * Reduce an HTML fragment to plain text.
 *
 * Drops `<script>`, `<style>`, `<nav>` and `<footer>` blocks, removes the
 * remaining tags, decodes a handful of entities, and collapses whitespace.
 *
 * @param {string} html - HTML markup.
 * @returns {string} Single-spaced, trimmed text.
 */
function stripTags(html) {
  // Applied in order; each step feeds the next.
  const steps = [
    [/<script[\s\S]*?<\/script>/gi, ' '],
    [/<style[\s\S]*?<\/style>/gi, ' '],
    [/<nav[\s\S]*?<\/nav>/gi, ' '],
    [/<footer[\s\S]*?<\/footer>/gi, ' '],
    [/<[^>]+>/g, ' '],
    [/&nbsp;/gi, ' '],
    [/&amp;/gi, '&'],
    [/&lt;/gi, '<'],
    [/&gt;/gi, '>'],
    [/&#\d+;/g, ' '],
    [/\s+/g, ' '],
  ]
  let text = html
  for (const [pattern, replacement] of steps) {
    text = text.replace(pattern, replacement)
  }
  return text.trim()
}
45
+
46
+ // ─── Extraction ────────────────────────────────────────────────────
47
+
48
/**
 * Pull the page title, description and Open Graph image out of raw HTML.
 *
 * For each value several patterns are tried in order and the first non-empty
 * capture is used. Title and description are entity-decoded; the image URL is
 * returned as found.
 *
 * @param {string} html - Raw page HTML.
 * @returns {{title: string, description: string, ogImage: string}} Empty
 *   strings for anything not found.
 */
function extractMeta(html) {
  const firstCapture = (patterns) => {
    for (const pattern of patterns) {
      const hit = html.match(pattern)
      const value = hit ? hit[1].trim() : ''
      if (value) return value
    }
    return ''
  }

  const title = firstCapture([
    /<title[^>]*>([^<]+)<\/title>/i,
    /<meta[^>]+property="og:title"[^>]+content="([^"]+)"/i,
    /<meta[^>]+content="([^"]+)"[^>]+property="og:title"/i,
  ])

  const description = firstCapture([
    /<meta[^>]+name="description"[^>]+content="([^"]+)"/i,
    /<meta[^>]+content="([^"]+)"[^>]+name="description"/i,
    /<meta[^>]+property="og:description"[^>]+content="([^"]+)"/i,
    /<meta[^>]+content="([^"]+)"[^>]+property="og:description"/i,
  ])

  const ogImage = firstCapture([
    /<meta[^>]+property="og:image"[^>]+content="([^"]+)"/i,
    /<meta[^>]+content="([^"]+)"[^>]+property="og:image"/i,
  ])

  return { title: decodeEntities(title), description: decodeEntities(description), ogImage }
}
71
+
72
/**
 * Extract readable text from the most content-like part of the page.
 *
 * Preference order: `<main>`, then `<article>`, then `<body>`, then the whole
 * document. The chosen markup is passed through stripTags and capped at
 * MAX_BODY_CHARS characters.
 *
 * @param {string} html - Raw page HTML.
 * @returns {string} Plain text, at most MAX_BODY_CHARS long.
 */
function extractBodyText(html) {
  const containers = [
    /<main[\s\S]*?<\/main>/i,
    /<article[\s\S]*?<\/article>/i,
    /<body[\s\S]*?<\/body>/i,
  ]
  let source = html
  for (const container of containers) {
    const hit = html.match(container)
    if (hit) {
      source = hit[0]
      break
    }
  }
  return stripTags(source).slice(0, MAX_BODY_CHARS)
}
78
+
79
/**
 * Collect the value of every double-quoted `href="..."` attribute in the HTML.
 *
 * @param {string} html - Raw page HTML.
 * @returns {string[]} href values in document order.
 */
function extractLinks(html) {
  const hrefs = []
  for (const [, target] of html.matchAll(/href="([^"]+)"/gi)) {
    hrefs.push(target)
  }
  return hrefs
}
83
+
84
+ // ─── Signal Detection ──────────────────────────────────────────────
85
+
86
/**
 * Find the first Instagram, LinkedIn, Facebook and Twitter/X profile reference
 * in the page.
 *
 * @param {string[]} links - href values extracted from the page.
 * @param {string} html - Raw page HTML (searched together with the links).
 * @returns {Object<string, {url: string}>} One entry per platform found. `url`
 *   is the full http(s) URL when one is present, otherwise the bare
 *   `domain/handle` text that matched.
 */
function detectSocialLinks(links, html) {
  const platformPatterns = [
    ['instagram', /instagram\.com\/([a-zA-Z0-9_.]+)/i],
    ['linkedin', /linkedin\.com\/(company|in)\/([a-zA-Z0-9_-]+)/i],
    ['facebook', /facebook\.com\/([a-zA-Z0-9_.]+)/i],
    ['twitter', /(twitter\.com|x\.com)\/([a-zA-Z0-9_]+)/i],
  ]

  const haystack = links.join(' ') + ' ' + html
  const profiles = {}

  for (const [platform, pattern] of platformPatterns) {
    const bare = haystack.match(pattern)
    if (!bare) continue
    // Prefer a match that includes the scheme and any subdomain prefix.
    const absolute = haystack.match(new RegExp(`https?://[^"'\\s]*${pattern.source}`, 'i'))
    profiles[platform] = { url: absolute ? absolute[0] : bare[0] }
  }

  return profiles
}
107
+
108
/**
 * Report whether the page appears to collect email addresses.
 *
 * @param {string} html - Raw page HTML.
 * @returns {boolean} true when the page has an email input, signup/newsletter
 *   wording, or mentions a known email-marketing provider.
 */
function detectEmailSignup(html) {
  if (/input[^>]+type\s*=\s*["']email["']/i.test(html)) return true

  const lowered = html.toLowerCase()
  if (/subscribe|newsletter|join.*list|get updates|sign\s*up.*email|email.*sign\s*up/i.test(lowered)) return true

  return /mailchimp|convertkit|activecampaign|klaviyo|mailerlite/i.test(lowered)
}
115
+
116
/**
 * Count e-commerce signals on the page (platform names, cart/product links,
 * add-to-cart and shipping wording, product price markup).
 *
 * @param {string} html - Raw page HTML.
 * @param {string[]} links - href values extracted from the page.
 * @returns {number} Number of signals present, 0-7.
 */
function detectEcommerce(html, links) {
  const lowered = html.toLowerCase()
  const rawMarkupHits = [/shopify/i, /woocommerce|wc-/i, /bigcommerce/i, /sku|product-price|product_price/i]
    .filter((pattern) => pattern.test(html)).length
  const loweredHits = [/add.to.cart/i, /shipping|free delivery/i]
    .filter((pattern) => pattern.test(lowered)).length
  const linkHit = links.some((l) => /\/cart|\/products|\/collections/i.test(l)) ? 1 : 0
  return rawMarkupHits + loweredHits + linkHit
}
126
+
127
/**
 * Count appointment/booking signals (scheduling-tool links and booking wording).
 *
 * @param {string} html - Raw page HTML.
 * @param {string[]} links - href values extracted from the page.
 * @returns {number} Number of signals present, 0-4.
 */
function detectBooking(html, links) {
  const lowered = html.toLowerCase()
  let score = 0
  if (links.some((l) => /calendly\.com|acuityscheduling\.com|cal\.com|tidycal\.com/i.test(l))) score += 1
  if (/book\s*(a|an|your|now|today|free|a free)/i.test(lowered)) score += 1
  if (/schedule\s*(a|an|your|now|today|free|a free)/i.test(lowered)) score += 1
  if (/booking|appointment|consultation|session/i.test(lowered)) score += 1
  return score
}
137
+
138
/**
 * Count software-as-a-service signals.
 *
 * @param {string} html - Raw page HTML (checked for app-style domains).
 * @param {string} bodyText - Plain page text (checked for SaaS wording).
 * @returns {number} Number of signals present, 0-6.
 */
function detectSaaS(html, bodyText) {
  const lowered = bodyText.toLowerCase()
  const checks = [
    () => /free\s*trial/i.test(lowered),
    () => /sign\s*up/i.test(lowered) && /pricing/i.test(lowered),
    () => /api|developer|documentation|integrat/i.test(lowered),
    () => /pricing\s*(plan|tier)/i.test(lowered),
    () => /saas|software|platform|dashboard/i.test(lowered),
    () => /app\.(io|com|co)|\.app\b/i.test(html),
  ]
  return checks.reduce((score, check) => (check() ? score + 1 : score), 0)
}
147
+
148
/**
 * Count agency/services signals in the page text.
 *
 * @param {string} bodyText - Plain page text.
 * @returns {number} Number of keyword groups present, 0-6.
 */
function detectAgency(bodyText) {
  const lowered = bodyText.toLowerCase()
  const keywordGroups = [
    /our\s*team|meet\s*the\s*team/i,
    /services|our\s*work|portfolio/i,
    /case\s*stud(y|ies)/i,
    /clients|trusted\s*by|worked\s*with/i,
    /agency|studio|firm|consultancy/i,
    /project|retainer|proposal/i,
  ]
  return keywordGroups.filter((group) => group.test(lowered)).length
}
157
+
158
/**
 * Count coaching/consulting signals in the page text.
 *
 * @param {string} bodyText - Plain page text.
 * @returns {number} Number of keyword groups present, 0-6.
 */
function detectCoaching(bodyText) {
  const lowered = bodyText.toLowerCase()
  const keywordGroups = [
    /coach(ing)?|mentor(ing)?/i,
    /consult(ing|ant)?/i,
    /1[\s-]on[\s-]1|one[\s-]on[\s-]one/i,
    /session|program|transformation/i,
    /certified|accredited|practitioner/i,
    /work\s*with\s*me|hire\s*me/i,
  ]
  let score = 0
  for (const group of keywordGroups) {
    if (group.test(lowered)) score += 1
  }
  return score
}
167
+
168
/**
 * Count digital-product signals (course/download wording, course-platform
 * names, and links to course platforms).
 *
 * @param {string} bodyText - Plain page text.
 * @param {string[]} links - href values extracted from the page.
 * @returns {number} Number of signals present, 0-6.
 */
function detectDigitalProduct(bodyText, links) {
  const lowered = bodyText.toLowerCase()
  const textGroups = [
    /course|module|lesson|curriculum|enroll/i,
    /template|download|ebook|e-book|guide|playbook/i,
    /gumroad|teachable|podia|thinkific|kajabi|udemy/i,
    /digital\s*product|online\s*course|masterclass/i,
    /instant\s*access|lifetime\s*access/i,
  ]
  const textScore = textGroups.filter((group) => group.test(lowered)).length
  const linksToPlatform = links.some((l) => /gumroad\.com|teachable\.com|podia\.com|thinkific\.com/i.test(l))
  return textScore + (linksToPlatform ? 1 : 0)
}
180
+
181
/**
 * Count membership/subscription signals in the page text.
 *
 * @param {string} bodyText - Plain page text.
 * @returns {number} Number of keyword groups present, 0-5.
 */
function detectMembership(bodyText) {
  const lowered = bodyText.toLowerCase()
  const keywordGroups = [
    /membership|member\s*area|members[\s-]only/i,
    /\/mo\b|per\s*month|monthly/i,
    /subscription|recurring|renew/i,
    /community|private\s*group|inner\s*circle/i,
    /cancel\s*anytime|no\s*commitment/i,
  ]
  return keywordGroups.reduce((score, group) => score + (group.test(lowered) ? 1 : 0), 0)
}
191
+
192
/**
 * Count local-service signals (trade and health keywords, quote/call-out
 * wording, locality wording, and Google Maps links).
 *
 * @param {string} bodyText - Plain page text.
 * @param {string} html - Raw page HTML (accepted for signature parity; not read).
 * @param {string[]} links - href values extracted from the page.
 * @returns {number} Number of signals present, 0-8.
 */
function detectLocalService(bodyText, html, links) {
  const lowered = bodyText.toLowerCase()
  const keywordGroups = [
    /plumb(er|ing)|electrician|hvac|roofing|landscap/i,
    /physiotherap|chiropract|dentist|optometrist|massage|salon|barber/i,
    /personal\s*train(er|ing)|gym|fitness\s*studio|yoga\s*studio|pilates/i,
    /free\s*(quote|estimate|inspection|assessment)/i,
    /service\s*area|we\s*come\s*to\s*you|mobile\s*service/i,
    /call\s*(us|now|today)|emergency|24[\s/]?7/i,
    /local|near\s*(you|me)|in\s*your\s*area/i,
  ]
  let score = keywordGroups.filter((group) => group.test(lowered)).length
  if (links.some((l) => /google\.com\/maps|maps\.google/i.test(l))) score += 1
  return score
}
206
+
207
/**
 * Find up to four distinct prices mentioned in the page text.
 *
 * Dollar amounts (optionally with a `/mo` or `/month` suffix) are collected
 * first, then euro, then pound amounts; duplicates are removed.
 *
 * @param {string} bodyText - Plain page text.
 * @returns {(string|null)} Comma-separated prices, or null when none are found.
 */
function detectPricing(bodyText) {
  const currencyPatterns = [
    /\$\d[\d,]*(?:\.\d{2})?(?:\s*\/\s*mo(?:nth)?)?/gi,
    /€\d[\d,]*(?:\.\d{2})?/gi,
    /£\d[\d,]*(?:\.\d{2})?/gi,
  ]
  const found = currencyPatterns.flatMap((pattern) => bodyText.match(pattern) || [])
  if (found.length === 0) return null
  return Array.from(new Set(found)).slice(0, 4).join(', ')
}
216
+
217
/**
 * Guess the business location.
 *
 * Checks, in order: a `geo.region` / `geo.placename` meta tag; a capitalised
 * place name following "based in", "located in", "serving" or "headquarters";
 * an Australian city followed by a state/country marker; a short list of
 * well-known international cities.
 *
 * @param {string} bodyText - Plain page text.
 * @param {string} html - Raw page HTML (meta tag lookup only).
 * @returns {(string|null)} Location text, or null when nothing matches.
 */
function detectLocation(bodyText, html) {
  const meta = html.match(/<meta[^>]+name="geo\.(region|placename)"[^>]+content="([^"]+)"/i)
  if (meta) return meta[2]

  // The cue phrase is matched case-insensitively, but the place name itself
  // must be capitalised words (up to four). A single case-insensitive pattern
  // would accept any lowercase word and capture the rest of the sentence.
  const cue = /(?:based\s+in|located\s+in|serving|headquarters?)\s+/gi
  const placeName = /^[A-Z][a-zA-Z]+(?:,?\s+[A-Z][a-zA-Z]+){0,3}/
  for (const hit of bodyText.matchAll(cue)) {
    const place = bodyText.slice(hit.index + hit[0].length).match(placeName)
    if (place) return place[0]
  }

  const auCities =
    /\b(Sydney|Melbourne|Brisbane|Perth|Adelaide|Gold Coast|Canberra|Hobart|Darwin|Newcastle|Sunshine Coast)\b.*?\b(NSW|VIC|QLD|WA|SA|ACT|TAS|NT|Australia|AU)\b/i
  const auMatch = bodyText.match(auCities)
  if (auMatch) return `${auMatch[1]}, ${auMatch[2]}`

  const cityMatch = bodyText.match(
    /\b(New York|Los Angeles|London|Toronto|Auckland|Dubai|Singapore|Hong Kong|San Francisco|Chicago|Austin|Miami|Denver|Seattle|Portland|Nashville|Atlanta)\b/i
  )
  if (cityMatch) return cityMatch[1]

  return null
}
238
+
239
+ // ─── Business Type Classification ──────────────────────────────────
240
+
241
+ const BUSINESS_TYPE_LABELS = {
242
+ coaching_consulting: 'Coaching / Consulting',
243
+ digital_product: 'Digital Products',
244
+ agency: 'Agency / Services',
245
+ physical_product: 'Physical Product / E-commerce',
246
+ saas_software: 'SaaS / Software',
247
+ membership_subscription: 'Membership / Subscription',
248
+ service_local: 'Local Service',
249
+ hybrid: 'Hybrid / Multiple',
250
+ }
251
+
252
/**
 * Pick a business type from the per-signal scores.
 *
 * Each type gets a weighted score. The result is `'hybrid'` when the best
 * score is below 2 or when the runner-up is within 80% of the best; otherwise
 * it is the best-scoring type.
 *
 * @param {{ecommerce: number, saas: number, coaching: number, agency: number,
 *   digitalProduct: number, membership: number, booking: number,
 *   localService: number}} scores - Signal counts from the detectors.
 * @returns {string} A business type key.
 */
function classifyBusiness(scores) {
  const weighted = [
    ['physical_product', scores.ecommerce * 2],
    ['saas_software', scores.saas * 1.8],
    ['coaching_consulting', (scores.coaching + scores.booking * 0.5) * 1.5],
    ['agency', scores.agency * 1.5],
    ['digital_product', scores.digitalProduct * 1.5],
    ['membership_subscription', scores.membership * 1.5],
    ['service_local', (scores.localService + scores.booking * 0.8) * 1.5],
  ]

  weighted.sort(([, a], [, b]) => b - a)
  const [topType, topScore] = weighted[0]
  const secondScore = weighted[1][1]

  if (topScore < 2) return 'hybrid'
  const closeRunnerUp = topScore > 0 && secondScore > 0 && secondScore / topScore > 0.8
  return closeRunnerUp ? 'hybrid' : topType
}
270
+
271
/**
 * Derive a display name for the business.
 *
 * Uses the first segment of the page title when it looks like a name,
 * otherwise the capitalised first label of the hostname, otherwise the
 * placeholder `'Your Business'`.
 *
 * @param {{title: string}} meta - Extracted page metadata.
 * @param {string} url - Page URL (used for the hostname fallback).
 * @returns {string} Business name.
 */
function deriveBusinessName(meta, url) {
  if (meta.title) {
    // Titles read "Name | Tagline" or "Name - Tagline". Split on pipes and
    // en/em dashes, but on a hyphen only when whitespace touches it, so
    // hyphenated names such as "Coca-Cola" stay whole.
    const firstSegment = meta.title.split(/[|–—]|\s-|-\s/)[0].trim()
    // Reject a segment that is just a generic "Home" label. Stripping the
    // substring "home" anywhere would mangle names like "Homebase".
    const isGenericLabel = /^home(\s*page)?$/i.test(firstSegment)
    if (!isGenericLabel && firstSegment.length > 1 && firstSegment.length < 80) return firstSegment
  }
  try {
    const hostname = new URL(url).hostname.replace(/^www\./, '')
    const domain = hostname.split('.')[0]
    return domain.charAt(0).toUpperCase() + domain.slice(1)
  } catch {
    return 'Your Business'
  }
}
284
+
285
+ // ─── Main Scraper ──────────────────────────────────────────────────
286
+
287
/**
 * Scrape a website and return a business profile.
 * Uses server-side fetch (no CORS proxy needed).
 *
 * The whole request, including downloading the response body, is bounded by
 * SCRAPE_TIMEOUT milliseconds; when it elapses the fetch is aborted and the
 * returned promise rejects.
 *
 * @param {string} rawUrl - The website URL to scrape.
 * @param {(step: string) => void} [onStep] - Optional progress callback.
 * @returns {Promise<object>} businessProfile
 * @throws {Error} When the response status is not OK, the body is empty or
 *   shorter than 100 characters, or the request fails or is aborted.
 */
async function scrapeWebsite(rawUrl, onStep) {
  const url = normalizeUrl(rawUrl)

  onStep?.('Fetching website...')
  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), SCRAPE_TIMEOUT)

  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; ClaudePortalBot/1.0)',
        'Accept': 'text/html',
      },
    })

    if (!response.ok) {
      throw new Error(`Failed to fetch website (${response.status})`)
    }

    // The timer is deliberately still running here: fetch() resolves once
    // headers arrive, and a stalled body download must also be aborted. The
    // `finally` block below clears the timer on every path.
    const html = await response.text()
    if (!html || html.length < 100) {
      throw new Error('Website returned empty or very short content')
    }

    onStep?.('Reading page content...')
    const meta = extractMeta(html)
    const bodyText = extractBodyText(html)
    const links = extractLinks(html)

    onStep?.('Detecting business signals...')
    const socialProfiles = detectSocialLinks(links, html)
    const emailListDetected = detectEmailSignup(html)
    const priceRange = detectPricing(bodyText)
    const location = detectLocation(bodyText, html)

    const scores = {
      ecommerce: detectEcommerce(html, links),
      booking: detectBooking(html, links),
      saas: detectSaaS(html, bodyText),
      agency: detectAgency(bodyText),
      coaching: detectCoaching(bodyText),
      digitalProduct: detectDigitalProduct(bodyText, links),
      membership: detectMembership(bodyText),
      localService: detectLocalService(bodyText, html, links),
    }

    onStep?.('Classifying business type...')
    const businessType = classifyBusiness(scores)
    const businessName = deriveBusinessName(meta, url)
    const description = meta.description || bodyText.slice(0, 120).trim() + '...'

    return {
      businessName,
      businessType,
      businessTypeLabel: BUSINESS_TYPE_LABELS[businessType] || businessType,
      description,
      website: url,
      priceRange,
      location,
      socialProfiles,
      emailListDetected,
      ogImage: meta.ogImage || null,
      bodyTextPreview: bodyText.slice(0, 500),
    }
  } finally {
    clearTimeout(timeout)
  }
}
366
+
367
+ module.exports = { scrapeWebsite, BUSINESS_TYPE_LABELS }