create-claudeportal 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/index-cqtKQEpE.js +161 -0
- package/dist/index.html +1 -1
- package/package.json +1 -1
- package/server/index.js +2 -0
- package/server/lib/brain-manager.js +180 -0
- package/server/lib/import-parser.js +59 -0
- package/server/lib/mcp-checker.js +18 -0
- package/server/lib/scraper.js +367 -0
- package/server/routes/brain.js +124 -0
- package/server/routes/folder.js +22 -4
- package/tsconfig.tsbuildinfo +1 -1
- package/dist/assets/index-ChGdxmzn.js +0 -132
- package/mockups/01-chat-conversation-v2.html +0 -803
- package/mockups/01-chat-conversation.html +0 -592
- package/mockups/02-activity-feed.html +0 -648
- package/mockups/03-focused-workspace.html +0 -680
- package/mockups/04-documents-mode.html +0 -1556
package/dist/index.html
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
<meta charset="UTF-8" />
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
6
6
|
<title>Claude Portal</title>
|
|
7
|
-
<script type="module" crossorigin src="/assets/index-
|
|
7
|
+
<script type="module" crossorigin src="/assets/index-cqtKQEpE.js"></script>
|
|
8
8
|
<link rel="stylesheet" crossorigin href="/assets/index-BG0yZd9Y.css">
|
|
9
9
|
</head>
|
|
10
10
|
<body>
|
package/package.json
CHANGED
package/server/index.js
CHANGED
|
@@ -19,6 +19,7 @@ const { validateProjectPath } = require('./lib/validate-path')
|
|
|
19
19
|
const folderRoutes = require('./routes/folder')
|
|
20
20
|
const { createDocEventsRouter } = require('./routes/doc-events')
|
|
21
21
|
const { createPreviewProxy } = require('./routes/preview-proxy')
|
|
22
|
+
const brainRoutes = require('./routes/brain')
|
|
22
23
|
|
|
23
24
|
function startServer(port) {
|
|
24
25
|
return new Promise((resolve) => {
|
|
@@ -112,6 +113,7 @@ function startServer(port) {
|
|
|
112
113
|
app.use('/api', createEventsRouter(sseManager, fileWatcher))
|
|
113
114
|
app.use('/api', folderRoutes)
|
|
114
115
|
app.use('/api', createDocEventsRouter(sseManager))
|
|
116
|
+
app.use('/api', brainRoutes)
|
|
115
117
|
|
|
116
118
|
// Preview proxy — captures browser console errors and feeds them to terminal
|
|
117
119
|
app.use('/api', createPreviewProxy(() => activePty))
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
const fs = require('fs')
|
|
2
|
+
const path = require('path')
|
|
3
|
+
const os = require('os')
|
|
4
|
+
|
|
5
|
+
// Default directory holding the brain markdown files. Declared with `let`
// because the DEFAULT_BRAIN_DIR setter on module.exports reassigns it.
let DEFAULT_BRAIN_DIR = path.join(os.homedir(), 'Claude', 'brain')

// H1 title written at the top of each category's markdown file.
const CATEGORY_TITLES = {
  'business': 'Business Context',
  'industry': 'Industry Context',
  'voice': 'Voice Context',
  'competitors': 'Competitors Context',
}

// Field names counted per category when computing completeness.
const EXPECTED_FIELDS = {
  'business': [
    'Name',
    'Type',
    'Type Label',
    'Description',
    'Location',
    'Website',
    'Email List Detected',
    'Social Profiles',
    'Reviews',
    'Dream Outcome',
    'Pricing',
    'Value Stack',
    'Bonuses',
    'Guarantee',
    'Urgency',
    'Revenue Baseline',
    'Team Size',
  ],
  'industry': [
    'Sector',
    'Target Audience',
    'Avg Customer Value',
    'Seasonal Patterns',
    'Market Trends',
    'Common Objections',
    'Buying Triggers',
    'ICP Segment 1',
    'ICP Segment 2',
    'ICP Segment 3',
  ],
  'voice': [
    'Template',
    'Tone',
    'Words To Avoid',
    'Preferred Length',
    'Social Tone',
    'Email Tone',
    'Visual Style',
    'Energy Level',
  ],
  'competitors': [
    'Identified',
    'Their Pricing',
    'Their Offers',
    'Their Strengths',
    'Their Weaknesses',
    'Differentiator',
    'AI Visibility Score',
    'Competitor Ad Angles',
    'Longest Running Ads',
  ],
}
|
|
20
|
+
|
|
21
|
+
/**
 * Parse `- **Key:** value` bullet lines out of a brain markdown file.
 *
 * @param {string} content - Markdown text to scan.
 * @returns {Object<string, string>} Field name -> trimmed value. Lines that do
 *   not match the bullet format are ignored; a later duplicate key wins.
 */
function parseMdFields(content) {
  const fields = {}
  // Split on CRLF as well as LF: `.` never matches `\r`, so with a plain '\n'
  // split every line of a CRLF file would fail the pattern below.
  for (const line of content.split(/\r?\n/)) {
    const match = line.match(/^- \*\*(.+?):\*\*\s*(.+)$/)
    if (match) {
      fields[match[1]] = match[2].trim()
    }
  }
  return fields
}
|
|
32
|
+
|
|
33
|
+
/**
 * Render a category's fields as markdown: an H1 title followed by one
 * `- **Key:** value` bullet per non-empty field.
 *
 * @param {string} category - Category key; looked up in CATEGORY_TITLES and
 *   used verbatim as the title when not found there.
 * @param {Object<string, *>} fields - Field name -> value. Falsy and
 *   whitespace-only values are skipped.
 * @returns {string} Markdown text ending with a newline.
 */
function fieldsToMd(category, fields) {
  const title = CATEGORY_TITLES[category] || category
  let md = `# ${title}\n\n`
  for (const [key, value] of Object.entries(fields)) {
    if (!value) continue
    // Coerce so truthy non-strings (numbers, `true`) no longer throw on .trim().
    // Collapse line breaks: the bullet format is one line per field, and any
    // extra lines would be unreadable by parseMdFields and lost on re-save.
    const text = String(value).replace(/\s*\r?\n\s*/g, ' ').trim()
    if (text) {
      md += `- **${key}:** ${text}\n`
    }
  }
  return md
}
|
|
43
|
+
|
|
44
|
+
/**
 * Merge `fields` into `<brainDir>/<category>.md`, keeping existing fields that
 * are not overwritten and preserving any trailing "## Raw Context" section.
 *
 * @param {string} category - Category key; also the file's base name.
 *   NOTE(review): it is joined into the path unsanitised — confirm callers
 *   only pass known category keys.
 * @param {Object<string, *>} fields - Field name -> new value. Falsy and
 *   whitespace-only values never overwrite an existing value.
 * @param {string} [brainDir] - Target directory; created if missing.
 */
function saveBrainCategory(category, fields, brainDir = DEFAULT_BRAIN_DIR) {
  fs.mkdirSync(brainDir, { recursive: true })
  const filePath = path.join(brainDir, `${category}.md`)

  let existing = {}
  let rawSection = ''
  if (fs.existsSync(filePath)) {
    const content = fs.readFileSync(filePath, 'utf8')
    const rawMatch = content.match(/\n## Raw Context\n([\s\S]*)$/)
    if (rawMatch) rawSection = rawMatch[0]
    // Parse only the part before the raw section. Raw context is free text and
    // may contain `- **Key:** value` lines that must not become fields.
    const structured = rawMatch ? content.slice(0, rawMatch.index) : content
    existing = parseMdFields(structured)
  }

  const merged = { ...existing }
  for (const [key, value] of Object.entries(fields)) {
    if (!value) continue
    // Coerce so truthy non-string values do not throw on .trim().
    const text = String(value).trim()
    if (text) merged[key] = text
  }

  fs.writeFileSync(filePath, fieldsToMd(category, merged) + rawSection)
}
|
|
66
|
+
|
|
67
|
+
/**
 * Append free-form text to the "## Raw Context" section of
 * `<brainDir>/<category>.md`, creating the file and/or the section heading
 * when they do not exist yet.
 *
 * @param {string} category - Category key; also the file's base name.
 * @param {string} text - Text to append (trimmed before writing).
 * @param {string} [brainDir] - Target directory; created if missing.
 */
function appendRawContext(category, text, brainDir = DEFAULT_BRAIN_DIR) {
  fs.mkdirSync(brainDir, { recursive: true })
  const filePath = path.join(brainDir, `${category}.md`)

  let content = ''
  if (fs.existsSync(filePath)) {
    content = fs.readFileSync(filePath, 'utf8')
  } else {
    const title = CATEGORY_TITLES[category] || category
    content = `# ${title}\n\n`
  }

  if (!content.includes('## Raw Context')) {
    content += '\n## Raw Context\n\n'
  } else if (!content.endsWith('\n')) {
    // The file already has the section but lacks a trailing newline (e.g. it
    // was edited by hand): start a new paragraph instead of gluing the new
    // text onto the last existing line.
    content += '\n\n'
  }
  content += text.trim() + '\n\n'

  fs.writeFileSync(filePath, content)
}
|
|
86
|
+
|
|
87
|
+
/**
 * Count how many of the expected fields have a non-empty value in `content`.
 *
 * @param {string} content - Markdown text containing `- **Key:** value` bullets.
 * @param {string[]} expectedFields - Field names to look for.
 * @returns {number} Number of expected fields with a value on their own line.
 */
function countFields(content, expectedFields) {
  let filled = 0
  for (const field of expectedFields) {
    const escaped = field.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    // Only horizontal whitespace may follow the label. A plain `\s*` also
    // swallows the line break, so an empty field would be "filled" by the
    // next line of the file.
    const regex = new RegExp(`^- \\*\\*${escaped}:\\*\\*[^\\S\\r\\n]*(\\S.*)$`, 'm')
    if (regex.test(content)) filled++
  }
  return filled
}
|
|
96
|
+
|
|
97
|
+
/**
 * Summarise how complete the brain files in `brainDir` are.
 *
 * @param {string} [brainDir] - Directory holding `<category>.md` files.
 * @returns {{exists: boolean, businessName: (string|null), completeness: number,
 *   categories: Object<string, {filled: number, total: number, percent: number}>}}
 *   `exists` is false (with all counts zero) when the directory is missing.
 *   Percentages are rounded integers.
 */
function getBrainStatus(brainDir = DEFAULT_BRAIN_DIR) {
  const exists = fs.existsSync(brainDir)
  const categories = {}
  let totalFilled = 0
  let totalFields = 0
  let businessName = null

  // One loop serves both the "missing directory" and the normal case, so the
  // per-category totals always come from EXPECTED_FIELDS instead of a second,
  // hand-maintained copy of the numbers.
  for (const [cat, fields] of Object.entries(EXPECTED_FIELDS)) {
    const filePath = path.join(brainDir, `${cat}.md`)
    let filled = 0
    if (exists && fs.existsSync(filePath)) {
      const content = fs.readFileSync(filePath, 'utf8')
      filled = countFields(content, fields)
      if (cat === 'business') {
        // The value must sit on the same line as the label; `\s*` would let an
        // empty Name capture the following line instead.
        const nameMatch = content.match(/^- \*\*Name:\*\*[^\S\r\n]*(\S.*)$/m)
        if (nameMatch) businessName = nameMatch[1].trim()
      }
    }
    categories[cat] = { filled, total: fields.length, percent: Math.round((filled / fields.length) * 100) }
    totalFilled += filled
    totalFields += fields.length
  }

  return {
    exists,
    businessName,
    completeness: totalFields > 0 ? Math.round((totalFilled / totalFields) * 100) : 0,
    categories,
  }
}
|
|
140
|
+
|
|
141
|
+
/**
 * Make sure `~/.claude/CLAUDE.md` contains a "## Business Context" section
 * pointing at the brain files, appending it once if it is absent.
 *
 * A `CLAUDE.md.brain-lock` file created with the exclusive `wx` flag guards
 * the read-check-append sequence.
 *
 * @param {string} [brainDir] - Directory the appended section points to.
 * @returns {boolean} true when the section was appended; false when it was
 *   already present or the lock could not be acquired.
 */
function ensureClaudeMdReference(brainDir = DEFAULT_BRAIN_DIR) {
  const claudeDir = path.join(os.homedir(), '.claude')
  const claudeMdPath = path.join(claudeDir, 'CLAUDE.md')
  const lockPath = claudeMdPath + '.brain-lock'
  // The guarded work is a few synchronous fs calls, so a lock older than this
  // is treated as left behind by a process that died before its `finally` ran.
  const staleLockMs = 30000

  fs.mkdirSync(claudeDir, { recursive: true })

  const acquireLock = () => {
    try {
      fs.writeFileSync(lockPath, String(process.pid), { flag: 'wx' })
      return true
    } catch {
      return false
    }
  }

  if (!acquireLock()) {
    let lockAgeMs
    try {
      lockAgeMs = Date.now() - fs.statSync(lockPath).mtimeMs
    } catch {
      // Lock vanished or cannot be inspected; behave as "busy" like before.
      return false
    }
    if (lockAgeMs <= staleLockMs) return false
    // Without this recovery a stale lock makes every later call return false.
    // NOTE(review): two processes recovering the same stale lock at the same
    // instant can still race; acceptable for a best-effort, idempotent append.
    try { fs.unlinkSync(lockPath) } catch {}
    if (!acquireLock()) return false
  }

  try {
    let content = ''
    if (fs.existsSync(claudeMdPath)) {
      content = fs.readFileSync(claudeMdPath, 'utf8')
    }

    if (content.includes('## Business Context')) return false

    const section = `\n## Business Context\nFor business-specific context about this user, read:\n- ${brainDir}/business.md — identity, offer, pricing, operations\n- ${brainDir}/industry.md — audience, market, ICP segments\n- ${brainDir}/voice.md — communication style, tone preferences\n- ${brainDir}/competitors.md — competitive landscape\nReference documents: ${brainDir}/documents/\n`

    fs.appendFileSync(claudeMdPath, section)
    return true
  } finally {
    try { fs.unlinkSync(lockPath) } catch {}
  }
}
|
|
170
|
+
|
|
171
|
+
module.exports = {
|
|
172
|
+
saveBrainCategory,
|
|
173
|
+
appendRawContext,
|
|
174
|
+
getBrainStatus,
|
|
175
|
+
ensureClaudeMdReference,
|
|
176
|
+
parseMdFields,
|
|
177
|
+
EXPECTED_FIELDS,
|
|
178
|
+
get DEFAULT_BRAIN_DIR() { return DEFAULT_BRAIN_DIR },
|
|
179
|
+
set DEFAULT_BRAIN_DIR(v) { DEFAULT_BRAIN_DIR = v },
|
|
180
|
+
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
 * Split pasted text into the four brain categories.
 *
 * Three label styles are tried in order and the first style that matches at
 * least one line wins:
 *   1. markdown headings  - `# Business`, `## Industry overview`, ...
 *   2. bold labels        - `**Business**`, `**Competitors:** Acme, Globex`
 *   3. plain labels       - `Business: we sell widgets`
 * Lines after a label belong to that category until the next label. For
 * styles 2 and 3, text on the same line as the label is kept as the first
 * line of the section. If no style matches, all text goes to `business`.
 *
 * @param {string} text - Raw imported text.
 * @returns {{business: string, industry: string, voice: string, competitors: string}}
 */
function parseImport(text) {
  const passes = [
    {
      keepInline: false, // the rest of a heading line is the heading's title
      labels: [
        ['business', /^#{1,3}\s*business/i],
        ['industry', /^#{1,3}\s*industry/i],
        ['voice', /^#{1,3}\s*voice/i],
        ['competitors', /^#{1,3}\s*competitor/i],
      ],
    },
    {
      keepInline: true,
      labels: [
        // The optional colon also accepts the common `**Business:**` form.
        ['business', /^\*{2}business\s*:?\*{2}/i],
        ['industry', /^\*{2}industry\s*:?\*{2}/i],
        ['voice', /^\*{2}voice\s*:?\*{2}/i],
        ['competitors', /^\*{2}competitor[^*]*(?:\*{2})?/i],
      ],
    },
    {
      keepInline: true,
      labels: [
        ['business', /^business\s*:/i],
        ['industry', /^industry\s*:/i],
        ['voice', /^voice\s*:/i],
        ['competitors', /^competitor[^:]*:/i],
      ],
    },
  ]

  const lines = text.split('\n').map((line) => line.trim())

  for (const { keepInline, labels } of passes) {
    const sections = { business: '', industry: '', voice: '', competitors: '' }
    let current = null
    let matched = false

    for (const line of lines) {
      let labelMatch = null
      for (const [category, pattern] of labels) {
        const m = line.match(pattern)
        if (m) {
          current = category
          labelMatch = m
          break
        }
      }

      if (labelMatch) {
        matched = true
        if (keepInline) {
          // Keep "Business: We sell widgets" content instead of dropping it.
          const rest = line.slice(labelMatch[0].length).replace(/^[\s:–—-]+/, '').trim()
          if (rest) sections[current] += rest + '\n'
        }
        continue
      }

      if (current && line) sections[current] += line + '\n'
    }

    if (matched) return trimSections(sections)
  }

  return { business: text.trim(), industry: '', voice: '', competitors: '' }
}

/**
 * Return a copy of `sections` with surrounding whitespace removed from each
 * category's text.
 *
 * @param {{business: string, industry: string, voice: string, competitors: string}} sections
 * @returns {{business: string, industry: string, voice: string, competitors: string}}
 */
function trimSections(sections) {
  return {
    business: sections.business.trim(),
    industry: sections.industry.trim(),
    voice: sections.voice.trim(),
    competitors: sections.competitors.trim(),
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Guess which brain category an uploaded document belongs to from keywords in
 * its file name.
 *
 * @param {string} filename - File name (case-insensitive).
 * @returns {'voice'|'competitors'|'industry'|'business'} Best guess;
 *   'business' when no keyword matches.
 */
function guessCategoryFromFilename(filename) {
  const lower = String(filename ?? '').toLowerCase()
  // `tone` must start a word so "milestone"/"keystone" are not voice documents.
  if (/brand|voice|(?<![a-z])tone|style|messaging/.test(lower)) return 'voice'
  // `vs` must stand alone (acme-vs-globex, acme_vs_globex); as a bare substring
  // it also hits names like "devs-handbook" or "csvs".
  if (/competitor|competition|rival|versus|(?<![a-z])vs(?![a-z])/.test(lower)) return 'competitors'
  if (/market|audience|customer|icp|segment|industry/.test(lower)) return 'industry'
  return 'business'
}
|
|
58
|
+
|
|
59
|
+
module.exports = { parseImport, guessCategoryFromFilename }
|
|
@@ -88,6 +88,23 @@ const RECOMMENDED_MCPS = [
|
|
|
88
88
|
usefulFor: ['draft', 'extract', 'transform'],
|
|
89
89
|
category: 'automation',
|
|
90
90
|
},
|
|
91
|
+
{
|
|
92
|
+
id: 'composio',
|
|
93
|
+
name: 'Composio',
|
|
94
|
+
description: '250+ app integrations — CRM, email, social, databases',
|
|
95
|
+
installHint: 'claude mcp add composio -- npx -y composio-core mcp',
|
|
96
|
+
usefulFor: ['draft', 'extract', 'analyse', 'transform', 'summarize'],
|
|
97
|
+
category: 'automation',
|
|
98
|
+
},
|
|
99
|
+
// Social Media
|
|
100
|
+
{
|
|
101
|
+
id: 'postiz',
|
|
102
|
+
name: 'Postiz',
|
|
103
|
+
description: 'Schedule and manage social media posts across platforms',
|
|
104
|
+
installHint: 'claude mcp add postiz -- npx -y @postiz/mcp-server',
|
|
105
|
+
usefulFor: ['draft', 'transform'],
|
|
106
|
+
category: 'social',
|
|
107
|
+
},
|
|
91
108
|
// Meetings & Notes
|
|
92
109
|
{
|
|
93
110
|
id: 'fireflies',
|
|
@@ -155,6 +172,7 @@ const CATEGORY_LABELS = {
|
|
|
155
172
|
design: 'Design & Build',
|
|
156
173
|
workspace: 'Workspace',
|
|
157
174
|
automation: 'Automation',
|
|
175
|
+
social: 'Social Media',
|
|
158
176
|
meetings: 'Meetings',
|
|
159
177
|
projects: 'Project Management',
|
|
160
178
|
data: 'Data',
|
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Website scraper for Claude Portal Business Brain.
|
|
3
|
+
*
|
|
4
|
+
* Fetches a website, extracts business signals, and classifies the business type.
|
|
5
|
+
* Adapted from AI Team Hub scraper — standalone, no LLM dependency for MVP.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const SCRAPE_TIMEOUT = 15000
|
|
9
|
+
const MAX_BODY_CHARS = 5000
|
|
10
|
+
|
|
11
|
+
// ─── Helpers ───────────────────────────────────────────────────────
|
|
12
|
+
|
|
13
|
+
/**
 * Decode the HTML entities commonly found in titles and meta descriptions:
 * `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&nbsp;` (as a plain space)
 * and numeric references in decimal (`&#39;`) or hex (`&#x27;`) form.
 *
 * Decoding happens in a single pass, so already-escaped text such as
 * `&amp;lt;` becomes `&lt;` rather than `<`. Unknown or out-of-range
 * entities are left untouched.
 *
 * @param {string} str - Text possibly containing HTML entities.
 * @returns {string} Decoded text.
 */
function decodeEntities(str) {
  const named = new Map([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' '],
  ])

  return str.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
    if (body[0] === '#') {
      const isHex = body[1] === 'x' || body[1] === 'X'
      const code = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10)
      if (!Number.isInteger(code) || code > 0x10ffff) return entity
      // fromCodePoint handles code points above U+FFFF (e.g. emoji), which
      // fromCharCode would truncate to 16 bits.
      return String.fromCodePoint(code)
    }
    const replacement = named.get(body.toLowerCase())
    return replacement === undefined ? entity : replacement
  })
}
|
|
23
|
+
|
|
24
|
+
/**
 * Trim the input and make sure it carries an http(s) scheme, defaulting to
 * https when none is present.
 *
 * @param {string} raw - URL or bare host as typed by the user.
 * @returns {string} URL string starting with http:// or https://.
 */
function normalizeUrl(raw) {
  const trimmed = raw.trim()
  const hasScheme = /^https?:\/\//i.test(trimmed)
  return hasScheme ? trimmed : 'https://' + trimmed
}
|
|
29
|
+
|
|
30
|
+
/**
 * Reduce an HTML fragment to plain text: drop script/style/nav/footer
 * elements with their content, remove all remaining tags, decode a few basic
 * entities and collapse whitespace.
 *
 * @param {string} html - HTML markup.
 * @returns {string} Single-line text with runs of whitespace collapsed.
 */
function stripTags(html) {
  return html
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<nav[\s\S]*?<\/nav>/gi, ' ')
    .replace(/<footer[\s\S]*?<\/footer>/gi, ' ')
    .replace(/<[^>]+>/g, ' ')
    .replace(/&nbsp;/gi, ' ')
    .replace(/&lt;/gi, '<')
    .replace(/&gt;/gi, '>')
    .replace(/&#\d+;/g, ' ')
    // `&amp;` is decoded last so escaped entities such as `&amp;lt;` end up as
    // the literal text `&lt;` instead of being decoded a second time.
    .replace(/&amp;/gi, '&')
    .replace(/\s+/g, ' ')
    .trim()
}
|
|
45
|
+
|
|
46
|
+
// ─── Extraction ────────────────────────────────────────────────────
|
|
47
|
+
|
|
48
|
+
/**
 * Pull the page title, description and Open Graph image out of raw HTML.
 *
 * Title falls back from `<title>` to `og:title`; description falls back from
 * `<meta name="description">` to `og:description`. Attribute order inside a
 * `<meta>` tag does not matter, and both double- and single-quoted attribute
 * values are accepted.
 *
 * @param {string} html - Full page markup.
 * @returns {{title: string, description: string, ogImage: string}} Entity-
 *   decoded values; each is '' when not found.
 */
function extractMeta(html) {
  const metaTags = html.match(/<meta\b[^>]*>/gi) || []

  // Read one attribute from a single tag's source text ('' when absent).
  const readAttr = (tag, name) => {
    const m = tag.match(new RegExp(`\\s${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, 'i'))
    if (!m) return ''
    return (m[1] !== undefined ? m[1] : m[2]).trim()
  }

  // First non-empty `content` of a <meta> whose `keyAttr` equals `keyValue`.
  const metaContent = (keyAttr, keyValue) => {
    for (const tag of metaTags) {
      if (readAttr(tag, keyAttr).toLowerCase() !== keyValue) continue
      const content = readAttr(tag, 'content')
      if (content) return content
    }
    return ''
  }

  const titleTag = html.match(/<title[^>]*>([^<]+)<\/title>/i)
  const title = (titleTag ? titleTag[1].trim() : '') || metaContent('property', 'og:title')

  const description =
    metaContent('name', 'description') || metaContent('property', 'og:description')

  // Attribute values are HTML-escaped, so image URLs with query strings carry
  // `&amp;` and need decoding as well.
  const ogImage = metaContent('property', 'og:image')

  return {
    title: decodeEntities(title),
    description: decodeEntities(description),
    ogImage: decodeEntities(ogImage),
  }
}
|
|
71
|
+
|
|
72
|
+
/**
 * Extract readable text from the most content-like container in the page:
 * the first `<main>`, else the first `<article>`, else `<body>`, else the
 * whole document. The result is tag-stripped and capped at MAX_BODY_CHARS.
 *
 * @param {string} html - Full page markup.
 * @returns {string} Plain text, at most MAX_BODY_CHARS characters long.
 */
function extractBodyText(html) {
  const containers = [
    /<main[\s\S]*?<\/main>/i,
    /<article[\s\S]*?<\/article>/i,
    /<body[\s\S]*?<\/body>/i,
  ]

  let source = html
  for (const container of containers) {
    const found = html.match(container)
    if (found) {
      source = found[0]
      break
    }
  }

  return stripTags(source).slice(0, MAX_BODY_CHARS)
}
|
|
78
|
+
|
|
79
|
+
/**
 * Collect every non-empty `href` attribute value in the markup, in document
 * order. Both double- and single-quoted values are recognised.
 *
 * @param {string} html - HTML markup.
 * @returns {string[]} Raw href values (not resolved or de-duplicated).
 */
function extractLinks(html) {
  const links = []
  for (const m of html.matchAll(/href\s*=\s*(?:"([^"]+)"|'([^']+)')/gi)) {
    links.push(m[1] !== undefined ? m[1] : m[2])
  }
  return links
}
|
|
83
|
+
|
|
84
|
+
// ─── Signal Detection ──────────────────────────────────────────────
|
|
85
|
+
|
|
86
|
+
/**
 * Find the first Instagram, LinkedIn, Facebook and Twitter/X profile URL
 * mentioned in the page.
 *
 * @param {string[]} links - href values extracted from the page.
 * @param {string} html - Full page markup (searched in addition to `links`).
 * @returns {Object<string, {url: string}>} Platform name -> first URL found;
 *   platforms without a match are omitted. The URL includes the scheme when
 *   one precedes the domain in the source text.
 */
function detectSocialLinks(links, html) {
  const profiles = {}
  // The lookbehind pins each domain to the start of a host label; without it
  // `x\.com` also matches the tail of wix.com, dropbox.com, netflix.com, ...
  const patterns = {
    instagram: /(?<![a-z0-9-])instagram\.com\/([a-zA-Z0-9_.]+)/i,
    linkedin: /(?<![a-z0-9-])linkedin\.com\/(company|in)\/([a-zA-Z0-9_-]+)/i,
    facebook: /(?<![a-z0-9-])facebook\.com\/([a-zA-Z0-9_.]+)/i,
    twitter: /(?<![a-z0-9-])(twitter\.com|x\.com)\/([a-zA-Z0-9_]+)/i,
  }

  const allText = links.join(' ') + ' ' + html

  for (const [platform, regex] of Object.entries(patterns)) {
    const match = allText.match(regex)
    if (match) {
      // Prefer the full URL (scheme + optional subdomain) when it is present.
      const fullUrlMatch = allText.match(new RegExp(`https?://[^"'\\s]*${regex.source}`, 'i'))
      profiles[platform] = { url: fullUrlMatch ? fullUrlMatch[0] : match[0] }
    }
  }

  return profiles
}
|
|
107
|
+
|
|
108
|
+
/**
 * Heuristically decide whether the page offers an email sign-up: an
 * `<input type="email">`, sign-up wording, or a known email-marketing vendor
 * name anywhere in the markup.
 *
 * @param {string} html - Full page markup.
 * @returns {boolean} true when any of the three signals is present.
 */
function detectEmailSignup(html) {
  if (/input[^>]+type\s*=\s*["']email["']/i.test(html)) return true

  const text = html.toLowerCase()
  if (/subscribe|newsletter|join.*list|get updates|sign\s*up.*email|email.*sign\s*up/i.test(text)) {
    return true
  }

  return /mailchimp|convertkit|activecampaign|klaviyo|mailerlite/i.test(text)
}
|
|
115
|
+
|
|
116
|
+
/**
 * Count e-commerce signals: store platforms, shop-style link paths,
 * add-to-cart and shipping wording, and product markup hints.
 *
 * @param {string} html - Full page markup.
 * @param {string[]} links - href values extracted from the page.
 * @returns {number} Number of signals present (0-7).
 */
function detectEcommerce(html, links) {
  const text = html.toLowerCase()
  const hasShopLink = links.some((href) => /\/cart|\/products|\/collections/i.test(href))

  const checks = [
    /shopify/i.test(html),
    /woocommerce|wc-/i.test(html),
    /bigcommerce/i.test(html),
    hasShopLink,
    /add.to.cart/i.test(text),
    /shipping|free delivery/i.test(text),
    /sku|product-price|product_price/i.test(html),
  ]

  let count = 0
  for (const hit of checks) {
    if (hit) count += 1
  }
  return count
}
|
|
126
|
+
|
|
127
|
+
/**
 * Count appointment-booking signals: links to scheduling services and
 * "book ..." / "schedule ..." / appointment wording in the markup.
 *
 * @param {string} html - Full page markup.
 * @param {string[]} links - href values extracted from the page.
 * @returns {number} Number of signals present (0-4).
 */
function detectBooking(html, links) {
  const text = html.toLowerCase()
  const textPatterns = [
    /book\s*(a|an|your|now|today|free|a free)/i,
    /schedule\s*(a|an|your|now|today|free|a free)/i,
    /booking|appointment|consultation|session/i,
  ]

  const linksToScheduler = links.some((href) =>
    /calendly\.com|acuityscheduling\.com|cal\.com|tidycal\.com/i.test(href)
  )

  let count = linksToScheduler ? 1 : 0
  for (const pattern of textPatterns) {
    if (pattern.test(text)) count += 1
  }
  return count
}
|
|
137
|
+
|
|
138
|
+
/**
 * Count SaaS/software signals in the visible text, plus app-style domains in
 * the raw markup.
 *
 * @param {string} html - Full page markup (checked for app-style domains only).
 * @param {string} bodyText - Visible page text.
 * @returns {number} Number of signals present (0-6).
 */
function detectSaaS(html, bodyText) {
  const text = bodyText.toLowerCase()

  // "Sign up" only counts when pricing is mentioned as well.
  const signupWithPricing = /sign\s*up/i.test(text) && /pricing/i.test(text)

  const textPatterns = [
    /free\s*trial/i,
    /api|developer|documentation|integrat/i,
    /pricing\s*(plan|tier)/i,
    /saas|software|platform|dashboard/i,
  ]

  let count = signupWithPricing ? 1 : 0
  for (const pattern of textPatterns) {
    if (pattern.test(text)) count += 1
  }
  if (/app\.(io|com|co)|\.app\b/i.test(html)) count += 1
  return count
}
|
|
147
|
+
|
|
148
|
+
/**
 * Count agency/services signals (team, services, case studies, clients,
 * agency wording, project wording) in the visible text.
 *
 * @param {string} bodyText - Visible page text.
 * @returns {number} Number of signals present (0-6).
 */
function detectAgency(bodyText) {
  const text = bodyText.toLowerCase()
  const patterns = [
    /our\s*team|meet\s*the\s*team/i,
    /services|our\s*work|portfolio/i,
    /case\s*stud(y|ies)/i,
    /clients|trusted\s*by|worked\s*with/i,
    /agency|studio|firm|consultancy/i,
    /project|retainer|proposal/i,
  ]
  return patterns.reduce((count, pattern) => (pattern.test(text) ? count + 1 : count), 0)
}
|
|
157
|
+
|
|
158
|
+
/**
 * Count coaching/consulting signals in the visible text.
 *
 * @param {string} bodyText - Visible page text.
 * @returns {number} Number of signals present (0-6).
 */
function detectCoaching(bodyText) {
  const text = bodyText.toLowerCase()
  const patterns = [
    /coach(ing)?|mentor(ing)?/i,
    /consult(ing|ant)?/i,
    /1[\s-]on[\s-]1|one[\s-]on[\s-]one/i,
    /session|program|transformation/i,
    /certified|accredited|practitioner/i,
    /work\s*with\s*me|hire\s*me/i,
  ]

  let count = 0
  for (const pattern of patterns) {
    if (pattern.test(text)) count += 1
  }
  return count
}
|
|
167
|
+
|
|
168
|
+
/**
 * Count digital-product signals: course/download wording, course-platform
 * names in the text, and links to course-platform domains.
 *
 * @param {string} bodyText - Visible page text.
 * @param {string[]} links - href values extracted from the page.
 * @returns {number} Number of signals present (0-6).
 */
function detectDigitalProduct(bodyText, links) {
  const text = bodyText.toLowerCase()
  const textPatterns = [
    /course|module|lesson|curriculum|enroll/i,
    /template|download|ebook|e-book|guide|playbook/i,
    /gumroad|teachable|podia|thinkific|kajabi|udemy/i,
    /digital\s*product|online\s*course|masterclass/i,
    /instant\s*access|lifetime\s*access/i,
  ]

  const linksToPlatform = links.some((href) =>
    /gumroad\.com|teachable\.com|podia\.com|thinkific\.com/i.test(href)
  )

  const textHits = textPatterns.filter((pattern) => pattern.test(text)).length
  return textHits + (linksToPlatform ? 1 : 0)
}
|
|
180
|
+
|
|
181
|
+
/**
 * Count membership/subscription signals in the visible text.
 *
 * @param {string} bodyText - Visible page text.
 * @returns {number} Number of signals present (0-5).
 */
function detectMembership(bodyText) {
  const text = bodyText.toLowerCase()
  const patterns = [
    /membership|member\s*area|members[\s-]only/i,
    /\/mo\b|per\s*month|monthly/i,
    /subscription|recurring|renew/i,
    /community|private\s*group|inner\s*circle/i,
    /cancel\s*anytime|no\s*commitment/i,
  ]

  let count = 0
  patterns.forEach((pattern) => {
    if (pattern.test(text)) count += 1
  })
  return count
}
|
|
191
|
+
|
|
192
|
+
/**
 * Count local-service signals: trade and health/fitness keywords, quote and
 * call-out wording, locality wording, and links to Google Maps.
 *
 * @param {string} bodyText - Visible page text.
 * @param {string} html - Full page markup (accepted but not inspected here).
 * @param {string[]} links - href values extracted from the page.
 * @returns {number} Number of signals present (0-8).
 */
function detectLocalService(bodyText, html, links) {
  const text = bodyText.toLowerCase()
  const textPatterns = [
    /plumb(er|ing)|electrician|hvac|roofing|landscap/i,
    /physiotherap|chiropract|dentist|optometrist|massage|salon|barber/i,
    /personal\s*train(er|ing)|gym|fitness\s*studio|yoga\s*studio|pilates/i,
    /free\s*(quote|estimate|inspection|assessment)/i,
    /service\s*area|we\s*come\s*to\s*you|mobile\s*service/i,
    /call\s*(us|now|today)|emergency|24[\s/]?7/i,
    /local|near\s*(you|me)|in\s*your\s*area/i,
  ]

  let count = 0
  for (const pattern of textPatterns) {
    if (pattern.test(text)) count += 1
  }
  if (links.some((href) => /google\.com\/maps|maps\.google/i.test(href))) count += 1
  return count
}
|
|
206
|
+
|
|
207
|
+
/**
 * Collect up to four distinct price strings from the text. Dollar amounts
 * (optionally with a "/mo" or "/month" suffix) come first, then euro, then
 * pound amounts.
 *
 * @param {string} bodyText - Visible page text.
 * @returns {string|null} Comma-separated prices, or null when none are found.
 */
function detectPricing(bodyText) {
  const currencyPatterns = [
    /\$\d[\d,]*(?:\.\d{2})?(?:\s*\/\s*mo(?:nth)?)?/gi,
    /€\d[\d,]*(?:\.\d{2})?/gi,
    /£\d[\d,]*(?:\.\d{2})?/gi,
  ]

  // A Set keeps first-seen order while dropping repeats.
  const seen = new Set()
  for (const pattern of currencyPatterns) {
    for (const price of bodyText.match(pattern) || []) {
      seen.add(price)
    }
  }

  if (seen.size === 0) return null
  return Array.from(seen).slice(0, 4).join(', ')
}
|
|
216
|
+
|
|
217
|
+
/**
 * Best-effort guess of where the business is located.
 *
 * Tried in order: a `geo.region` / `geo.placename` meta tag, a phrase such as
 * "based in <Place>", an Australian city followed by a state/country marker,
 * and finally a short list of well-known city names.
 *
 * @param {string} bodyText - Visible page text.
 * @param {string} html - Full page markup (used for the geo meta tag).
 * @returns {string|null} Location text, or null when nothing is recognised.
 */
function detectLocation(bodyText, html) {
  const meta = html.match(/<meta[^>]+name="geo\.(region|placename)"[^>]+content="([^"]+)"/i)
  if (meta) return meta[2]

  // No `i` flag here: the place must be written as capitalised words. With
  // case-insensitive matching "[A-Z]" accepts any letter, so phrases such as
  // "serving delicious food since ..." were returned as a location and the
  // capture ran on to the end of the sentence. At most four words are taken.
  const addressMatch = bodyText.match(
    /(?:[Bb]ased\s+in|[Ll]ocated\s+in|[Ss]erving|[Hh]eadquarter(?:s|ed)?(?:\s+in)?)\s+([A-Z][a-zA-Z'-]*(?:,?\s+[A-Z][a-zA-Z'-]*){0,3})/
  )
  if (addressMatch) return addressMatch[1].trim()

  const auCities =
    /\b(Sydney|Melbourne|Brisbane|Perth|Adelaide|Gold Coast|Canberra|Hobart|Darwin|Newcastle|Sunshine Coast)\b.*?\b(NSW|VIC|QLD|WA|SA|ACT|TAS|NT|Australia|AU)\b/i
  const auMatch = bodyText.match(auCities)
  if (auMatch) return `${auMatch[1]}, ${auMatch[2]}`

  const cityMatch = bodyText.match(
    /\b(New York|Los Angeles|London|Toronto|Auckland|Dubai|Singapore|Hong Kong|San Francisco|Chicago|Austin|Miami|Denver|Seattle|Portland|Nashville|Atlanta)\b/i
  )
  if (cityMatch) return cityMatch[1]

  return null
}
|
|
238
|
+
|
|
239
|
+
// ─── Business Type Classification ──────────────────────────────────
|
|
240
|
+
|
|
241
|
+
// Human-readable label for each business type key produced by classifyBusiness.
const BUSINESS_TYPE_LABELS = {
  'coaching_consulting': 'Coaching / Consulting',
  'digital_product': 'Digital Products',
  'agency': 'Agency / Services',
  'physical_product': 'Physical Product / E-commerce',
  'saas_software': 'SaaS / Software',
  'membership_subscription': 'Membership / Subscription',
  'service_local': 'Local Service',
  'hybrid': 'Hybrid / Multiple',
}
|
|
251
|
+
|
|
252
|
+
/**
 * Pick a business type from the per-category signal counts.
 *
 * Each count is weighted, the candidates are ranked, and the top one wins
 * unless its score is below 2 or the runner-up scores more than 80% of it,
 * in which case the result is 'hybrid'.
 *
 * @param {{ecommerce: number, saas: number, coaching: number, agency: number,
 *   digitalProduct: number, membership: number, booking: number,
 *   localService: number}} scores - Signal counts.
 * @returns {string} A business type key.
 */
function classifyBusiness(scores) {
  const ranked = [
    { type: 'physical_product', score: scores.ecommerce * 2 },
    { type: 'saas_software', score: scores.saas * 1.8 },
    { type: 'coaching_consulting', score: (scores.coaching + scores.booking * 0.5) * 1.5 },
    { type: 'agency', score: scores.agency * 1.5 },
    { type: 'digital_product', score: scores.digitalProduct * 1.5 },
    { type: 'membership_subscription', score: scores.membership * 1.5 },
    { type: 'service_local', score: (scores.localService + scores.booking * 0.8) * 1.5 },
  ].sort((left, right) => right.score - left.score)

  const [best, runnerUp] = ranked

  const tooWeak = best.score < 2
  const tooClose = best.score > 0 && runnerUp.score > 0 && runnerUp.score / best.score > 0.8

  return tooWeak || tooClose ? 'hybrid' : best.type
}
|
|
270
|
+
|
|
271
|
+
/**
 * Derive a display name for the business from the page title, falling back
 * to the capitalised first label of the hostname.
 *
 * The title is split on " - ", " – ", " — " and "|" separators and the first
 * segment that is not just "Home" / "Home page" / "Welcome" is used, provided
 * it is between 2 and 79 characters long.
 *
 * @param {{title: string}} meta - Page metadata.
 * @param {string} url - Page URL, used for the hostname fallback.
 * @returns {string} Business name, or 'Your Business' when the URL is unusable.
 */
function deriveBusinessName(meta, url) {
  if (meta.title) {
    // Dashes only separate when surrounded by whitespace, so hyphenated names
    // ("Coca-Cola") stay intact. "Home" is dropped only as a whole segment, so
    // names merely containing it ("Homestead Bakery") are left alone.
    const candidate = meta.title
      .split(/\s+[-–—]\s+|\s*\|\s*/)
      .map((part) => part.trim())
      .find((part) => part && !/^(?:home|home\s*page|welcome)$/i.test(part))
    if (candidate && candidate.length > 1 && candidate.length < 80) return candidate
  }
  try {
    const hostname = new URL(url).hostname.replace(/^www\./, '')
    const domain = hostname.split('.')[0]
    return domain.charAt(0).toUpperCase() + domain.slice(1)
  } catch {
    return 'Your Business'
  }
}
|
|
284
|
+
|
|
285
|
+
// ─── Main Scraper ──────────────────────────────────────────────────
|
|
286
|
+
|
|
287
|
+
/**
 * Scrape a website and return a business profile.
 * Uses server-side fetch (no CORS proxy needed).
 *
 * The whole request, including the body download, is aborted after
 * SCRAPE_TIMEOUT milliseconds.
 *
 * NOTE(review): the URL is fetched as given; if it can come from an untrusted
 * client, confirm that requests to internal/loopback hosts are acceptable.
 *
 * @param {string} rawUrl - The website URL to scrape.
 * @param {(step: string) => void} [onStep] - Optional progress callback.
 * @returns {Promise<object>} businessProfile
 * @throws {Error} When the response status is not OK, the body is empty or
 *   shorter than 100 characters, or the fetch fails or is aborted.
 */
async function scrapeWebsite(rawUrl, onStep) {
  const url = normalizeUrl(rawUrl)

  onStep?.('Fetching website...')
  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), SCRAPE_TIMEOUT)

  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; ClaudePortalBot/1.0)',
        'Accept': 'text/html',
      },
    })

    if (!response.ok) {
      throw new Error(`Failed to fetch website (${response.status})`)
    }

    // The timer stays armed until the body has been read: fetch() resolves as
    // soon as headers arrive, and a server that stalls mid-body would
    // otherwise keep this call pending forever. `finally` clears the timer.
    const html = await response.text()
    if (!html || html.length < 100) {
      throw new Error('Website returned empty or very short content')
    }

    onStep?.('Reading page content...')
    const meta = extractMeta(html)
    const bodyText = extractBodyText(html)
    const links = extractLinks(html)

    onStep?.('Detecting business signals...')
    const socialProfiles = detectSocialLinks(links, html)
    const emailListDetected = detectEmailSignup(html)
    const priceRange = detectPricing(bodyText)
    const location = detectLocation(bodyText, html)

    const scores = {
      ecommerce: detectEcommerce(html, links),
      booking: detectBooking(html, links),
      saas: detectSaaS(html, bodyText),
      agency: detectAgency(bodyText),
      coaching: detectCoaching(bodyText),
      digitalProduct: detectDigitalProduct(bodyText, links),
      membership: detectMembership(bodyText),
      localService: detectLocalService(bodyText, html, links),
    }

    onStep?.('Classifying business type...')
    const businessType = classifyBusiness(scores)
    const businessName = deriveBusinessName(meta, url)
    const description = meta.description || bodyText.slice(0, 120).trim() + '...'

    return {
      businessName,
      businessType,
      businessTypeLabel: BUSINESS_TYPE_LABELS[businessType] || businessType,
      description,
      website: url,
      priceRange,
      location,
      socialProfiles,
      emailListDetected,
      ogImage: meta.ogImage || null,
      bodyTextPreview: bodyText.slice(0, 500),
    }
  } finally {
    clearTimeout(timeout)
  }
}
|
|
366
|
+
|
|
367
|
+
module.exports = { scrapeWebsite, BUSINESS_TYPE_LABELS }
|