create-hq 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/deps.d.ts +4 -0
- package/dist/deps.d.ts.map +1 -0
- package/dist/deps.js +65 -0
- package/dist/deps.js.map +1 -0
- package/dist/git.d.ts +3 -0
- package/dist/git.d.ts.map +1 -0
- package/dist/git.js +19 -0
- package/dist/git.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +23 -0
- package/dist/index.js.map +1 -0
- package/dist/scaffold.d.ts +8 -0
- package/dist/scaffold.d.ts.map +1 -0
- package/dist/scaffold.js +130 -0
- package/dist/scaffold.js.map +1 -0
- package/dist/ui.d.ts +7 -0
- package/dist/ui.d.ts.map +1 -0
- package/dist/ui.js +36 -0
- package/dist/ui.js.map +1 -0
- package/package.json +41 -0
- package/template/.claude/CLAUDE.md +202 -0
- package/template/.claude/commands/checkpoint.md +127 -0
- package/template/.claude/commands/cleanup.md +307 -0
- package/template/.claude/commands/execute-task.md +440 -0
- package/template/.claude/commands/exit-plan.md +41 -0
- package/template/.claude/commands/handoff.md +97 -0
- package/template/.claude/commands/learn.md +218 -0
- package/template/.claude/commands/metrics.md +118 -0
- package/template/.claude/commands/newworker.md +162 -0
- package/template/.claude/commands/nexttask.md +67 -0
- package/template/.claude/commands/prd.md +238 -0
- package/template/.claude/commands/reanchor.md +51 -0
- package/template/.claude/commands/remember.md +126 -0
- package/template/.claude/commands/run-project.md +348 -0
- package/template/.claude/commands/run.md +110 -0
- package/template/.claude/commands/search-reindex.md +62 -0
- package/template/.claude/commands/search.md +100 -0
- package/template/.claude/commands/setup.md +381 -0
- package/template/.claude/scripts/pure-ralph-loop.ps1 +312 -0
- package/template/.claude/scripts/pure-ralph-loop.sh +859 -0
- package/template/CHANGELOG.md +220 -0
- package/template/LICENSE +21 -0
- package/template/MIGRATION.md +259 -0
- package/template/README.md +368 -0
- package/template/data/journal/.gitkeep +0 -0
- package/template/docs/images/ascii-banner-options.md +122 -0
- package/template/docs/images/hq-banner.svg +105 -0
- package/template/knowledge/Ralph/01-overview.md +71 -0
- package/template/knowledge/Ralph/02-core-concepts.md +114 -0
- package/template/knowledge/Ralph/03-how-ralph-works.md +184 -0
- package/template/knowledge/Ralph/04-back-pressure.md +222 -0
- package/template/knowledge/Ralph/05-specifications.md +210 -0
- package/template/knowledge/Ralph/06-agents-md.md +222 -0
- package/template/knowledge/Ralph/07-implementation.md +316 -0
- package/template/knowledge/Ralph/08-economics.md +182 -0
- package/template/knowledge/Ralph/09-resources.md +145 -0
- package/template/knowledge/Ralph/10-claude-code-workflow.md +212 -0
- package/template/knowledge/Ralph/11-team-training-guide.md +383 -0
- package/template/knowledge/Ralph/README.md +40 -0
- package/template/knowledge/ai-security-framework/CONTRIBUTING.md +139 -0
- package/template/knowledge/ai-security-framework/GLOSSARY.md +176 -0
- package/template/knowledge/ai-security-framework/LICENSE +21 -0
- package/template/knowledge/ai-security-framework/QUICK-START.md +172 -0
- package/template/knowledge/ai-security-framework/README.md +232 -0
- package/template/knowledge/ai-security-framework/checklists/browser-security.md +301 -0
- package/template/knowledge/ai-security-framework/checklists/credential-isolation.md +322 -0
- package/template/knowledge/ai-security-framework/checklists/incident-response.md +288 -0
- package/template/knowledge/ai-security-framework/checklists/pre-flight.md +249 -0
- package/template/knowledge/ai-security-framework/checklists/weekly-audit.md +159 -0
- package/template/knowledge/ai-security-framework/configs/audit-logging.md +372 -0
- package/template/knowledge/ai-security-framework/configs/kill-switches.md +354 -0
- package/template/knowledge/ai-security-framework/docs/01-core-principles.md +256 -0
- package/template/knowledge/ai-security-framework/docs/02-threat-landscape.md +326 -0
- package/template/knowledge/ai-security-framework/docs/03-security-posture.md +250 -0
- package/template/knowledge/ai-security-framework/templates/agents-security.md +233 -0
- package/template/knowledge/design-styles/README.md +42 -0
- package/template/knowledge/design-styles/american-industrial.md +136 -0
- package/template/knowledge/design-styles/ethereal-abstract.md +133 -0
- package/template/knowledge/design-styles/liminal-portal.md +111 -0
- package/template/knowledge/design-styles/swipes/american-industrial/G-3m4YPW0AADdu2.jpeg +0 -0
- package/template/knowledge/design-styles/swipes/american-industrial/G-JJlt5WwAABK3K.png +0 -0
- package/template/knowledge/design-styles/swipes/american-industrial/G-JJmj5W0AEbJ-7.png +0 -0
- package/template/knowledge/design-styles/swipes/american-industrial/G59fgNuXkAAKLJQ (1).jpeg +0 -0
- package/template/knowledge/design-styles/swipes/american-industrial/G59fgNuXkAAKLJQ.jpeg +0 -0
- package/template/knowledge/design-styles/swipes/american-industrial/G7fVkn3WEAAM-ST.jpeg +0 -0
- package/template/knowledge/design-styles/swipes/american-industrial/G8ECO5JWEAIksyn.png +0 -0
- package/template/knowledge/design-styles/swipes/american-industrial/G9-3GQSWoAA8eqZ.png +0 -0
- package/template/knowledge/design-styles/swipes/american-industrial/G9xEOqrXkAEZRcs.png +0 -0
- package/template/knowledge/design-styles/swipes/american-industrial/G_MVeJrXQAA8sx4.jpeg +0 -0
- package/template/knowledge/design-styles/swipes/american-industrial/G_RSkmGXkAAgAVZ.png +0 -0
- package/template/knowledge/design-styles/swipes/american-industrial/README.md +31 -0
- package/template/knowledge/design-styles/swipes/american-industrial/qyqtg7Dq.png +0 -0
- package/template/knowledge/dev-team/README.md +35 -0
- package/template/knowledge/dev-team/patterns/README.md +34 -0
- package/template/knowledge/dev-team/patterns/frontend/react-best-practices.md +178 -0
- package/template/knowledge/dev-team/troubleshooting/README.md +31 -0
- package/template/knowledge/dev-team/workflows/README.md +49 -0
- package/template/knowledge/hq/checkpoint-schema.json +51 -0
- package/template/knowledge/hq/index-md-spec.md +74 -0
- package/template/knowledge/hq/thread-schema.md +153 -0
- package/template/knowledge/hq-core/checkpoint-schema.json +51 -0
- package/template/knowledge/hq-core/index-md-spec.md +74 -0
- package/template/knowledge/hq-core/thread-schema.md +153 -0
- package/template/knowledge/loom/README.md +51 -0
- package/template/knowledge/loom/architecture.md +125 -0
- package/template/knowledge/loom/code-style.md +169 -0
- package/template/knowledge/loom/llm-proxy.md +132 -0
- package/template/knowledge/loom/state-machine.md +131 -0
- package/template/knowledge/loom/thread-system.md +117 -0
- package/template/knowledge/loom/tools.md +94 -0
- package/template/knowledge/loom/weaver.md +96 -0
- package/template/knowledge/loom/web-frontend.md +131 -0
- package/template/knowledge/projects/README.md +72 -0
- package/template/knowledge/projects/templates/README.template.md +28 -0
- package/template/knowledge/workers/README.md +195 -0
- package/template/knowledge/workers/ralph-loop-pattern.md +157 -0
- package/template/knowledge/workers/skill-schema.md +182 -0
- package/template/knowledge/workers/state-machine.md +102 -0
- package/template/knowledge/workers/templates/base-worker.yaml +73 -0
- package/template/knowledge/workers/templates/code-worker.yaml +85 -0
- package/template/knowledge/workers/templates/skill.yaml +49 -0
- package/template/knowledge/workers/templates/social-worker.yaml +70 -0
- package/template/modules/examples/full-manifest.yaml +92 -0
- package/template/modules/examples/minimal.yaml +14 -0
- package/template/modules/modules.yaml +59 -0
- package/template/projects/.gitkeep +0 -0
- package/template/projects/incorporate-workers-into-pure-ralph/prd.json +88 -0
- package/template/projects/pure-ralph-branch-isolation/README.md +114 -0
- package/template/projects/pure-ralph-branch-isolation/prd.json +123 -0
- package/template/projects/purist-ralph-loop/README.md +148 -0
- package/template/projects/purist-ralph-loop/prd.json +135 -0
- package/template/projects/ralph-test/prd.json +50 -0
- package/template/prompts/pure-ralph-base.md +551 -0
- package/template/settings/.gitkeep +0 -0
- package/template/settings/pure-ralph.json +42 -0
- package/template/social-content/drafts/INDEX.md +21 -0
- package/template/social-content/drafts/linkedin/.gitkeep +1 -0
- package/template/social-content/drafts/x/.gitkeep +1 -0
- package/template/social-content/images/.gitkeep +1 -0
- package/template/starter-projects/code-worker/README.md +97 -0
- package/template/starter-projects/code-worker/prd.json +45 -0
- package/template/starter-projects/personal-assistant/README.md +42 -0
- package/template/starter-projects/personal-assistant/prd.json +43 -0
- package/template/starter-projects/social-media/README.md +60 -0
- package/template/starter-projects/social-media/prd.json +43 -0
- package/template/workers/content-brand/README.md +59 -0
- package/template/workers/content-brand/skills/messaging-alignment.md +91 -0
- package/template/workers/content-brand/skills/tone-check.md +76 -0
- package/template/workers/content-brand/skills/voice-analysis.md +68 -0
- package/template/workers/content-brand/worker.yaml +81 -0
- package/template/workers/content-legal/README.md +80 -0
- package/template/workers/content-legal/skills/claim-substantiation.md +150 -0
- package/template/workers/content-legal/skills/compliance-scan.md +123 -0
- package/template/workers/content-legal/skills/disclaimer-check.md +146 -0
- package/template/workers/content-legal/worker.yaml +118 -0
- package/template/workers/content-product/README.md +77 -0
- package/template/workers/content-product/skills/claim-verification.md +96 -0
- package/template/workers/content-product/skills/feature-accuracy.md +117 -0
- package/template/workers/content-product/skills/stats-check.md +128 -0
- package/template/workers/content-product/worker.yaml +97 -0
- package/template/workers/content-sales/README.md +70 -0
- package/template/workers/content-sales/skills/conversion-analysis.md +96 -0
- package/template/workers/content-sales/skills/cta-audit.md +107 -0
- package/template/workers/content-sales/skills/value-prop-check.md +114 -0
- package/template/workers/content-sales/worker.yaml +93 -0
- package/template/workers/content-shared/cli.ts +242 -0
- package/template/workers/content-shared/index.ts +234 -0
- package/template/workers/content-shared/lib/accuracy-analyzer.ts +661 -0
- package/template/workers/content-shared/lib/analyze.ts +370 -0
- package/template/workers/content-shared/lib/brand-analyzer.ts +526 -0
- package/template/workers/content-shared/lib/cms-integration.ts +446 -0
- package/template/workers/content-shared/lib/compliance-analyzer.ts +655 -0
- package/template/workers/content-shared/lib/conversion-analyzer.ts +555 -0
- package/template/workers/content-shared/lib/github-integration.ts +582 -0
- package/template/workers/content-shared/lib/output.ts +373 -0
- package/template/workers/content-shared/lib/parser.ts +771 -0
- package/template/workers/content-shared/lib/priority.ts +439 -0
- package/template/workers/content-shared/lib/recommendations.ts +512 -0
- package/template/workers/content-shared/lib/reporter.ts +749 -0
- package/template/workers/content-shared/lib/restructure.ts +664 -0
- package/template/workers/content-shared/lib/scorer.ts +140 -0
- package/template/workers/content-shared/lib/types.ts +227 -0
- package/template/workers/content-shared/lib/variants.ts +595 -0
- package/template/workers/content-shared/package.json +51 -0
- package/template/workers/content-shared/pnpm-lock.yaml +39 -0
- package/template/workers/content-shared/test/sample-page.json +115 -0
- package/template/workers/content-shared/tsconfig.json +20 -0
- package/template/workers/dev-team/README.md +166 -0
- package/template/workers/dev-team/_template.yaml +70 -0
- package/template/workers/dev-team/architect/package.json +27 -0
- package/template/workers/dev-team/architect/skills/api-design.md +89 -0
- package/template/workers/dev-team/architect/skills/refactor-plan.md +96 -0
- package/template/workers/dev-team/architect/skills/system-design.md +100 -0
- package/template/workers/dev-team/architect/src/index.ts +49 -0
- package/template/workers/dev-team/architect/src/mcp-server.ts +122 -0
- package/template/workers/dev-team/architect/src/skills/api-design.ts +316 -0
- package/template/workers/dev-team/architect/src/skills/refactor-plan.ts +264 -0
- package/template/workers/dev-team/architect/src/skills/system-design.ts +212 -0
- package/template/workers/dev-team/architect/tsconfig.json +19 -0
- package/template/workers/dev-team/architect/worker.yaml +128 -0
- package/template/workers/dev-team/backend-dev/package-lock.json +1252 -0
- package/template/workers/dev-team/backend-dev/package.json +27 -0
- package/template/workers/dev-team/backend-dev/skills/implement-endpoint.md +70 -0
- package/template/workers/dev-team/backend-dev/skills/implement-service.md +62 -0
- package/template/workers/dev-team/backend-dev/src/index.ts +51 -0
- package/template/workers/dev-team/backend-dev/src/mcp-server.ts +109 -0
- package/template/workers/dev-team/backend-dev/src/skills/implement-endpoint.ts +122 -0
- package/template/workers/dev-team/backend-dev/src/skills/implement-service.ts +126 -0
- package/template/workers/dev-team/backend-dev/tsconfig.json +19 -0
- package/template/workers/dev-team/backend-dev/worker.yaml +128 -0
- package/template/workers/dev-team/code-reviewer/package-lock.json +1080 -0
- package/template/workers/dev-team/code-reviewer/package.json +24 -0
- package/template/workers/dev-team/code-reviewer/skills/merge-to-production.md +61 -0
- package/template/workers/dev-team/code-reviewer/skills/merge-to-staging.md +54 -0
- package/template/workers/dev-team/code-reviewer/skills/request-changes.md +63 -0
- package/template/workers/dev-team/code-reviewer/skills/review-pr.md +77 -0
- package/template/workers/dev-team/code-reviewer/src/index.ts +56 -0
- package/template/workers/dev-team/code-reviewer/src/mcp-server.ts +101 -0
- package/template/workers/dev-team/code-reviewer/tsconfig.json +19 -0
- package/template/workers/dev-team/code-reviewer/worker.yaml +90 -0
- package/template/workers/dev-team/database-dev/package.json +22 -0
- package/template/workers/dev-team/database-dev/skills/create-schema.md +48 -0
- package/template/workers/dev-team/database-dev/src/index.ts +50 -0
- package/template/workers/dev-team/database-dev/src/mcp-server.ts +76 -0
- package/template/workers/dev-team/database-dev/tsconfig.json +18 -0
- package/template/workers/dev-team/database-dev/worker.yaml +90 -0
- package/template/workers/dev-team/frontend-dev/package.json +22 -0
- package/template/workers/dev-team/frontend-dev/skills/create-component.md +26 -0
- package/template/workers/dev-team/frontend-dev/src/index.ts +50 -0
- package/template/workers/dev-team/frontend-dev/src/mcp-server.ts +77 -0
- package/template/workers/dev-team/frontend-dev/tsconfig.json +18 -0
- package/template/workers/dev-team/frontend-dev/worker.yaml +132 -0
- package/template/workers/dev-team/infra-dev/package.json +24 -0
- package/template/workers/dev-team/infra-dev/skills/add-monitoring.md +73 -0
- package/template/workers/dev-team/infra-dev/skills/configure-deployment.md +80 -0
- package/template/workers/dev-team/infra-dev/skills/create-dockerfile.md +62 -0
- package/template/workers/dev-team/infra-dev/skills/setup-cicd.md +63 -0
- package/template/workers/dev-team/infra-dev/src/index.ts +55 -0
- package/template/workers/dev-team/infra-dev/src/mcp-server.ts +82 -0
- package/template/workers/dev-team/infra-dev/tsconfig.json +19 -0
- package/template/workers/dev-team/infra-dev/worker.yaml +92 -0
- package/template/workers/dev-team/knowledge-curator/package.json +24 -0
- package/template/workers/dev-team/knowledge-curator/skills/curate-troubleshooting.md +63 -0
- package/template/workers/dev-team/knowledge-curator/skills/process-learnings.md +61 -0
- package/template/workers/dev-team/knowledge-curator/skills/sync-documentation.md +76 -0
- package/template/workers/dev-team/knowledge-curator/skills/update-patterns.md +63 -0
- package/template/workers/dev-team/knowledge-curator/src/index.ts +53 -0
- package/template/workers/dev-team/knowledge-curator/src/mcp-server.ts +92 -0
- package/template/workers/dev-team/knowledge-curator/tsconfig.json +19 -0
- package/template/workers/dev-team/knowledge-curator/worker.yaml +80 -0
- package/template/workers/dev-team/motion-designer/package.json +22 -0
- package/template/workers/dev-team/motion-designer/skills/add-animation.md +25 -0
- package/template/workers/dev-team/motion-designer/skills/generate-image.md +36 -0
- package/template/workers/dev-team/motion-designer/src/index.ts +63 -0
- package/template/workers/dev-team/motion-designer/src/mcp-server.ts +79 -0
- package/template/workers/dev-team/motion-designer/tsconfig.json +18 -0
- package/template/workers/dev-team/motion-designer/worker.yaml +84 -0
- package/template/workers/dev-team/product-planner/queue.json +4 -0
- package/template/workers/dev-team/product-planner/worker.yaml +220 -0
- package/template/workers/dev-team/project-manager/package-lock.json +1252 -0
- package/template/workers/dev-team/project-manager/package.json +27 -0
- package/template/workers/dev-team/project-manager/skills/create-prd.md +66 -0
- package/template/workers/dev-team/project-manager/skills/next-issue.md +51 -0
- package/template/workers/dev-team/project-manager/skills/project-status.md +59 -0
- package/template/workers/dev-team/project-manager/skills/update-learnings.md +65 -0
- package/template/workers/dev-team/project-manager/src/index.ts +54 -0
- package/template/workers/dev-team/project-manager/src/mcp-server.ts +207 -0
- package/template/workers/dev-team/project-manager/src/skills/create-prd.ts +86 -0
- package/template/workers/dev-team/project-manager/src/skills/next-issue.ts +137 -0
- package/template/workers/dev-team/project-manager/src/skills/project-status.ts +131 -0
- package/template/workers/dev-team/project-manager/src/skills/update-learnings.ts +94 -0
- package/template/workers/dev-team/project-manager/tsconfig.json +19 -0
- package/template/workers/dev-team/project-manager/worker.yaml +96 -0
- package/template/workers/dev-team/qa-tester/package.json +24 -0
- package/template/workers/dev-team/qa-tester/skills/create-demo-account.md +36 -0
- package/template/workers/dev-team/qa-tester/skills/run-tests.md +36 -0
- package/template/workers/dev-team/qa-tester/skills/write-test.md +27 -0
- package/template/workers/dev-team/qa-tester/src/index.ts +61 -0
- package/template/workers/dev-team/qa-tester/src/mcp-server.ts +88 -0
- package/template/workers/dev-team/qa-tester/tsconfig.json +18 -0
- package/template/workers/dev-team/qa-tester/worker.yaml +116 -0
- package/template/workers/dev-team/task-executor/package-lock.json +1252 -0
- package/template/workers/dev-team/task-executor/package.json +27 -0
- package/template/workers/dev-team/task-executor/skills/analyze-issue.md +101 -0
- package/template/workers/dev-team/task-executor/skills/execute.md +133 -0
- package/template/workers/dev-team/task-executor/skills/report-learnings.md +106 -0
- package/template/workers/dev-team/task-executor/skills/validate-completion.md +121 -0
- package/template/workers/dev-team/task-executor/src/index.ts +54 -0
- package/template/workers/dev-team/task-executor/src/mcp-server.ts +139 -0
- package/template/workers/dev-team/task-executor/src/skills/analyze-issue.ts +219 -0
- package/template/workers/dev-team/task-executor/src/skills/execute.ts +132 -0
- package/template/workers/dev-team/task-executor/src/skills/report-learnings.ts +119 -0
- package/template/workers/dev-team/task-executor/src/skills/validate-completion.ts +142 -0
- package/template/workers/dev-team/task-executor/tsconfig.json +19 -0
- package/template/workers/dev-team/task-executor/worker.yaml +110 -0
- package/template/workers/registry.yaml +171 -0
- package/template/workers/security-scanner/README.md +73 -0
- package/template/workers/security-scanner/skills/pre-deploy-check.md +205 -0
- package/template/workers/security-scanner/worker.yaml +26 -0
- package/template/workspace/checkpoints/.gitkeep +0 -0
- package/template/workspace/content-ideas/inbox.jsonl +0 -0
- package/template/workspace/drafts/.gitkeep +0 -0
- package/template/workspace/learnings/.gitkeep +3 -0
- package/template/workspace/orchestrator/.gitkeep +0 -0
- package/template/workspace/ralph-test/COMPLETE.md +18 -0
- package/template/workspace/ralph-test/hello.txt +2 -0
- package/template/workspace/reports/.gitkeep +0 -0
- package/template/workspace/scratch/.gitkeep +0 -0
- package/template/workspace/threads/.gitkeep +3 -0
|
@@ -0,0 +1,771 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parser utilities for content analysis workers
|
|
3
|
+
* Handles parsing page content from various formats
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { readFileSync } from 'node:fs';
|
|
7
|
+
import type { PageContent, ContentSection, CTA, ImageAsset, PageMeta, AnalysisInput, SectionAnalysis } from './types.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Validate an already-parsed JSON value and return it typed as PageContent.
 *
 * @param json - Arbitrary value, typically the result of JSON.parse.
 * @returns The same value, typed as PageContent.
 * @throws Error when the value does not pass the isValidPageContent check.
 */
export function parsePageContent(json: unknown): PageContent {
  if (isValidPageContent(json)) {
    return json;
  }
  throw new Error('Invalid page content structure');
}
|
|
18
|
+
|
|
19
|
+
/**
 * Type guard for PageContent.
 *
 * Only the top-level shape is checked: `url` and `title` must be strings and
 * `sections` must be an array. No other field is inspected, and the elements
 * of `sections` are not validated.
 */
function isValidPageContent(obj: unknown): obj is PageContent {
  if (obj === null || typeof obj !== 'object') {
    return false;
  }

  const candidate = obj as Record<string, unknown>;
  if (typeof candidate.url !== 'string') {
    return false;
  }
  if (typeof candidate.title !== 'string') {
    return false;
  }
  return Array.isArray(candidate.sections);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Collect every piece of human-readable text on a page, in document order:
 * title, meta description, then per section heading / subheading /
 * paragraphs / bullet points, and finally CTA labels.
 *
 * The page validator in this file only guarantees `url`, `title` and
 * `sections`, so `meta`, `ctas` and a section's `paragraphs` are treated as
 * optional here instead of being dereferenced unconditionally.
 *
 * @param page - Page to read text from.
 * @returns Non-blank strings only; empty and whitespace-only entries are dropped.
 */
export function extractAllText(page: PageContent): string[] {
  const texts: string[] = [];

  // Title and meta
  texts.push(page.title);
  const description = page.meta?.description;
  if (description) texts.push(description);

  // Section content
  for (const section of page.sections) {
    if (section.heading) texts.push(section.heading);
    if (section.subheading) texts.push(section.subheading);
    texts.push(...(section.paragraphs ?? []));
    if (section.bulletPoints) texts.push(...section.bulletPoints);
  }

  // CTAs (may be absent on pages that only passed the structural check)
  for (const cta of page.ctas ?? []) {
    texts.push(cta.text);
  }

  // Guard on typeof so a stray non-string entry is dropped rather than
  // throwing on `.trim()`.
  return texts.filter(t => typeof t === 'string' && t.trim().length > 0);
}
|
|
57
|
+
|
|
58
|
+
/**
 * List the page title followed by every section heading and subheading,
 * in section order. Missing, empty and whitespace-only values are omitted.
 */
export function extractHeadings(page: PageContent): string[] {
  const candidates = [
    page.title,
    ...page.sections.flatMap(section => [section.heading, section.subheading]),
  ];

  const headings: string[] = [];
  for (const candidate of candidates) {
    if (candidate && candidate.trim().length > 0) {
      headings.push(candidate);
    }
  }
  return headings;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Return the page's CTA list, or an empty array when the page has none.
 */
export function extractCTAs(page: PageContent): CTA[] {
  const { ctas } = page;
  return ctas ? ctas : [];
}
|
|
78
|
+
|
|
79
|
+
/**
 * Gather the paragraphs of every section whose `type` equals `sectionType`,
 * concatenated in section order.
 */
export function extractParagraphsBySection(
  page: PageContent,
  sectionType: ContentSection['type']
): string[] {
  let collected: string[] = [];
  for (const section of page.sections) {
    if (section.type !== sectionType) continue;
    collected = collected.concat(section.paragraphs);
  }
  return collected;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Find sections whose text mentions at least one of the given keywords.
 *
 * Matching is a case-insensitive substring test against the section's
 * heading, subheading, paragraphs and bullet points joined with spaces.
 *
 * Empty or whitespace-only keywords are ignored: `''` is a substring of
 * every string, so keeping it would make every section match.
 *
 * @param page - Page whose sections are searched.
 * @param keywords - Substrings to look for; non-blank entries are used as-is
 *   (not trimmed), only lower-cased.
 * @returns Matching sections in their original order; empty when no usable
 *   keyword is supplied.
 */
export function findSectionsWithKeywords(
  page: PageContent,
  keywords: string[]
): ContentSection[] {
  const needles = keywords
    .filter(k => k.trim().length > 0)
    .map(k => k.toLowerCase());

  if (needles.length === 0) {
    return [];
  }

  return page.sections.filter(section => {
    const haystack = [
      section.heading || '',
      section.subheading || '',
      ...section.paragraphs,
      ...(section.bulletPoints || [])
    ].join(' ').toLowerCase();

    return needles.some(needle => haystack.includes(needle));
  });
}
|
|
111
|
+
|
|
112
|
+
/**
 * Count whitespace-separated words across all given strings.
 *
 * The strings are joined with single spaces first, so a word never spans
 * two entries.
 */
export function countWords(texts: string[]): number {
  const tokens = texts.join(' ').match(/\S+/g);
  return tokens === null ? 0 : tokens.length;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Split the given strings into sentences.
 *
 * The strings are joined with single spaces and then cut at every run of
 * `.`, `!` or `?`; each piece is trimmed and empty pieces are discarded.
 * Terminators are not kept in the output. Because any `.` is a cut point,
 * text such as "3.5" is split into "3" and "5".
 */
export function extractSentences(texts: string[]): string[] {
  const sentences: string[] = [];
  for (const fragment of texts.join(' ').split(/[.!?]+/)) {
    const trimmed = fragment.trim();
    if (trimmed.length > 0) {
      sentences.push(trimmed);
    }
  }
  return sentences;
}
|
|
126
|
+
|
|
127
|
+
// ============================================
|
|
128
|
+
// File Parsing Functions (US-006)
|
|
129
|
+
// ============================================
|
|
130
|
+
|
|
131
|
+
/**
 * Load a JSON file from disk and convert it to PageContent.
 *
 * Formats are tried in this order:
 *   1. standard PageContent (returned as-is),
 *   2. site audit format (object with `meta` and `content` objects),
 *   3. anything else is handed to the CMS parser.
 *
 * @param filePath - Path of the JSON file, read synchronously as UTF-8.
 * @returns The parsed page.
 */
export function parsePageFile(filePath: string): PageContent {
  const json = JSON.parse(readFileSync(filePath, 'utf-8'));

  // Standard format needs no conversion
  if (isValidPageContent(json)) {
    return json as PageContent;
  }

  // Site audit format, otherwise fall back to the CMS parser
  return isSiteAuditFormat(json)
    ? parseSiteAuditContent(json)
    : parseCMSContent(json);
}
|
|
152
|
+
|
|
153
|
+
/**
 * Detect the site audit format: a non-null object whose `meta` and `content`
 * properties are both non-null objects. The contents of `meta` and `content`
 * are not inspected.
 */
function isSiteAuditFormat(obj: unknown): boolean {
  const isNonNullObject = (value: unknown): boolean =>
    typeof value === 'object' && value !== null;

  if (!isNonNullObject(obj)) {
    return false;
  }

  const candidate = obj as Record<string, unknown>;
  return isNonNullObject(candidate.meta) && isNonNullObject(candidate.content);
}
|
|
166
|
+
|
|
167
|
+
/**
 * Parse site audit format into PageContent.
 *
 * Site audit format:
 *   { meta: { url, title, description }, content: { headings, paragraphs, lists, ctas } }
 * Optional `meta.path`, `meta.ogTags`, `meta.capturedAt` and `media.images`
 * are also read when present.
 *
 * How the flat audit data is mapped:
 * - Each level-1 or level-2 heading starts a new section; level-3 headings are
 *   added to the current section's bullet points. Other levels, and level-3
 *   headings that appear before any section, are ignored.
 * - Headings whose `text` is not a string are skipped, so they can neither
 *   reach the section-type inference (which lower-cases the text) nor end up
 *   as non-string bullet points.
 * - Paragraphs are split into equally sized consecutive chunks, one chunk per
 *   section, purely by position; non-string entries are dropped.
 * - When no section was produced, a single 'content' section holding all
 *   paragraphs is created with the page title as heading.
 * - All list items are appended to the last section's bullet points.
 * - CTAs labelled 'Manage Preferences' or 'Accept All' are discarded.
 *
 * @param json - Parsed JSON value in site audit format.
 * @returns The converted page.
 * @throws Error when `json` is not an object or lacks `meta` / `content`.
 */
export function parseSiteAuditContent(json: unknown): PageContent {
  if (typeof json !== 'object' || json === null) {
    throw new Error('Invalid site audit content: expected object');
  }

  const data = json as Record<string, unknown>;
  const meta = data.meta as Record<string, unknown> | undefined;
  const content = data.content as Record<string, unknown> | undefined;
  const media = data.media as Record<string, unknown> | undefined;

  if (!meta || !content) {
    throw new Error('Invalid site audit content: missing meta or content');
  }

  // Extract URL and title from meta
  const url = (meta.url ?? meta.path ?? '/') as string;
  const title = (meta.title ?? 'Untitled') as string;

  // Build page meta; the Open Graph tags are only read when ogTags is an object
  const ogTags =
    meta.ogTags && typeof meta.ogTags === 'object'
      ? (meta.ogTags as Record<string, unknown>)
      : undefined;
  const pageMeta: PageMeta = {
    description: meta.description as string | undefined,
    ogTitle: ogTags?.title as string | undefined,
    ogDescription: ogTags?.description as string | undefined,
    ogImage: ogTags?.image as string | undefined,
  };

  const headings = Array.isArray(content.headings) ? content.headings : [];
  const paragraphs = Array.isArray(content.paragraphs) ? content.paragraphs : [];
  const lists = Array.isArray(content.lists) ? content.lists : [];

  // Build sections from headings
  const sections: ContentSection[] = [];
  let currentSection: ContentSection | null = null;

  for (const heading of headings) {
    if (typeof heading !== 'object' || heading === null) continue;
    const h = heading as Record<string, unknown>;
    const level = h.level;
    const text = h.text;

    // A heading without usable text cannot name a section or a bullet point.
    if (typeof text !== 'string') continue;

    if (level === 1 || level === 2) {
      // Close the section in progress before starting a new one, so the new
      // id reflects the number of sections already stored.
      if (currentSection) {
        sections.push(currentSection);
      }

      currentSection = {
        id: `section-${sections.length}`,
        type: inferSectionType(text),
        heading: text,
        paragraphs: [],
      };
    } else if (level === 3 && currentSection) {
      if (!currentSection.bulletPoints) {
        currentSection.bulletPoints = [];
      }
      currentSection.bulletPoints.push(text);
    }
  }

  // Push last section
  if (currentSection) {
    sections.push(currentSection);
  }

  // Distribute paragraphs across sections in equal consecutive chunks
  const paragraphsPerSection = Math.ceil(paragraphs.length / Math.max(sections.length, 1));
  sections.forEach((section, index) => {
    const start = index * paragraphsPerSection;
    const end = Math.min(start + paragraphsPerSection, paragraphs.length);
    section.paragraphs = paragraphs
      .slice(start, end)
      .filter((p): p is string => typeof p === 'string');
  });

  // If no sections created, create one with all paragraphs
  if (sections.length === 0) {
    sections.push({
      id: 'section-0',
      type: 'content',
      heading: title,
      paragraphs: paragraphs.filter((p): p is string => typeof p === 'string'),
    });
  }

  // Add bullet points from lists to the last section
  for (const list of lists) {
    if (typeof list !== 'object' || list === null) continue;
    const items = Array.isArray((list as Record<string, unknown>).items)
      ? ((list as Record<string, unknown>).items as unknown[])
      : [];

    const targetSection = sections[sections.length - 1];
    if (targetSection) {
      if (!targetSection.bulletPoints) {
        targetSection.bulletPoints = [];
      }
      targetSection.bulletPoints.push(
        ...items.filter((item): item is string => typeof item === 'string')
      );
    }
  }

  // Extract CTAs; `location` keeps the index within the raw CTA objects
  const ctaData = Array.isArray(content.ctas) ? content.ctas : [];
  const ctas: CTA[] = ctaData
    .filter((c): c is Record<string, unknown> => typeof c === 'object' && c !== null)
    .map((c, index) => ({
      text: (c.text ?? 'Click here') as string,
      href: c.href as string | undefined,
      type: 'primary' as const,
      location: `cta-${index}`,
    }))
    .filter(c => c.text && c.text !== 'Manage Preferences' && c.text !== 'Accept All');

  // Extract images
  const imageData = media && Array.isArray(media.images) ? media.images : [];
  const images: ImageAsset[] = imageData
    .filter((i): i is Record<string, unknown> => typeof i === 'object' && i !== null)
    .map(i => ({
      src: (i.src ?? '') as string,
      alt: i.alt as string | undefined,
      context: '',
    }));

  return {
    url,
    title,
    meta: pageMeta,
    sections,
    ctas,
    images,
    extractedAt: (meta.capturedAt as string) ?? new Date().toISOString(),
  };
}
|
|
319
|
+
|
|
320
|
+
/**
 * Classify a section by keywords found in its heading.
 *
 * Matching is a case-insensitive substring test. Keyword groups are tried in
 * order and the first group with a hit decides the type; headings that match
 * no group are treated as generic content.
 */
function inferSectionType(heading: string): ContentSection['type'] {
  const needle = heading.toLowerCase();

  // Ordered keyword groups: earlier entries take precedence over later ones.
  const rules: Array<[ContentSection['type'], string[]]> = [
    ['hero', ['hero', 'welcome', 'enterprise ai']],
    ['features', ['feature', 'why', 'benefit']],
    ['testimonials', ['testimonial', 'customer', 'review']],
    ['pricing', ['pricing', 'plan', 'package']],
    ['cta', ['start', 'contact', 'demo', 'ready']],
    ['features', ['security', 'compliance', 'certification']],
  ];

  for (const [sectionType, keywords] of rules) {
    if (keywords.some(keyword => needle.includes(keyword))) {
      return sectionType;
    }
  }

  // Technical / specification headings also end up here as plain content.
  return 'content';
}
|
|
350
|
+
|
|
351
|
+
/**
 * Parse content from CMS JSON format (matches example-company-cms schema).
 *
 * Accepts either the nested structure `{ page: { slug, title, sections, ... } }`
 * or the page object itself. Fields read from the untyped payload are
 * validated at runtime instead of being cast, so malformed values fall back
 * to defaults rather than throwing later or leaking wrongly-typed data.
 *
 * @param json - Raw, untrusted CMS payload.
 * @returns The normalized page content; `extractedAt` is the current time.
 * @throws Error if `json` (or its `page` member, when present) is not an object.
 */
export function parseCMSContent(json: unknown): PageContent {
  if (typeof json !== 'object' || json === null) {
    throw new Error('Invalid CMS content: expected object');
  }

  const data = json as Record<string, unknown>;

  // Handle nested page structure from CMS
  const pageData = data.page ?? data;

  if (typeof pageData !== 'object' || pageData === null) {
    throw new Error('Invalid CMS content: missing page data');
  }

  const page = pageData as Record<string, unknown>;

  const asString = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;

  // Extract URL/slug. Non-string values are ignored so the string methods
  // below cannot throw on e.g. a numeric slug.
  const slug = asString(page.slug) ?? asString(page.url) ?? 'unknown';
  // Only a real http(s) URL is kept verbatim; anything else becomes a
  // root-relative path with exactly one leading slash ("/about", not "//about").
  const url = /^https?:\/\//i.test(slug) ? slug : `/${slug.replace(/^\/+/, '')}`;

  // Extract title
  const title = asString(page.title) ?? asString(page.name) ?? 'Untitled';

  // Keywords may arrive as an array or as a comma-separated string.
  let keywords: string[] | undefined;
  if (Array.isArray(page.keywords)) {
    keywords = page.keywords.filter((k): k is string => typeof k === 'string');
  } else if (typeof page.keywords === 'string') {
    keywords = page.keywords
      .split(',')
      .map(k => k.trim())
      .filter(k => k.length > 0);
  }

  // Extract meta
  const meta: PageMeta = {
    description: asString(page.description),
    keywords,
    ogTitle: asString(page.ogTitle),
    ogDescription: asString(page.ogDescription),
    ogImage: asString(page.ogImage),
  };

  // Parse sections
  const rawSections: unknown[] = Array.isArray(page.sections) ? page.sections : [];
  const sections: ContentSection[] = rawSections.map((s, index) => parseCMSSection(s, index));

  // Parse CTAs and images (page-level plus those embedded in sections)
  const ctas: CTA[] = extractCTAsFromCMS(page);
  const images: ImageAsset[] = extractImagesFromCMS(page);

  return {
    url,
    title,
    meta,
    sections,
    ctas,
    images,
    extractedAt: new Date().toISOString(),
  };
}
|
|
409
|
+
|
|
410
|
+
/**
 * Parse a single section from CMS format.
 *
 * Text is gathered from `content` (string or string array), `body`, `text`
 * and `description`, in that order. Bullet points come from `bullets` and
 * from `items` (plain strings, or objects contributing their `text` and
 * `title`). Every field is type-checked at runtime: a non-string `type` is
 * treated as missing, and non-string headings are dropped rather than cast.
 *
 * @param section - Raw section value from the CMS payload.
 * @param index - Position of the section, used for the fallback id.
 * @returns A normalized section; non-object input yields an empty section of
 *   type 'other'.
 */
function parseCMSSection(section: unknown, index: number): ContentSection {
  const fallbackId = `section-${index}`;

  if (typeof section !== 'object' || section === null) {
    return {
      id: fallbackId,
      type: 'other',
      paragraphs: [],
    };
  }

  const s = section as Record<string, unknown>;

  const isString = (value: unknown): value is string => typeof value === 'string';
  const asString = (value: unknown): string | undefined =>
    isString(value) ? value : undefined;
  // Identifiers are frequently numeric in CMS exports; keep them as text.
  const asId = (value: unknown): string | undefined =>
    typeof value === 'number' && Number.isFinite(value) ? String(value) : asString(value);

  // Determine section type. Passing undefined for a non-string type lets the
  // mapper infer from the section's fields instead of calling string methods
  // on a non-string.
  const type = mapSectionType(asString(s.type), s);

  // Extract paragraphs from various possible fields
  const paragraphs: string[] = [];

  if (isString(s.content)) {
    paragraphs.push(s.content);
  } else if (Array.isArray(s.content)) {
    paragraphs.push(...s.content.filter(isString));
  }

  for (const field of [s.body, s.text, s.description]) {
    if (isString(field)) {
      paragraphs.push(field);
    }
  }

  // Extract bullet points
  const bulletPoints: string[] = [];
  if (Array.isArray(s.bullets)) {
    bulletPoints.push(...s.bullets.filter(isString));
  }
  if (Array.isArray(s.items)) {
    for (const item of s.items) {
      if (isString(item)) {
        bulletPoints.push(item);
      } else if (typeof item === 'object' && item !== null) {
        const obj = item as Record<string, unknown>;
        if (isString(obj.text)) bulletPoints.push(obj.text);
        if (isString(obj.title)) bulletPoints.push(obj.title);
      }
    }
  }

  return {
    id: asId(s.id) ?? asId(s.key) ?? fallbackId,
    type,
    heading: asString(s.heading) ?? asString(s.title),
    subheading: asString(s.subheading) ?? asString(s.subtitle),
    paragraphs,
    bulletPoints: bulletPoints.length > 0 ? bulletPoints : undefined,
  };
}
|
|
474
|
+
|
|
475
|
+
/**
 * Map a CMS section type to our standard types.
 *
 * When `type` is missing, empty, or not actually a string at runtime (callers
 * pass values cast from untyped JSON), the type is inferred from which
 * well-known fields are present on the section. Otherwise the type name is
 * matched case-insensitively by substring, first match wins.
 *
 * @param type - Type name as declared by the CMS, if any.
 * @param section - The raw section, consulted only when `type` is unusable.
 * @returns The standard section type; 'content' when nothing matches.
 */
function mapSectionType(
  type: string | undefined,
  section: Record<string, unknown>
): ContentSection['type'] {
  // typeof guard: a truthy non-string (e.g. a numeric type id) must not reach
  // toLowerCase() below.
  if (typeof type !== 'string' || type === '') {
    // Infer from content
    if (section.hero || section.headline) return 'hero';
    if (section.features || section.featureList) return 'features';
    if (section.testimonials || section.quotes) return 'testimonials';
    if (section.pricing || section.plans) return 'pricing';
    return 'content';
  }

  const normalized = type.toLowerCase();

  if (normalized.includes('hero')) return 'hero';
  if (normalized.includes('feature')) return 'features';
  if (normalized.includes('testimonial') || normalized.includes('quote')) return 'testimonials';
  if (normalized.includes('pricing') || normalized.includes('plan')) return 'pricing';
  if (normalized.includes('cta') || normalized.includes('action')) return 'cta';
  if (normalized.includes('footer')) return 'footer';
  if (normalized.includes('header') || normalized.includes('nav')) return 'header';

  return 'content';
}
|
|
503
|
+
|
|
504
|
+
/**
 * Extract CTAs from CMS page data.
 *
 * Sources, in output order:
 *  1. the page-level `ctas` array,
 *  2. for each section, its single `cta` object followed by its `buttons` array.
 *
 * All three sources share one conversion: text comes from `text` or `label`
 * (default 'Click here'), the target from `href`, `url` or `link`, and the
 * style from `type`/`variant`. Values are type-checked at runtime; arrays and
 * primitives found where a CTA object is expected are skipped.
 *
 * @param page - Raw page object from the CMS payload.
 * @returns The CTAs found; empty when the page declares none.
 */
function extractCTAsFromCMS(page: Record<string, unknown>): CTA[] {
  const asString = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;
  // Locations may be numeric section ids; keep them as text.
  const asLabel = (value: unknown): string | undefined =>
    typeof value === 'number' && Number.isFinite(value) ? String(value) : asString(value);
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  // Single place that turns a raw CTA/button object into a CTA.
  const toCTA = (raw: Record<string, unknown>, location: string): CTA => ({
    text: asString(raw.text) ?? asString(raw.label) ?? 'Click here',
    href: asString(raw.href) ?? asString(raw.url) ?? asString(raw.link),
    type: mapCTAType(asString(raw.type), asString(raw.variant)),
    location,
  });

  const ctas: CTA[] = [];

  // Check for explicit CTAs array
  if (Array.isArray(page.ctas)) {
    for (const entry of page.ctas) {
      if (isRecord(entry)) {
        ctas.push(toCTA(entry, asLabel(entry.location) ?? asLabel(entry.section) ?? 'unknown'));
      }
    }
  }

  // Extract CTAs from sections
  if (Array.isArray(page.sections)) {
    for (const section of page.sections) {
      if (!isRecord(section)) continue;

      const location = asLabel(section.id) ?? asLabel(section.type) ?? 'section';

      // Check for CTA in section
      if (isRecord(section.cta)) {
        ctas.push(toCTA(section.cta, location));
      }

      // Check for buttons array
      if (Array.isArray(section.buttons)) {
        for (const button of section.buttons) {
          if (isRecord(button)) {
            ctas.push(toCTA(button, location));
          }
        }
      }
    }
  }

  return ctas;
}
|
|
561
|
+
|
|
562
|
+
/**
 * Map CTA type from various formats.
 *
 * `type` is consulted first and `variant` second; the first of the two that
 * contains a recognized keyword decides the result. This lets a generic
 * `type` such as 'button' defer to a meaningful `variant` such as 'outline'
 * instead of masking it. Matching is case-insensitive by substring.
 *
 * @param type - Declared CTA type, if any.
 * @param variant - Declared visual variant, if any.
 * @returns 'primary', 'secondary' or 'link'; 'primary' when nothing matches.
 */
function mapCTAType(type?: string, variant?: string): CTA['type'] {
  for (const candidate of [type, variant]) {
    // Values originate from untyped JSON, so verify before using string methods.
    if (typeof candidate !== 'string') continue;

    const t = candidate.toLowerCase();
    if (t.includes('primary') || t.includes('main')) return 'primary';
    if (t.includes('secondary') || t.includes('outline')) return 'secondary';
    if (t.includes('link') || t.includes('text')) return 'link';
  }

  return 'primary'; // Default to primary
}
|
|
572
|
+
|
|
573
|
+
/**
 * Extract images from CMS page data.
 *
 * Sources, in output order:
 *  1. the page-level `images` array (objects with `src`/`url`, optional `alt`,
 *     and `context`/`caption`),
 *  2. for each section, its `image` (an object, or a plain URL string)
 *     followed by its `backgroundImage` URL string.
 *
 * Values are type-checked at runtime: non-string fields are treated as
 * missing, and arrays or primitives found where an image object is expected
 * are skipped. An image object without a usable source is still reported,
 * with an empty `src`.
 *
 * @param page - Raw page object from the CMS payload.
 * @returns The image assets found; empty when the page declares none.
 */
function extractImagesFromCMS(page: Record<string, unknown>): ImageAsset[] {
  const asString = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  const images: ImageAsset[] = [];

  // Check for explicit images array
  if (Array.isArray(page.images)) {
    for (const img of page.images) {
      if (!isRecord(img)) continue;
      images.push({
        src: asString(img.src) ?? asString(img.url) ?? '',
        alt: asString(img.alt),
        context: asString(img.context) ?? asString(img.caption) ?? '',
      });
    }
  }

  // Extract images from sections
  if (Array.isArray(page.sections)) {
    for (const section of page.sections) {
      if (!isRecord(section)) continue;

      const sectionType = asString(section.type);
      // Most descriptive label available for where the image appears.
      const sectionLabel =
        asString(section.heading) ?? asString(section.title) ?? sectionType ?? 'section';

      if (isRecord(section.image)) {
        images.push({
          src: asString(section.image.src) ?? asString(section.image.url) ?? '',
          alt: asString(section.image.alt),
          context: sectionLabel,
        });
      } else if (typeof section.image === 'string' && section.image !== '') {
        // Some payloads give the image as a bare URL, like backgroundImage.
        images.push({
          src: section.image,
          alt: undefined,
          context: sectionLabel,
        });
      }

      if (typeof section.backgroundImage === 'string') {
        images.push({
          src: section.backgroundImage,
          alt: undefined,
          context: `background: ${sectionType ?? 'section'}`,
        });
      }
    }
  }

  return images;
}
|
|
620
|
+
|
|
621
|
+
// ============================================
|
|
622
|
+
// Analysis Input Extraction (US-006)
|
|
623
|
+
// ============================================
|
|
624
|
+
|
|
625
|
+
/**
 * Build the structured analysis input for a parsed page.
 *
 * Each facet (headings, paragraphs, stats, claims, per-section analyses) is
 * produced by its dedicated extractor; CTAs are reduced to text/link pairs,
 * with a missing href represented as an empty link.
 */
export function extractAnalysisInput(page: PageContent): AnalysisInput {
  const headingList = extractHeadings(page);
  const paragraphList = extractAllParagraphs(page);
  const statList = extractStats(page);
  const claimList = extractClaims(page);
  const sectionList = extractSectionAnalyses(page);

  // Slug is derived from the page URL
  const slug = extractSlug(page.url);

  const ctaLinks = page.ctas.map(({ text, href }) => ({
    text,
    link: href ?? '',
  }));

  const input: AnalysisInput = {
    pageSlug: slug,
    title: page.title,
    headings: headingList,
    paragraphs: paragraphList,
    ctas: ctaLinks,
    stats: statList,
    claims: claimList,
    sections: sectionList,
  };
  return input;
}
|
|
652
|
+
|
|
653
|
+
/**
 * Collect every text entry of the page in reading order: for each section its
 * paragraphs, then its bullet points (when present). Entries that are empty
 * or whitespace-only are left out.
 */
function extractAllParagraphs(page: PageContent): string[] {
  return page.sections
    .flatMap(({ paragraphs, bulletPoints }) =>
      bulletPoints ? [...paragraphs, ...bulletPoints] : [...paragraphs]
    )
    .filter(entry => entry.trim().length > 0);
}
|
|
668
|
+
|
|
669
|
+
/**
 * Extract statistics from page content.
 * Looks for patterns like "50%", "$1M", "100+", "10x", "24/7".
 *
 * Patterns are tried in the order listed. Within one text, a match that
 * overlaps a span already claimed by an earlier pattern is discarded, so a
 * figure is reported once ("$1M" does not also yield "1M", "50%" does not
 * also yield "50"). Bare numbers without a suffix, plus sign or thousands
 * separators (e.g. a year) are not treated as statistics.
 *
 * @param page - Parsed page whose text is scanned.
 * @returns One entry per distinct value, in first-seen order; `label` is the
 *   matched value with up to 30 characters of surrounding text on each side.
 */
function extractStats(page: PageContent): { value: string; label: string }[] {
  const stats: { value: string; label: string }[] = [];
  const seenValues = new Set<string>();

  const statPatterns = [
    // Percentages: 50%, 99.9%
    /(\d+(?:\.\d+)?%)/g,
    // Dollar amounts: $1M, $500K, $1,000, $1.5M (no trailing punctuation)
    /(\$\d+(?:,\d+)*(?:\.\d+)?(?:[KMB]\b)?)/gi,
    // Multipliers: 10x, 2.5x (but not dimensions such as 1920x1080)
    /(\d+(?:\.\d+)?x)\b/gi,
    // Counts that carry a magnitude marker: 50K, 1.5M+, 100+, 1,000
    /\b(\d+(?:,\d{3})*(?:\.\d+)?[KMB](?:\+|\b)|\d+(?:,\d{3})*\+|\d+(?:,\d{3})+)/g,
    // Availability: 24/7
    /\b(24\/7)\b/g,
  ];

  for (const text of extractAllText(page)) {
    // [start, end) spans of this text already attributed to a statistic.
    const claimed: Array<[number, number]> = [];

    for (const pattern of statPatterns) {
      for (const match of text.matchAll(pattern)) {
        const value = match[1];
        const start = match.index ?? 0;
        const end = start + match[0].length;

        // Skip fragments of a figure an earlier pattern already captured.
        if (claimed.some(([from, to]) => start < to && from < end)) continue;
        claimed.push([start, end]);

        // Avoid duplicates across the whole page
        if (seenValues.has(value)) continue;
        seenValues.add(value);

        // Extract surrounding context as label
        const label = text
          .slice(Math.max(0, start - 30), Math.min(text.length, end + 30))
          .trim();
        stats.push({ value, label });
      }
    }
  }

  return stats;
}
|
|
710
|
+
|
|
711
|
+
/**
 * Extract claims from page content.
 * Looks for assertive statements, superlatives, comparisons.
 *
 * A sentence is reported when it matches at least one indicator pattern and
 * is longer than 20 and shorter than 500 characters. Sentences are returned
 * in the order produced by the sentence splitter; duplicates are not removed.
 *
 * @param page - Parsed page whose text is scanned.
 * @returns The claim-like sentences.
 */
function extractClaims(page: PageContent): string[] {
  const claims: string[] = [];
  const sentences = extractSentences(extractAllText(page));

  // Patterns that indicate claims.
  // Tokens that begin or end with a non-word character ("#1", "100%", "50%")
  // are kept outside the \b...\b groups: \b next to "#" or "%" only matches
  // when a letter or digit is adjacent, which never happens in normal prose.
  const claimIndicators = [
    /\b(?:best|leading|top|number one|premier|fastest|most|only|first)\b|#1\b/i,
    /\b(?:guaranteed|proven|certified|trusted|secure|compliant)\b/i,
    /\b(?:save|reduce|increase|improve|boost|grow|eliminate)\b/i,
    /\b(?:never|always|every|all)\b|\b100%/i,
    /\b(?:award-winning|industry-leading|world-class|enterprise-grade)\b/i,
    /\b(?:more than|over|up to)\b|\b\d+(?:x\b|%)/i,
  ];

  for (const sentence of sentences) {
    const isClaimLike = claimIndicators.some(pattern => pattern.test(sentence));
    if (isClaimLike && sentence.length > 20 && sentence.length < 500) {
      claims.push(sentence);
    }
  }

  return claims;
}
|
|
738
|
+
|
|
739
|
+
/**
 * Produce one analysis record per page section: its id, type and heading
 * (empty string when absent), its text content (paragraphs followed by bullet
 * points) and the word count of that content.
 */
function extractSectionAnalyses(page: PageContent): SectionAnalysis[] {
  const analyses: SectionAnalysis[] = [];

  for (const section of page.sections) {
    const bullets = section.bulletPoints ?? [];
    const content = [...section.paragraphs, ...bullets];
    const wordCount = countWords(content);

    analyses.push({
      id: section.id,
      type: section.type,
      heading: section.heading ?? '',
      content,
      wordCount,
    });
  }

  return analyses;
}
|
|
760
|
+
|
|
761
|
+
/**
 * Extract slug from URL.
 *
 * Strips the scheme and host (http/https, any letter case), any query string
 * or fragment, and leading/trailing slashes. What remains is the path, e.g.
 * "https://example.com/docs/start/?utm=1#top" becomes "docs/start".
 *
 * @param url - Absolute URL or root-relative path.
 * @returns The path-based slug, or 'home' when the path is empty.
 */
function extractSlug(url: string): string {
  const slug = url
    // Remove protocol and domain if present
    .replace(/^https?:\/\/[^\/?#]+/i, '')
    // Remove query string and fragment; they are not part of the page identity
    .replace(/[?#][\s\S]*$/, '')
    // Remove leading/trailing slashes
    .replace(/^\/+|\/+$/g, '');

  // Use 'home' for empty slugs
  return slug || 'home';
}
|