create-hq 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310) hide show
  1. package/dist/deps.d.ts +4 -0
  2. package/dist/deps.d.ts.map +1 -0
  3. package/dist/deps.js +65 -0
  4. package/dist/deps.js.map +1 -0
  5. package/dist/git.d.ts +3 -0
  6. package/dist/git.d.ts.map +1 -0
  7. package/dist/git.js +19 -0
  8. package/dist/git.js.map +1 -0
  9. package/dist/index.d.ts +3 -0
  10. package/dist/index.d.ts.map +1 -0
  11. package/dist/index.js +23 -0
  12. package/dist/index.js.map +1 -0
  13. package/dist/scaffold.d.ts +8 -0
  14. package/dist/scaffold.d.ts.map +1 -0
  15. package/dist/scaffold.js +130 -0
  16. package/dist/scaffold.js.map +1 -0
  17. package/dist/ui.d.ts +7 -0
  18. package/dist/ui.d.ts.map +1 -0
  19. package/dist/ui.js +36 -0
  20. package/dist/ui.js.map +1 -0
  21. package/package.json +41 -0
  22. package/template/.claude/CLAUDE.md +202 -0
  23. package/template/.claude/commands/checkpoint.md +127 -0
  24. package/template/.claude/commands/cleanup.md +307 -0
  25. package/template/.claude/commands/execute-task.md +440 -0
  26. package/template/.claude/commands/exit-plan.md +41 -0
  27. package/template/.claude/commands/handoff.md +97 -0
  28. package/template/.claude/commands/learn.md +218 -0
  29. package/template/.claude/commands/metrics.md +118 -0
  30. package/template/.claude/commands/newworker.md +162 -0
  31. package/template/.claude/commands/nexttask.md +67 -0
  32. package/template/.claude/commands/prd.md +238 -0
  33. package/template/.claude/commands/reanchor.md +51 -0
  34. package/template/.claude/commands/remember.md +126 -0
  35. package/template/.claude/commands/run-project.md +348 -0
  36. package/template/.claude/commands/run.md +110 -0
  37. package/template/.claude/commands/search-reindex.md +62 -0
  38. package/template/.claude/commands/search.md +100 -0
  39. package/template/.claude/commands/setup.md +381 -0
  40. package/template/.claude/scripts/pure-ralph-loop.ps1 +312 -0
  41. package/template/.claude/scripts/pure-ralph-loop.sh +859 -0
  42. package/template/CHANGELOG.md +220 -0
  43. package/template/LICENSE +21 -0
  44. package/template/MIGRATION.md +259 -0
  45. package/template/README.md +368 -0
  46. package/template/data/journal/.gitkeep +0 -0
  47. package/template/docs/images/ascii-banner-options.md +122 -0
  48. package/template/docs/images/hq-banner.svg +105 -0
  49. package/template/knowledge/Ralph/01-overview.md +71 -0
  50. package/template/knowledge/Ralph/02-core-concepts.md +114 -0
  51. package/template/knowledge/Ralph/03-how-ralph-works.md +184 -0
  52. package/template/knowledge/Ralph/04-back-pressure.md +222 -0
  53. package/template/knowledge/Ralph/05-specifications.md +210 -0
  54. package/template/knowledge/Ralph/06-agents-md.md +222 -0
  55. package/template/knowledge/Ralph/07-implementation.md +316 -0
  56. package/template/knowledge/Ralph/08-economics.md +182 -0
  57. package/template/knowledge/Ralph/09-resources.md +145 -0
  58. package/template/knowledge/Ralph/10-claude-code-workflow.md +212 -0
  59. package/template/knowledge/Ralph/11-team-training-guide.md +383 -0
  60. package/template/knowledge/Ralph/README.md +40 -0
  61. package/template/knowledge/ai-security-framework/CONTRIBUTING.md +139 -0
  62. package/template/knowledge/ai-security-framework/GLOSSARY.md +176 -0
  63. package/template/knowledge/ai-security-framework/LICENSE +21 -0
  64. package/template/knowledge/ai-security-framework/QUICK-START.md +172 -0
  65. package/template/knowledge/ai-security-framework/README.md +232 -0
  66. package/template/knowledge/ai-security-framework/checklists/browser-security.md +301 -0
  67. package/template/knowledge/ai-security-framework/checklists/credential-isolation.md +322 -0
  68. package/template/knowledge/ai-security-framework/checklists/incident-response.md +288 -0
  69. package/template/knowledge/ai-security-framework/checklists/pre-flight.md +249 -0
  70. package/template/knowledge/ai-security-framework/checklists/weekly-audit.md +159 -0
  71. package/template/knowledge/ai-security-framework/configs/audit-logging.md +372 -0
  72. package/template/knowledge/ai-security-framework/configs/kill-switches.md +354 -0
  73. package/template/knowledge/ai-security-framework/docs/01-core-principles.md +256 -0
  74. package/template/knowledge/ai-security-framework/docs/02-threat-landscape.md +326 -0
  75. package/template/knowledge/ai-security-framework/docs/03-security-posture.md +250 -0
  76. package/template/knowledge/ai-security-framework/templates/agents-security.md +233 -0
  77. package/template/knowledge/design-styles/README.md +42 -0
  78. package/template/knowledge/design-styles/american-industrial.md +136 -0
  79. package/template/knowledge/design-styles/ethereal-abstract.md +133 -0
  80. package/template/knowledge/design-styles/liminal-portal.md +111 -0
  81. package/template/knowledge/design-styles/swipes/american-industrial/G-3m4YPW0AADdu2.jpeg +0 -0
  82. package/template/knowledge/design-styles/swipes/american-industrial/G-JJlt5WwAABK3K.png +0 -0
  83. package/template/knowledge/design-styles/swipes/american-industrial/G-JJmj5W0AEbJ-7.png +0 -0
  84. package/template/knowledge/design-styles/swipes/american-industrial/G59fgNuXkAAKLJQ (1).jpeg +0 -0
  85. package/template/knowledge/design-styles/swipes/american-industrial/G59fgNuXkAAKLJQ.jpeg +0 -0
  86. package/template/knowledge/design-styles/swipes/american-industrial/G7fVkn3WEAAM-ST.jpeg +0 -0
  87. package/template/knowledge/design-styles/swipes/american-industrial/G8ECO5JWEAIksyn.png +0 -0
  88. package/template/knowledge/design-styles/swipes/american-industrial/G9-3GQSWoAA8eqZ.png +0 -0
  89. package/template/knowledge/design-styles/swipes/american-industrial/G9xEOqrXkAEZRcs.png +0 -0
  90. package/template/knowledge/design-styles/swipes/american-industrial/G_MVeJrXQAA8sx4.jpeg +0 -0
  91. package/template/knowledge/design-styles/swipes/american-industrial/G_RSkmGXkAAgAVZ.png +0 -0
  92. package/template/knowledge/design-styles/swipes/american-industrial/README.md +31 -0
  93. package/template/knowledge/design-styles/swipes/american-industrial/qyqtg7Dq.png +0 -0
  94. package/template/knowledge/dev-team/README.md +35 -0
  95. package/template/knowledge/dev-team/patterns/README.md +34 -0
  96. package/template/knowledge/dev-team/patterns/frontend/react-best-practices.md +178 -0
  97. package/template/knowledge/dev-team/troubleshooting/README.md +31 -0
  98. package/template/knowledge/dev-team/workflows/README.md +49 -0
  99. package/template/knowledge/hq/checkpoint-schema.json +51 -0
  100. package/template/knowledge/hq/index-md-spec.md +74 -0
  101. package/template/knowledge/hq/thread-schema.md +153 -0
  102. package/template/knowledge/hq-core/checkpoint-schema.json +51 -0
  103. package/template/knowledge/hq-core/index-md-spec.md +74 -0
  104. package/template/knowledge/hq-core/thread-schema.md +153 -0
  105. package/template/knowledge/loom/README.md +51 -0
  106. package/template/knowledge/loom/architecture.md +125 -0
  107. package/template/knowledge/loom/code-style.md +169 -0
  108. package/template/knowledge/loom/llm-proxy.md +132 -0
  109. package/template/knowledge/loom/state-machine.md +131 -0
  110. package/template/knowledge/loom/thread-system.md +117 -0
  111. package/template/knowledge/loom/tools.md +94 -0
  112. package/template/knowledge/loom/weaver.md +96 -0
  113. package/template/knowledge/loom/web-frontend.md +131 -0
  114. package/template/knowledge/projects/README.md +72 -0
  115. package/template/knowledge/projects/templates/README.template.md +28 -0
  116. package/template/knowledge/workers/README.md +195 -0
  117. package/template/knowledge/workers/ralph-loop-pattern.md +157 -0
  118. package/template/knowledge/workers/skill-schema.md +182 -0
  119. package/template/knowledge/workers/state-machine.md +102 -0
  120. package/template/knowledge/workers/templates/base-worker.yaml +73 -0
  121. package/template/knowledge/workers/templates/code-worker.yaml +85 -0
  122. package/template/knowledge/workers/templates/skill.yaml +49 -0
  123. package/template/knowledge/workers/templates/social-worker.yaml +70 -0
  124. package/template/modules/examples/full-manifest.yaml +92 -0
  125. package/template/modules/examples/minimal.yaml +14 -0
  126. package/template/modules/modules.yaml +59 -0
  127. package/template/projects/.gitkeep +0 -0
  128. package/template/projects/incorporate-workers-into-pure-ralph/prd.json +88 -0
  129. package/template/projects/pure-ralph-branch-isolation/README.md +114 -0
  130. package/template/projects/pure-ralph-branch-isolation/prd.json +123 -0
  131. package/template/projects/purist-ralph-loop/README.md +148 -0
  132. package/template/projects/purist-ralph-loop/prd.json +135 -0
  133. package/template/projects/ralph-test/prd.json +50 -0
  134. package/template/prompts/pure-ralph-base.md +551 -0
  135. package/template/settings/.gitkeep +0 -0
  136. package/template/settings/pure-ralph.json +42 -0
  137. package/template/social-content/drafts/INDEX.md +21 -0
  138. package/template/social-content/drafts/linkedin/.gitkeep +1 -0
  139. package/template/social-content/drafts/x/.gitkeep +1 -0
  140. package/template/social-content/images/.gitkeep +1 -0
  141. package/template/starter-projects/code-worker/README.md +97 -0
  142. package/template/starter-projects/code-worker/prd.json +45 -0
  143. package/template/starter-projects/personal-assistant/README.md +42 -0
  144. package/template/starter-projects/personal-assistant/prd.json +43 -0
  145. package/template/starter-projects/social-media/README.md +60 -0
  146. package/template/starter-projects/social-media/prd.json +43 -0
  147. package/template/workers/content-brand/README.md +59 -0
  148. package/template/workers/content-brand/skills/messaging-alignment.md +91 -0
  149. package/template/workers/content-brand/skills/tone-check.md +76 -0
  150. package/template/workers/content-brand/skills/voice-analysis.md +68 -0
  151. package/template/workers/content-brand/worker.yaml +81 -0
  152. package/template/workers/content-legal/README.md +80 -0
  153. package/template/workers/content-legal/skills/claim-substantiation.md +150 -0
  154. package/template/workers/content-legal/skills/compliance-scan.md +123 -0
  155. package/template/workers/content-legal/skills/disclaimer-check.md +146 -0
  156. package/template/workers/content-legal/worker.yaml +118 -0
  157. package/template/workers/content-product/README.md +77 -0
  158. package/template/workers/content-product/skills/claim-verification.md +96 -0
  159. package/template/workers/content-product/skills/feature-accuracy.md +117 -0
  160. package/template/workers/content-product/skills/stats-check.md +128 -0
  161. package/template/workers/content-product/worker.yaml +97 -0
  162. package/template/workers/content-sales/README.md +70 -0
  163. package/template/workers/content-sales/skills/conversion-analysis.md +96 -0
  164. package/template/workers/content-sales/skills/cta-audit.md +107 -0
  165. package/template/workers/content-sales/skills/value-prop-check.md +114 -0
  166. package/template/workers/content-sales/worker.yaml +93 -0
  167. package/template/workers/content-shared/cli.ts +242 -0
  168. package/template/workers/content-shared/index.ts +234 -0
  169. package/template/workers/content-shared/lib/accuracy-analyzer.ts +661 -0
  170. package/template/workers/content-shared/lib/analyze.ts +370 -0
  171. package/template/workers/content-shared/lib/brand-analyzer.ts +526 -0
  172. package/template/workers/content-shared/lib/cms-integration.ts +446 -0
  173. package/template/workers/content-shared/lib/compliance-analyzer.ts +655 -0
  174. package/template/workers/content-shared/lib/conversion-analyzer.ts +555 -0
  175. package/template/workers/content-shared/lib/github-integration.ts +582 -0
  176. package/template/workers/content-shared/lib/output.ts +373 -0
  177. package/template/workers/content-shared/lib/parser.ts +771 -0
  178. package/template/workers/content-shared/lib/priority.ts +439 -0
  179. package/template/workers/content-shared/lib/recommendations.ts +512 -0
  180. package/template/workers/content-shared/lib/reporter.ts +749 -0
  181. package/template/workers/content-shared/lib/restructure.ts +664 -0
  182. package/template/workers/content-shared/lib/scorer.ts +140 -0
  183. package/template/workers/content-shared/lib/types.ts +227 -0
  184. package/template/workers/content-shared/lib/variants.ts +595 -0
  185. package/template/workers/content-shared/package.json +51 -0
  186. package/template/workers/content-shared/pnpm-lock.yaml +39 -0
  187. package/template/workers/content-shared/test/sample-page.json +115 -0
  188. package/template/workers/content-shared/tsconfig.json +20 -0
  189. package/template/workers/dev-team/README.md +166 -0
  190. package/template/workers/dev-team/_template.yaml +70 -0
  191. package/template/workers/dev-team/architect/package.json +27 -0
  192. package/template/workers/dev-team/architect/skills/api-design.md +89 -0
  193. package/template/workers/dev-team/architect/skills/refactor-plan.md +96 -0
  194. package/template/workers/dev-team/architect/skills/system-design.md +100 -0
  195. package/template/workers/dev-team/architect/src/index.ts +49 -0
  196. package/template/workers/dev-team/architect/src/mcp-server.ts +122 -0
  197. package/template/workers/dev-team/architect/src/skills/api-design.ts +316 -0
  198. package/template/workers/dev-team/architect/src/skills/refactor-plan.ts +264 -0
  199. package/template/workers/dev-team/architect/src/skills/system-design.ts +212 -0
  200. package/template/workers/dev-team/architect/tsconfig.json +19 -0
  201. package/template/workers/dev-team/architect/worker.yaml +128 -0
  202. package/template/workers/dev-team/backend-dev/package-lock.json +1252 -0
  203. package/template/workers/dev-team/backend-dev/package.json +27 -0
  204. package/template/workers/dev-team/backend-dev/skills/implement-endpoint.md +70 -0
  205. package/template/workers/dev-team/backend-dev/skills/implement-service.md +62 -0
  206. package/template/workers/dev-team/backend-dev/src/index.ts +51 -0
  207. package/template/workers/dev-team/backend-dev/src/mcp-server.ts +109 -0
  208. package/template/workers/dev-team/backend-dev/src/skills/implement-endpoint.ts +122 -0
  209. package/template/workers/dev-team/backend-dev/src/skills/implement-service.ts +126 -0
  210. package/template/workers/dev-team/backend-dev/tsconfig.json +19 -0
  211. package/template/workers/dev-team/backend-dev/worker.yaml +128 -0
  212. package/template/workers/dev-team/code-reviewer/package-lock.json +1080 -0
  213. package/template/workers/dev-team/code-reviewer/package.json +24 -0
  214. package/template/workers/dev-team/code-reviewer/skills/merge-to-production.md +61 -0
  215. package/template/workers/dev-team/code-reviewer/skills/merge-to-staging.md +54 -0
  216. package/template/workers/dev-team/code-reviewer/skills/request-changes.md +63 -0
  217. package/template/workers/dev-team/code-reviewer/skills/review-pr.md +77 -0
  218. package/template/workers/dev-team/code-reviewer/src/index.ts +56 -0
  219. package/template/workers/dev-team/code-reviewer/src/mcp-server.ts +101 -0
  220. package/template/workers/dev-team/code-reviewer/tsconfig.json +19 -0
  221. package/template/workers/dev-team/code-reviewer/worker.yaml +90 -0
  222. package/template/workers/dev-team/database-dev/package.json +22 -0
  223. package/template/workers/dev-team/database-dev/skills/create-schema.md +48 -0
  224. package/template/workers/dev-team/database-dev/src/index.ts +50 -0
  225. package/template/workers/dev-team/database-dev/src/mcp-server.ts +76 -0
  226. package/template/workers/dev-team/database-dev/tsconfig.json +18 -0
  227. package/template/workers/dev-team/database-dev/worker.yaml +90 -0
  228. package/template/workers/dev-team/frontend-dev/package.json +22 -0
  229. package/template/workers/dev-team/frontend-dev/skills/create-component.md +26 -0
  230. package/template/workers/dev-team/frontend-dev/src/index.ts +50 -0
  231. package/template/workers/dev-team/frontend-dev/src/mcp-server.ts +77 -0
  232. package/template/workers/dev-team/frontend-dev/tsconfig.json +18 -0
  233. package/template/workers/dev-team/frontend-dev/worker.yaml +132 -0
  234. package/template/workers/dev-team/infra-dev/package.json +24 -0
  235. package/template/workers/dev-team/infra-dev/skills/add-monitoring.md +73 -0
  236. package/template/workers/dev-team/infra-dev/skills/configure-deployment.md +80 -0
  237. package/template/workers/dev-team/infra-dev/skills/create-dockerfile.md +62 -0
  238. package/template/workers/dev-team/infra-dev/skills/setup-cicd.md +63 -0
  239. package/template/workers/dev-team/infra-dev/src/index.ts +55 -0
  240. package/template/workers/dev-team/infra-dev/src/mcp-server.ts +82 -0
  241. package/template/workers/dev-team/infra-dev/tsconfig.json +19 -0
  242. package/template/workers/dev-team/infra-dev/worker.yaml +92 -0
  243. package/template/workers/dev-team/knowledge-curator/package.json +24 -0
  244. package/template/workers/dev-team/knowledge-curator/skills/curate-troubleshooting.md +63 -0
  245. package/template/workers/dev-team/knowledge-curator/skills/process-learnings.md +61 -0
  246. package/template/workers/dev-team/knowledge-curator/skills/sync-documentation.md +76 -0
  247. package/template/workers/dev-team/knowledge-curator/skills/update-patterns.md +63 -0
  248. package/template/workers/dev-team/knowledge-curator/src/index.ts +53 -0
  249. package/template/workers/dev-team/knowledge-curator/src/mcp-server.ts +92 -0
  250. package/template/workers/dev-team/knowledge-curator/tsconfig.json +19 -0
  251. package/template/workers/dev-team/knowledge-curator/worker.yaml +80 -0
  252. package/template/workers/dev-team/motion-designer/package.json +22 -0
  253. package/template/workers/dev-team/motion-designer/skills/add-animation.md +25 -0
  254. package/template/workers/dev-team/motion-designer/skills/generate-image.md +36 -0
  255. package/template/workers/dev-team/motion-designer/src/index.ts +63 -0
  256. package/template/workers/dev-team/motion-designer/src/mcp-server.ts +79 -0
  257. package/template/workers/dev-team/motion-designer/tsconfig.json +18 -0
  258. package/template/workers/dev-team/motion-designer/worker.yaml +84 -0
  259. package/template/workers/dev-team/product-planner/queue.json +4 -0
  260. package/template/workers/dev-team/product-planner/worker.yaml +220 -0
  261. package/template/workers/dev-team/project-manager/package-lock.json +1252 -0
  262. package/template/workers/dev-team/project-manager/package.json +27 -0
  263. package/template/workers/dev-team/project-manager/skills/create-prd.md +66 -0
  264. package/template/workers/dev-team/project-manager/skills/next-issue.md +51 -0
  265. package/template/workers/dev-team/project-manager/skills/project-status.md +59 -0
  266. package/template/workers/dev-team/project-manager/skills/update-learnings.md +65 -0
  267. package/template/workers/dev-team/project-manager/src/index.ts +54 -0
  268. package/template/workers/dev-team/project-manager/src/mcp-server.ts +207 -0
  269. package/template/workers/dev-team/project-manager/src/skills/create-prd.ts +86 -0
  270. package/template/workers/dev-team/project-manager/src/skills/next-issue.ts +137 -0
  271. package/template/workers/dev-team/project-manager/src/skills/project-status.ts +131 -0
  272. package/template/workers/dev-team/project-manager/src/skills/update-learnings.ts +94 -0
  273. package/template/workers/dev-team/project-manager/tsconfig.json +19 -0
  274. package/template/workers/dev-team/project-manager/worker.yaml +96 -0
  275. package/template/workers/dev-team/qa-tester/package.json +24 -0
  276. package/template/workers/dev-team/qa-tester/skills/create-demo-account.md +36 -0
  277. package/template/workers/dev-team/qa-tester/skills/run-tests.md +36 -0
  278. package/template/workers/dev-team/qa-tester/skills/write-test.md +27 -0
  279. package/template/workers/dev-team/qa-tester/src/index.ts +61 -0
  280. package/template/workers/dev-team/qa-tester/src/mcp-server.ts +88 -0
  281. package/template/workers/dev-team/qa-tester/tsconfig.json +18 -0
  282. package/template/workers/dev-team/qa-tester/worker.yaml +116 -0
  283. package/template/workers/dev-team/task-executor/package-lock.json +1252 -0
  284. package/template/workers/dev-team/task-executor/package.json +27 -0
  285. package/template/workers/dev-team/task-executor/skills/analyze-issue.md +101 -0
  286. package/template/workers/dev-team/task-executor/skills/execute.md +133 -0
  287. package/template/workers/dev-team/task-executor/skills/report-learnings.md +106 -0
  288. package/template/workers/dev-team/task-executor/skills/validate-completion.md +121 -0
  289. package/template/workers/dev-team/task-executor/src/index.ts +54 -0
  290. package/template/workers/dev-team/task-executor/src/mcp-server.ts +139 -0
  291. package/template/workers/dev-team/task-executor/src/skills/analyze-issue.ts +219 -0
  292. package/template/workers/dev-team/task-executor/src/skills/execute.ts +132 -0
  293. package/template/workers/dev-team/task-executor/src/skills/report-learnings.ts +119 -0
  294. package/template/workers/dev-team/task-executor/src/skills/validate-completion.ts +142 -0
  295. package/template/workers/dev-team/task-executor/tsconfig.json +19 -0
  296. package/template/workers/dev-team/task-executor/worker.yaml +110 -0
  297. package/template/workers/registry.yaml +171 -0
  298. package/template/workers/security-scanner/README.md +73 -0
  299. package/template/workers/security-scanner/skills/pre-deploy-check.md +205 -0
  300. package/template/workers/security-scanner/worker.yaml +26 -0
  301. package/template/workspace/checkpoints/.gitkeep +0 -0
  302. package/template/workspace/content-ideas/inbox.jsonl +0 -0
  303. package/template/workspace/drafts/.gitkeep +0 -0
  304. package/template/workspace/learnings/.gitkeep +3 -0
  305. package/template/workspace/orchestrator/.gitkeep +0 -0
  306. package/template/workspace/ralph-test/COMPLETE.md +18 -0
  307. package/template/workspace/ralph-test/hello.txt +2 -0
  308. package/template/workspace/reports/.gitkeep +0 -0
  309. package/template/workspace/scratch/.gitkeep +0 -0
  310. package/template/workspace/threads/.gitkeep +3 -0
@@ -0,0 +1,771 @@
1
+ /**
2
+ * Parser utilities for content analysis workers
3
+ * Handles parsing page content from various formats
4
+ */
5
+
6
+ import { readFileSync } from 'node:fs';
7
+ import type { PageContent, ContentSection, CTA, ImageAsset, PageMeta, AnalysisInput, SectionAnalysis } from './types.js';
8
+
9
/**
 * Validate an already-decoded JSON value and return it typed as PageContent.
 *
 * @param json - Decoded value of unknown shape.
 * @returns The same value, narrowed to PageContent.
 * @throws Error when the value does not pass `isValidPageContent`.
 */
export function parsePageContent(json: unknown): PageContent {
  if (isValidPageContent(json)) {
    return json;
  }
  throw new Error('Invalid page content structure');
}
18
+
19
/**
 * Type guard for PageContent.
 *
 * Only the top-level shape is inspected: `url` and `title` must be strings and
 * `sections` must be an array. No other field is checked.
 */
function isValidPageContent(obj: unknown): obj is PageContent {
  if (obj === null || typeof obj !== 'object') {
    return false;
  }
  const { url, title, sections } = obj as Record<string, unknown>;
  if (typeof url !== 'string') return false;
  if (typeof title !== 'string') return false;
  return Array.isArray(sections);
}
31
+
32
/**
 * Collect every piece of human-readable text on a page, in document order:
 * title, meta description, then each section's heading, subheading,
 * paragraphs and bullet points, and finally the text of each CTA.
 *
 * `meta` and `ctas` are treated as optional at runtime, because a page is
 * accepted by the structural validator with only `url`, `title` and
 * `sections` present.
 *
 * @param page - Page to read text from.
 * @returns The non-empty, non-whitespace-only text fragments.
 */
export function extractAllText(page: PageContent): string[] {
  const texts: string[] = [page.title];

  // Meta description (meta may be absent on minimally valid pages)
  const description = page.meta?.description;
  if (description) texts.push(description);

  // Section content
  for (const section of page.sections) {
    if (section.heading) texts.push(section.heading);
    if (section.subheading) texts.push(section.subheading);
    texts.push(...section.paragraphs);
    if (section.bulletPoints) texts.push(...section.bulletPoints);
  }

  // CTAs (may be absent, so fall back to an empty list)
  for (const cta of page.ctas ?? []) {
    texts.push(cta.text);
  }

  return texts.filter(t => t && t.trim().length > 0);
}
57
+
58
/**
 * List the page title followed by every section heading and subheading,
 * in section order, dropping empty or whitespace-only entries.
 *
 * @param page - Page to read headings from.
 * @returns Title first, then heading/subheading pairs per section.
 */
export function extractHeadings(page: PageContent): string[] {
  const candidates = page.sections.reduce<string[]>(
    (acc, section) => {
      if (section.heading) acc.push(section.heading);
      if (section.subheading) acc.push(section.subheading);
      return acc;
    },
    [page.title]
  );

  return candidates.filter(text => text && text.trim().length > 0);
}
71
+
72
/**
 * Return the page's CTA list, or an empty array when the page has none.
 *
 * @param page - Page to read CTAs from.
 * @returns The page's own `ctas` array when present, otherwise a new empty array.
 */
export function extractCTAs(page: PageContent): CTA[] {
  const { ctas } = page;
  return ctas ? ctas : [];
}
78
+
79
/**
 * Gather the paragraphs of every section whose `type` equals `sectionType`,
 * preserving section and paragraph order.
 *
 * @param page - Page whose sections are searched.
 * @param sectionType - Section type to match exactly.
 * @returns Paragraphs of all matching sections, concatenated.
 */
export function extractParagraphsBySection(
  page: PageContent,
  sectionType: ContentSection['type']
): string[] {
  let collected: string[] = [];

  for (const section of page.sections) {
    if (section.type === sectionType) {
      collected = collected.concat(section.paragraphs);
    }
  }

  return collected;
}
90
+
91
/**
 * Select the sections whose text mentions at least one of the keywords.
 *
 * Matching is a case-insensitive substring test against the section's
 * heading, subheading, paragraphs and bullet points joined with spaces.
 *
 * @param page - Page whose sections are searched.
 * @param keywords - Keywords to look for; compared in lower case.
 * @returns Matching sections, in their original order.
 */
export function findSectionsWithKeywords(
  page: PageContent,
  keywords: string[]
): ContentSection[] {
  const needles = keywords.map(k => k.toLowerCase());
  const matches: ContentSection[] = [];

  for (const section of page.sections) {
    const parts: string[] = [
      section.heading || '',
      section.subheading || '',
      ...section.paragraphs,
    ];
    if (section.bulletPoints) {
      parts.push(...section.bulletPoints);
    }
    const haystack = parts.join(' ').toLowerCase();

    for (const needle of needles) {
      if (haystack.includes(needle)) {
        matches.push(section);
        break;
      }
    }
  }

  return matches;
}
111
+
112
/**
 * Count whitespace-delimited words across all the given text fragments.
 *
 * @param texts - Text fragments; they are joined with single spaces first.
 * @returns Number of maximal runs of non-whitespace characters.
 */
export function countWords(texts: string[]): number {
  const words = texts.join(' ').match(/\S+/g);
  return words === null ? 0 : words.length;
}
118
+
119
/**
 * Split text fragments into sentences.
 *
 * The fragments are joined with spaces and cut at every run of `.`, `!`
 * or `?`; each piece is trimmed and empty pieces are discarded. The
 * terminating punctuation is not kept.
 *
 * @param texts - Text fragments to split.
 * @returns Trimmed, non-empty sentences in order of appearance.
 */
export function extractSentences(texts: string[]): string[] {
  const sentences: string[] = [];

  for (const fragment of texts.join(' ').split(/[.!?]+/)) {
    const trimmed = fragment.trim();
    if (trimmed.length > 0) {
      sentences.push(trimmed);
    }
  }

  return sentences;
}
126
+
127
+ // ============================================
128
+ // File Parsing Functions (US-006)
129
+ // ============================================
130
+
131
/**
 * Load a JSON file from disk and convert it to PageContent.
 *
 * Formats are tried in this order:
 *   1. standard PageContent (returned as-is),
 *   2. site audit format (object-valued `meta` and `content`),
 *   3. anything else is handed to `parseCMSContent`.
 *
 * @param filePath - Path of the UTF-8 JSON file to read.
 * @returns The parsed page.
 */
export function parsePageFile(filePath: string): PageContent {
  const json = JSON.parse(readFileSync(filePath, 'utf-8'));

  if (isValidPageContent(json)) {
    return json;
  }

  return isSiteAuditFormat(json)
    ? parseSiteAuditContent(json)
    : parseCMSContent(json);
}
152
+
153
/**
 * Detect the site audit format: an object whose `meta` and `content`
 * properties are both non-null objects.
 */
function isSiteAuditFormat(obj: unknown): boolean {
  const isObject = (value: unknown): boolean =>
    typeof value === 'object' && value !== null;

  if (!isObject(obj)) {
    return false;
  }
  const { meta, content } = obj as Record<string, unknown>;
  return isObject(meta) && isObject(content);
}
166
+
167
/**
 * Convert a site-audit capture into PageContent.
 *
 * Input shape read by this function:
 * `{ meta: { url, path, title, description, ogTags, capturedAt },
 *    content: { headings, paragraphs, lists, ctas },
 *    media: { images } }`
 *
 * How the result is built:
 * - Level 1 and 2 headings each start a new section; level 3 headings are
 *   appended to the current section's bullet points. Other levels are ignored.
 * - Paragraphs are split evenly (by position) across the sections; when no
 *   section exists a single `content` section holds all of them.
 * - All list items are appended to the last section's bullet points.
 * - CTAs whose text is 'Manage Preferences' or 'Accept All' are dropped.
 *
 * The input is untrusted JSON, so every string field is checked at runtime:
 * a value of the wrong type is treated as missing (falling back to the
 * default, or being skipped) rather than being cast to `string`.
 *
 * @param json - Decoded site-audit JSON.
 * @returns The equivalent PageContent.
 * @throws Error when `json` is not an object, or lacks `meta` or `content`.
 */
export function parseSiteAuditContent(json: unknown): PageContent {
  if (typeof json !== 'object' || json === null) {
    throw new Error('Invalid site audit content: expected object');
  }

  // Runtime-checked replacement for `value as string`.
  const str = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;

  const data = json as Record<string, unknown>;
  const meta = data.meta as Record<string, unknown> | undefined;
  const content = data.content as Record<string, unknown> | undefined;
  const media = data.media as Record<string, unknown> | undefined;

  if (!meta || !content) {
    throw new Error('Invalid site audit content: missing meta or content');
  }

  // Extract URL and title from meta
  const url = str(meta.url) ?? str(meta.path) ?? '/';
  const title = str(meta.title) ?? 'Untitled';

  // Build page meta (Open Graph values are only read when ogTags is an object)
  const ogTags =
    meta.ogTags && typeof meta.ogTags === 'object'
      ? (meta.ogTags as Record<string, unknown>)
      : undefined;
  const pageMeta: PageMeta = {
    description: str(meta.description),
    ogTitle: str(ogTags?.title),
    ogDescription: str(ogTags?.description),
    ogImage: str(ogTags?.image),
  };

  const headings = Array.isArray(content.headings) ? content.headings : [];
  const paragraphs = Array.isArray(content.paragraphs) ? content.paragraphs : [];
  const lists = Array.isArray(content.lists) ? content.lists : [];

  // Build sections from headings
  const sections: ContentSection[] = [];
  let currentSection: ContentSection | null = null;

  for (const heading of headings) {
    if (typeof heading !== 'object' || heading === null) continue;
    const h = heading as Record<string, unknown>;
    const level = h.level;
    const text = str(h.text);

    // A heading without string text cannot name a section or a bullet point.
    if (text === undefined) continue;

    if (level === 1 || level === 2) {
      // Start a new section, flushing the previous one first
      if (currentSection) {
        sections.push(currentSection);
      }

      currentSection = {
        id: `section-${sections.length}`,
        type: inferSectionType(text),
        heading: text,
        paragraphs: [],
      };
    } else if (level === 3 && currentSection) {
      // Level 3 headings are recorded as bullet points of the current section
      if (!currentSection.bulletPoints) {
        currentSection.bulletPoints = [];
      }
      currentSection.bulletPoints.push(text);
    }
  }

  // Push last section
  if (currentSection) {
    sections.push(currentSection);
  }

  // Distribute paragraphs across sections in equal-sized consecutive chunks.
  // Chunks are cut from the raw array first, then non-strings are dropped.
  const paragraphsPerSection = Math.ceil(paragraphs.length / Math.max(sections.length, 1));
  for (let i = 0; i < sections.length; i++) {
    const start = i * paragraphsPerSection;
    const end = Math.min(start + paragraphsPerSection, paragraphs.length);
    sections[i].paragraphs = paragraphs.slice(start, end).filter(
      (p): p is string => typeof p === 'string'
    );
  }

  // If no sections were created, create one holding all paragraphs
  if (sections.length === 0) {
    sections.push({
      id: 'section-0',
      type: 'content',
      heading: title,
      paragraphs: paragraphs.filter((p): p is string => typeof p === 'string'),
    });
  }

  // Add bullet points from lists to the last section
  for (const list of lists) {
    if (typeof list !== 'object' || list === null) continue;
    const l = list as Record<string, unknown>;
    const items = Array.isArray(l.items) ? l.items : [];

    const targetSection = sections[sections.length - 1];
    if (targetSection) {
      if (!targetSection.bulletPoints) {
        targetSection.bulletPoints = [];
      }
      targetSection.bulletPoints.push(
        ...items.filter((i): i is string => typeof i === 'string')
      );
    }
  }

  // Extract CTAs; `location` keeps the index the CTA had before the final filter
  const ctaData = Array.isArray(content.ctas) ? content.ctas : [];
  const ctas: CTA[] = ctaData
    .filter((c): c is Record<string, unknown> => typeof c === 'object' && c !== null)
    .map((c, index) => ({
      text: str(c.text) ?? 'Click here',
      href: str(c.href),
      type: 'primary' as const,
      location: `cta-${index}`,
    }))
    .filter(c => c.text && c.text !== 'Manage Preferences' && c.text !== 'Accept All');

  // Extract images
  const imageData = media && Array.isArray(media.images) ? media.images : [];
  const images: ImageAsset[] = imageData
    .filter((i): i is Record<string, unknown> => typeof i === 'object' && i !== null)
    .map(i => ({
      src: str(i.src) ?? '',
      alt: str(i.alt),
      context: '',
    }));

  return {
    url,
    title,
    meta: pageMeta,
    sections,
    ctas,
    images,
    extractedAt: str(meta.capturedAt) ?? new Date().toISOString(),
  };
}
319
+
320
/**
 * Classify a section by scanning its heading for known keywords.
 *
 * Matching is a case-insensitive substring test. Keyword groups are checked
 * in the order listed and the first group with a hit decides the result, so
 * a heading that matches several groups takes the earliest one.
 *
 * @param heading - Heading text of the section.
 * @returns The inferred section type; `'content'` when no keyword matches.
 */
function inferSectionType(heading: string): ContentSection['type'] {
  const haystack = heading.toLowerCase();

  // Ordered keyword groups: the first group containing a match wins.
  const keywordGroups: Array<[ContentSection['type'], string[]]> = [
    ['hero', ['hero', 'welcome', 'enterprise ai']],
    ['features', ['feature', 'why', 'benefit']],
    ['testimonials', ['testimonial', 'customer', 'review']],
    ['pricing', ['pricing', 'plan', 'package']],
    ['cta', ['start', 'contact', 'demo', 'ready']],
    ['features', ['security', 'compliance', 'certification']],
    ['content', ['technical', 'specification', 'spec']],
  ];

  const hit = keywordGroups.find(([, keywords]) =>
    keywords.some(keyword => haystack.includes(keyword))
  );

  return hit ? hit[0] : 'content';
}
350
+
351
/**
 * Parse content from CMS JSON format (matches example-company-cms schema).
 * Handles the structure `{ page: { slug, title, sections, ... } }` and also a
 * bare page object without the `page` wrapper.
 *
 * The input is unvalidated JSON, so scalar fields are only accepted when they
 * really are strings; anything else falls back to the next candidate field or
 * to a default instead of being cast blindly.
 *
 * @param json - Parsed CMS JSON of unknown shape.
 * @returns Normalised page content; `extractedAt` is the time of parsing.
 * @throws Error when `json` is not an object, or when `page` is present but
 *   is not an object.
 */
export function parseCMSContent(json: unknown): PageContent {
  if (typeof json !== 'object' || json === null) {
    throw new Error('Invalid CMS content: expected object');
  }

  const data = json as Record<string, unknown>;

  // The CMS nests the payload under `page`; accept a bare page object too.
  const pageData = data.page ?? data;

  if (typeof pageData !== 'object' || pageData === null) {
    throw new Error('Invalid CMS content: missing page data');
  }

  const page = pageData as Record<string, unknown>;

  // Narrow instead of casting: a numeric or object value must not reach
  // string operations such as the URL handling below.
  const asString = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;

  // Absolute http(s) URLs are kept verbatim. Everything else becomes a
  // root-relative path with exactly one leading slash ("/about" must not
  // turn into the protocol-relative "//about").
  const slug = asString(page.slug) ?? asString(page.url) ?? 'unknown';
  const url = /^https?:\/\//i.test(slug) ? slug : `/${slug.replace(/^\/+/, '')}`;

  const title = asString(page.title) ?? asString(page.name) ?? 'Untitled';

  // Keywords may arrive as an array or as a comma-separated string.
  let keywords: string[] | undefined;
  if (Array.isArray(page.keywords)) {
    keywords = page.keywords.filter((k): k is string => typeof k === 'string');
  } else if (typeof page.keywords === 'string') {
    keywords = page.keywords
      .split(',')
      .map(k => k.trim())
      .filter(k => k.length > 0);
  }

  const meta: PageMeta = {
    description: asString(page.description),
    keywords,
    ogTitle: asString(page.ogTitle),
    ogDescription: asString(page.ogDescription),
    ogImage: asString(page.ogImage),
  };

  const rawSections: unknown[] = Array.isArray(page.sections) ? page.sections : [];
  const sections: ContentSection[] = rawSections.map((s, index) => parseCMSSection(s, index));

  const ctas: CTA[] = extractCTAsFromCMS(page);
  const images: ImageAsset[] = extractImagesFromCMS(page);

  return {
    url,
    title,
    meta,
    sections,
    ctas,
    images,
    extractedAt: new Date().toISOString(),
  };
}
409
+
410
/**
 * Convert one raw CMS section into a ContentSection.
 *
 * Anything that is not an object yields an empty section of type `'other'`
 * with an index-based id. For objects, paragraph text is gathered from
 * `content` (a string or an array, of which only string entries are kept),
 * then `body`, `text` and `description`, in that order. Bullet points come
 * from the string entries of `bullets`, followed by `items`, where an item is
 * either a string or an object contributing its `text` and then its `title`.
 *
 * @param section - Raw section value from the CMS payload.
 * @param index - Position of the section, used to build the fallback id.
 * @returns The normalised section; `bulletPoints` is `undefined` when empty.
 */
function parseCMSSection(section: unknown, index: number): ContentSection {
  const fallbackId = `section-${index}`;

  if (section === null || typeof section !== 'object') {
    return { id: fallbackId, type: 'other', paragraphs: [] };
  }

  const raw = section as Record<string, unknown>;
  const isText = (value: unknown): value is string => typeof value === 'string';

  const sectionType = mapSectionType(raw.type as string | undefined, raw);

  // `content` may be a single string or a list; treat both as a list.
  const contentParts: unknown[] = Array.isArray(raw.content) ? raw.content : [raw.content];
  const paragraphs: string[] = [...contentParts, raw.body, raw.text, raw.description].filter(isText);

  const bullets: string[] = Array.isArray(raw.bullets) ? raw.bullets.filter(isText) : [];
  const items: unknown[] = Array.isArray(raw.items) ? raw.items : [];
  for (const entry of items) {
    if (isText(entry)) {
      bullets.push(entry);
      continue;
    }
    if (typeof entry === 'object' && entry !== null) {
      const { text, title } = entry as Record<string, unknown>;
      if (isText(text)) bullets.push(text);
      if (isText(title)) bullets.push(title);
    }
  }

  return {
    id: (raw.id ?? raw.key ?? fallbackId) as string,
    type: sectionType,
    heading: (raw.heading ?? raw.title) as string | undefined,
    subheading: (raw.subheading ?? raw.subtitle) as string | undefined,
    paragraphs,
    bulletPoints: bullets.length === 0 ? undefined : bullets,
  };
}
474
+
475
/**
 * Map a CMS section type to one of our standard section types.
 *
 * When no usable type string is given, the type is inferred from which
 * well-known fields are present (truthy) on the section. Otherwise the type
 * string is matched case-insensitively by substring, first match wins.
 *
 * @param type - Type string from the CMS, if any.
 * @param section - The raw section, consulted only when `type` is unusable.
 * @returns The standard section type; `'content'` when nothing matches.
 */
function mapSectionType(
  type: string | undefined,
  section: Record<string, unknown>
): ContentSection['type'] {
  // Callers hand over `type` straight from unvalidated JSON via a cast, so at
  // runtime it may be a number, object, etc. Treat anything that is not a
  // non-empty string as "no type" rather than crashing on `.toLowerCase()`.
  if (typeof type !== 'string' || type === '') {
    // Infer from content
    if (section.hero || section.headline) return 'hero';
    if (section.features || section.featureList) return 'features';
    if (section.testimonials || section.quotes) return 'testimonials';
    if (section.pricing || section.plans) return 'pricing';
    return 'content';
  }

  const normalized = type.toLowerCase();

  if (normalized.includes('hero')) return 'hero';
  if (normalized.includes('feature')) return 'features';
  if (normalized.includes('testimonial') || normalized.includes('quote')) return 'testimonials';
  if (normalized.includes('pricing') || normalized.includes('plan')) return 'pricing';
  if (normalized.includes('cta') || normalized.includes('action')) return 'cta';
  if (normalized.includes('footer')) return 'footer';
  if (normalized.includes('header') || normalized.includes('nav')) return 'header';

  return 'content';
}
503
+
504
/**
 * Collect every call-to-action found in a CMS page.
 *
 * Sources, in output order:
 *  1. the page-level `ctas` array (link taken from `href`, `url` or `link`;
 *     location from `location`, `section` or `'unknown'`);
 *  2. for each section, its single `cta` object and then its `buttons` array
 *     (link taken from `href` or `url`; location from the section's `id`,
 *     `type` or `'section'`).
 *
 * Non-object entries are ignored. Missing labels default to `'Click here'`.
 *
 * @param page - Raw CMS page object.
 * @returns All CTAs found, page-level ones first.
 */
function extractCTAsFromCMS(page: Record<string, unknown>): CTA[] {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null;

  // Shared shape for all three sources; only link and location lookup differ.
  const build = (raw: Record<string, unknown>, href: unknown, location: unknown): CTA => ({
    text: (raw.text ?? raw.label ?? 'Click here') as string,
    href: href as string | undefined,
    type: mapCTAType(raw.type as string | undefined, raw.variant as string | undefined),
    location: location as string,
  });

  const found: CTA[] = [];

  const pageLevel: unknown[] = Array.isArray(page.ctas) ? page.ctas : [];
  pageLevel.filter(isRecord).forEach(entry => {
    found.push(
      build(
        entry,
        entry.href ?? entry.url ?? entry.link,
        entry.location ?? entry.section ?? 'unknown'
      )
    );
  });

  const sectionList: unknown[] = Array.isArray(page.sections) ? page.sections : [];
  sectionList.filter(isRecord).forEach(section => {
    const where = section.id ?? section.type ?? 'section';

    const sectionCta = section.cta;
    if (isRecord(sectionCta)) {
      found.push(build(sectionCta, sectionCta.href ?? sectionCta.url, where));
    }

    const buttons: unknown[] = Array.isArray(section.buttons) ? section.buttons : [];
    buttons.filter(isRecord).forEach(button => {
      found.push(build(button, button.href ?? button.url, where));
    });
  });

  return found;
}
561
+
562
/**
 * Map a CTA's `type` / `variant` strings to one of our CTA types.
 *
 * `type` is consulted first. When it is missing, empty, not a string, or
 * simply not recognised, `variant` is consulted as well, so a button such as
 * `{ type: 'button', variant: 'outline' }` is classified by its variant
 * instead of silently becoming primary. Matching is case-insensitive and by
 * substring.
 *
 * @param type - CTA type from the CMS, if any.
 * @param variant - CTA variant from the CMS, if any.
 * @returns `'primary'`, `'secondary'` or `'link'`; `'primary'` by default.
 */
function mapCTAType(type?: string, variant?: string): CTA['type'] {
  // Values arrive from unvalidated JSON via casts, so guard against
  // non-strings instead of calling `.toLowerCase()` on them.
  const classify = (value: unknown): CTA['type'] | undefined => {
    if (typeof value !== 'string') return undefined;
    const t = value.toLowerCase();
    if (t.includes('primary') || t.includes('main')) return 'primary';
    if (t.includes('secondary') || t.includes('outline')) return 'secondary';
    if (t.includes('link') || t.includes('text')) return 'link';
    return undefined;
  };

  return classify(type) ?? classify(variant) ?? 'primary'; // Default to primary
}
572
+
573
/**
 * Collect image references from a CMS page.
 *
 * Page-level entries of `images` come first (source from `src` or `url`,
 * context from `context` or `caption`). Then, for each section, its `image`
 * object is added (context taken from the section's `heading`, `title`,
 * `type` or `'section'`), followed by its `backgroundImage` when that is a
 * string (context `background: <type>`). Non-object entries are ignored.
 *
 * @param page - Raw CMS page object.
 * @returns Image assets in the order described above.
 */
function extractImagesFromCMS(page: Record<string, unknown>): ImageAsset[] {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null;

  const collected: ImageAsset[] = [];

  const pageImages: unknown[] = Array.isArray(page.images) ? page.images : [];
  pageImages.filter(isRecord).forEach(entry => {
    collected.push({
      src: (entry.src ?? entry.url ?? '') as string,
      alt: entry.alt as string | undefined,
      context: (entry.context ?? entry.caption ?? '') as string,
    });
  });

  const pageSections: unknown[] = Array.isArray(page.sections) ? page.sections : [];
  pageSections.filter(isRecord).forEach(section => {
    const inline = section.image;
    if (isRecord(inline)) {
      collected.push({
        src: (inline.src ?? inline.url ?? '') as string,
        alt: inline.alt as string | undefined,
        context: (section.heading ?? section.title ?? section.type ?? 'section') as string,
      });
    }

    const background = section.backgroundImage;
    if (typeof background === 'string') {
      collected.push({
        src: background,
        alt: undefined,
        context: `background: ${(section.type ?? 'section') as string}`,
      });
    }
  });

  return collected;
}
620
+
621
+ // ============================================
622
+ // Analysis Input Extraction (US-006)
623
+ // ============================================
624
+
625
/**
 * Build the analysis input for a page.
 *
 * Headings, paragraphs, stats, claims and per-section analyses are produced
 * by the dedicated extract helpers; the slug is derived from the page URL.
 * CTAs are reduced to their label and link, with a missing `href` becoming
 * an empty string.
 *
 * @param page - Parsed page content.
 * @returns The structured input consumed by the analysis step.
 */
export function extractAnalysisInput(page: PageContent): AnalysisInput {
  const headingList = extractHeadings(page);
  const paragraphList = extractAllParagraphs(page);
  const statList = extractStats(page);
  const claimList = extractClaims(page);
  const sectionList = extractSectionAnalyses(page);

  // Extract slug from URL
  const slug = extractSlug(page.url);

  const ctaLinks: { text: string; link: string }[] = [];
  for (const { text, href } of page.ctas) {
    ctaLinks.push({ text, link: href ?? '' });
  }

  return {
    pageSlug: slug,
    title: page.title,
    headings: headingList,
    paragraphs: paragraphList,
    ctas: ctaLinks,
    stats: statList,
    claims: claimList,
    sections: sectionList,
  };
}
652
+
653
/**
 * Flatten the text of every section into a single list.
 *
 * For each section, its paragraphs come first and its bullet points (if any)
 * follow. Entries that are empty or whitespace-only are dropped.
 *
 * @param page - Parsed page content.
 * @returns Non-blank paragraphs and bullet points in section order.
 */
function extractAllParagraphs(page: PageContent): string[] {
  return page.sections
    .flatMap(section =>
      section.bulletPoints
        ? [...section.paragraphs, ...section.bulletPoints]
        : [...section.paragraphs]
    )
    .filter(text => text.trim() !== '');
}
668
+
669
/**
 * Extract statistics from page content.
 * Looks for patterns like "50%", "$1M", "100+", "10x", "24/7".
 *
 * Each distinct value is reported once. Its label is the surrounding text:
 * up to 30 characters on either side of the first match, trimmed. Results
 * are in discovery order (text by text, and pattern by pattern within a
 * text).
 *
 * @param page - Page whose text, as returned by `extractAllText`, is scanned.
 * @returns The unique stats found.
 */
function extractStats(page: PageContent): { value: string; label: string }[] {
  const CONTEXT_CHARS = 30;

  const statPatterns: readonly RegExp[] = [
    // Percentages: 50%, 99.9%
    /(\d+(?:\.\d+)?%)/g,
    // Dollar amounts: $1M, $500K, $1,000, $2.5B. A digit is required and
    // trailing sentence punctuation ("$500.") is not swallowed.
    /(\$\d+(?:,\d{3})*(?:\.\d+)?[KMB]?)/gi,
    // Multipliers: 10x, 2.5x. The boundary after "x" keeps dimensions
    // (1920x1080) and hex literals (0x1F) out.
    /(\d+(?:\.\d+)?x)\b/gi,
    // Large numbers: 100+, 50K+, 3M, 10,000. A number only counts when it has
    // a K/M/B suffix, a trailing "+", or thousands separators; bare digit
    // runs (years, the "50" inside "50%", the "1" inside "$1M") are ignored.
    /(?<![\w$.,])(\d{1,3}(?:,\d{3})+[KMB]?\+?|\d+(?:\.\d+)?(?:[KMB]\+?|\+))(?![\w%])/g,
    // Time-based: 24/7, 99.9% uptime
    /(24\/7|\d+(?:\.\d+)?%\s*uptime)/gi,
  ];

  const stats: { value: string; label: string }[] = [];
  const seen = new Set<string>();

  for (const text of extractAllText(page)) {
    for (const pattern of statPatterns) {
      for (const match of text.matchAll(pattern)) {
        const value = match[1];

        // Avoid duplicates: the first occurrence supplies the label.
        if (seen.has(value)) continue;
        seen.add(value);

        // Extract surrounding context as label
        const at = match.index ?? 0;
        const startIdx = Math.max(0, at - CONTEXT_CHARS);
        const endIdx = Math.min(text.length, at + match[0].length + CONTEXT_CHARS);
        stats.push({ value, label: text.slice(startIdx, endIdx).trim() });
      }
    }
  }

  return stats;
}
710
+
711
/**
 * Extract claims from page content.
 * Looks for assertive statements, superlatives, comparisons.
 *
 * A sentence (as produced by `extractSentences` over `extractAllText`) is
 * kept when it is longer than 20 and shorter than 500 characters and matches
 * at least one claim indicator.
 *
 * @param page - Parsed page content.
 * @returns Claim-like sentences in their original order.
 */
function extractClaims(page: PageContent): string[] {
  const MIN_LENGTH = 20;
  const MAX_LENGTH = 500;

  // Patterns that indicate claims.
  // Alternatives that begin or end with a non-word character ("#1", "100%",
  // "50%") cannot be delimited with \b: a word boundary next to "#" or "%"
  // requires an adjacent word character, so "\b#1" and "100%\b" never match
  // in ordinary prose. Those groups use lookarounds instead, which behave
  // exactly like \b for the purely alphabetic alternatives.
  const claimIndicators: readonly RegExp[] = [
    /(?<!\w)(best|leading|top|#1|number one|premier|fastest|most|only|first)(?!\w)/i,
    /\b(guaranteed|proven|certified|trusted|secure|compliant)\b/i,
    /\b(save|reduce|increase|improve|boost|grow|eliminate)\b/i,
    /(?<!\w)(never|always|every|all|100%)(?!\w)/i,
    /\b(award-winning|industry-leading|world-class|enterprise-grade)\b/i,
    /(?<!\w)(more than|over|up to|\d+[x%])(?!\w)/i,
  ];

  const claims: string[] = [];
  const sentences = extractSentences(extractAllText(page));

  for (const sentence of sentences) {
    if (sentence.length <= MIN_LENGTH || sentence.length >= MAX_LENGTH) continue;
    if (claimIndicators.some(pattern => pattern.test(sentence))) {
      claims.push(sentence);
    }
  }

  return claims;
}
738
+
739
/**
 * Produce one analysis record per section of the page.
 *
 * A record's content is the section's paragraphs followed by its bullet
 * points (if any); the word count is computed over that combined content by
 * `countWords`. A missing heading becomes an empty string.
 *
 * @param page - Parsed page content.
 * @returns Section analyses in the same order as `page.sections`.
 */
function extractSectionAnalyses(page: PageContent): SectionAnalysis[] {
  const analyses: SectionAnalysis[] = [];

  for (const { id, type, heading, paragraphs, bulletPoints } of page.sections) {
    const content: string[] = paragraphs.concat(bulletPoints ?? []);
    const wordCount = countWords(content);
    analyses.push({ id, type, heading: heading ?? '', content, wordCount });
  }

  return analyses;
}
760
+
761
/**
 * Extract the page slug from a URL or path.
 *
 * The scheme and host (if present), any query string or fragment, and all
 * leading/trailing slashes are removed, so
 * `https://example.com/pricing/?utm=x#plans` and `/pricing` both give
 * `pricing`.
 *
 * @param url - Absolute http(s) URL or a path.
 * @returns The remaining path, or `'home'` when nothing is left.
 */
function extractSlug(url: string): string {
  const slug = url
    // Remove protocol and domain if present (scheme is case-insensitive; the
    // host ends at the first "/", "?" or "#").
    .replace(/^https?:\/\/[^/?#]+/i, '')
    // Query string and fragment are not part of the slug.
    .replace(/[?#].*$/, '')
    // Remove leading/trailing slashes
    .replace(/^\/+|\/+$/g, '');

  // Use 'home' for empty slugs
  return slug || 'home';
}