@nuasite/cms 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (269) hide show
  1. package/README.md +237 -0
  2. package/dist/src/build-processor.d.ts +20 -0
  3. package/dist/src/build-processor.d.ts.map +1 -0
  4. package/dist/src/collection-scanner.d.ts +6 -0
  5. package/dist/src/collection-scanner.d.ts.map +1 -0
  6. package/dist/src/component-registry.d.ts +63 -0
  7. package/dist/src/component-registry.d.ts.map +1 -0
  8. package/dist/src/config.d.ts +24 -0
  9. package/dist/src/config.d.ts.map +1 -0
  10. package/dist/src/dev-middleware.d.ts +20 -0
  11. package/dist/src/dev-middleware.d.ts.map +1 -0
  12. package/dist/src/editor/ai.d.ts +60 -0
  13. package/dist/src/editor/ai.d.ts.map +1 -0
  14. package/dist/src/editor/api.d.ts +140 -0
  15. package/dist/src/editor/api.d.ts.map +1 -0
  16. package/dist/src/editor/color-utils.d.ts +106 -0
  17. package/dist/src/editor/color-utils.d.ts.map +1 -0
  18. package/dist/src/editor/components/ai-chat.d.ts +11 -0
  19. package/dist/src/editor/components/ai-chat.d.ts.map +1 -0
  20. package/dist/src/editor/components/ai-tooltip.d.ts +12 -0
  21. package/dist/src/editor/components/ai-tooltip.d.ts.map +1 -0
  22. package/dist/src/editor/components/attribute-editor.d.ts +5 -0
  23. package/dist/src/editor/components/attribute-editor.d.ts.map +1 -0
  24. package/dist/src/editor/components/block-editor.d.ts +12 -0
  25. package/dist/src/editor/components/block-editor.d.ts.map +1 -0
  26. package/dist/src/editor/components/collections-browser.d.ts +2 -0
  27. package/dist/src/editor/components/collections-browser.d.ts.map +1 -0
  28. package/dist/src/editor/components/color-toolbar.d.ts +12 -0
  29. package/dist/src/editor/components/color-toolbar.d.ts.map +1 -0
  30. package/dist/src/editor/components/confirm-dialog.d.ts +2 -0
  31. package/dist/src/editor/components/confirm-dialog.d.ts.map +1 -0
  32. package/dist/src/editor/components/create-page-modal.d.ts +2 -0
  33. package/dist/src/editor/components/create-page-modal.d.ts.map +1 -0
  34. package/dist/src/editor/components/editable-highlights.d.ts +9 -0
  35. package/dist/src/editor/components/editable-highlights.d.ts.map +1 -0
  36. package/dist/src/editor/components/error-boundary.d.ts +32 -0
  37. package/dist/src/editor/components/error-boundary.d.ts.map +1 -0
  38. package/dist/src/editor/components/fields.d.ts +75 -0
  39. package/dist/src/editor/components/fields.d.ts.map +1 -0
  40. package/dist/src/editor/components/frontmatter-fields.d.ts +29 -0
  41. package/dist/src/editor/components/frontmatter-fields.d.ts.map +1 -0
  42. package/dist/src/editor/components/highlight-overlay.d.ts +64 -0
  43. package/dist/src/editor/components/highlight-overlay.d.ts.map +1 -0
  44. package/dist/src/editor/components/image-overlay.d.ts +12 -0
  45. package/dist/src/editor/components/image-overlay.d.ts.map +1 -0
  46. package/dist/src/editor/components/markdown-editor-overlay.d.ts +6 -0
  47. package/dist/src/editor/components/markdown-editor-overlay.d.ts.map +1 -0
  48. package/dist/src/editor/components/markdown-inline-editor.d.ts +10 -0
  49. package/dist/src/editor/components/markdown-inline-editor.d.ts.map +1 -0
  50. package/dist/src/editor/components/media-library.d.ts +2 -0
  51. package/dist/src/editor/components/media-library.d.ts.map +1 -0
  52. package/dist/src/editor/components/outline.d.ts +21 -0
  53. package/dist/src/editor/components/outline.d.ts.map +1 -0
  54. package/dist/src/editor/components/redirect-countdown.d.ts +2 -0
  55. package/dist/src/editor/components/redirect-countdown.d.ts.map +1 -0
  56. package/dist/src/editor/components/seo-editor.d.ts +2 -0
  57. package/dist/src/editor/components/seo-editor.d.ts.map +1 -0
  58. package/dist/src/editor/components/text-style-toolbar.d.ts +8 -0
  59. package/dist/src/editor/components/text-style-toolbar.d.ts.map +1 -0
  60. package/dist/src/editor/components/toast/toast-container.d.ts +7 -0
  61. package/dist/src/editor/components/toast/toast-container.d.ts.map +1 -0
  62. package/dist/src/editor/components/toast/toast.d.ts +7 -0
  63. package/dist/src/editor/components/toast/toast.d.ts.map +1 -0
  64. package/dist/src/editor/components/toast/types.d.ts +7 -0
  65. package/dist/src/editor/components/toast/types.d.ts.map +1 -0
  66. package/dist/src/editor/components/toolbar.d.ts +21 -0
  67. package/dist/src/editor/components/toolbar.d.ts.map +1 -0
  68. package/dist/src/editor/config.d.ts +4 -0
  69. package/dist/src/editor/config.d.ts.map +1 -0
  70. package/dist/src/editor/constants.d.ts +101 -0
  71. package/dist/src/editor/constants.d.ts.map +1 -0
  72. package/dist/src/editor/context.d.ts +14 -0
  73. package/dist/src/editor/context.d.ts.map +1 -0
  74. package/dist/src/editor/dom.d.ts +77 -0
  75. package/dist/src/editor/dom.d.ts.map +1 -0
  76. package/dist/src/editor/editor.d.ts +64 -0
  77. package/dist/src/editor/editor.d.ts.map +1 -0
  78. package/dist/src/editor/history.d.ts +20 -0
  79. package/dist/src/editor/history.d.ts.map +1 -0
  80. package/dist/src/editor/hooks/index.d.ts +14 -0
  81. package/dist/src/editor/hooks/index.d.ts.map +1 -0
  82. package/dist/src/editor/hooks/useAIHandlers.d.ts +22 -0
  83. package/dist/src/editor/hooks/useAIHandlers.d.ts.map +1 -0
  84. package/dist/src/editor/hooks/useBlockEditorHandlers.d.ts +18 -0
  85. package/dist/src/editor/hooks/useBlockEditorHandlers.d.ts.map +1 -0
  86. package/dist/src/editor/hooks/useElementDetection.d.ts +26 -0
  87. package/dist/src/editor/hooks/useElementDetection.d.ts.map +1 -0
  88. package/dist/src/editor/hooks/useImageHoverDetection.d.ts +12 -0
  89. package/dist/src/editor/hooks/useImageHoverDetection.d.ts.map +1 -0
  90. package/dist/src/editor/hooks/useTextSelection.d.ts +23 -0
  91. package/dist/src/editor/hooks/useTextSelection.d.ts.map +1 -0
  92. package/dist/src/editor/hooks/useTooltipState.d.ts +19 -0
  93. package/dist/src/editor/hooks/useTooltipState.d.ts.map +1 -0
  94. package/dist/src/editor/hooks/utils.d.ts +32 -0
  95. package/dist/src/editor/hooks/utils.d.ts.map +1 -0
  96. package/dist/src/editor/index.d.ts +12 -0
  97. package/dist/src/editor/index.d.ts.map +1 -0
  98. package/dist/src/editor/lib/cn.d.ts +3 -0
  99. package/dist/src/editor/lib/cn.d.ts.map +1 -0
  100. package/dist/src/editor/manifest.d.ts +19 -0
  101. package/dist/src/editor/manifest.d.ts.map +1 -0
  102. package/dist/src/editor/markdown-api.d.ts +36 -0
  103. package/dist/src/editor/markdown-api.d.ts.map +1 -0
  104. package/dist/src/editor/signals.d.ts +242 -0
  105. package/dist/src/editor/signals.d.ts.map +1 -0
  106. package/dist/src/editor/storage.d.ts +27 -0
  107. package/dist/src/editor/storage.d.ts.map +1 -0
  108. package/dist/src/editor/text-styling.d.ts +350 -0
  109. package/dist/src/editor/text-styling.d.ts.map +1 -0
  110. package/dist/src/editor/themes.d.ts +38 -0
  111. package/dist/src/editor/themes.d.ts.map +1 -0
  112. package/dist/src/editor/types.d.ts +454 -0
  113. package/dist/src/editor/types.d.ts.map +1 -0
  114. package/dist/src/error-collector.d.ts +56 -0
  115. package/dist/src/error-collector.d.ts.map +1 -0
  116. package/dist/src/handlers/component-ops.d.ts +34 -0
  117. package/dist/src/handlers/component-ops.d.ts.map +1 -0
  118. package/dist/src/handlers/markdown-ops.d.ts +41 -0
  119. package/dist/src/handlers/markdown-ops.d.ts.map +1 -0
  120. package/dist/src/handlers/request-utils.d.ts +20 -0
  121. package/dist/src/handlers/request-utils.d.ts.map +1 -0
  122. package/dist/src/handlers/source-writer.d.ts +51 -0
  123. package/dist/src/handlers/source-writer.d.ts.map +1 -0
  124. package/dist/src/html-processor.d.ts +63 -0
  125. package/dist/src/html-processor.d.ts.map +1 -0
  126. package/dist/src/index.d.ts +41 -0
  127. package/dist/src/index.d.ts.map +1 -0
  128. package/dist/src/manifest-writer.d.ts +111 -0
  129. package/dist/src/manifest-writer.d.ts.map +1 -0
  130. package/dist/src/media/contember.d.ts +15 -0
  131. package/dist/src/media/contember.d.ts.map +1 -0
  132. package/dist/src/media/local.d.ts +9 -0
  133. package/dist/src/media/local.d.ts.map +1 -0
  134. package/dist/src/media/s3.d.ts +12 -0
  135. package/dist/src/media/s3.d.ts.map +1 -0
  136. package/dist/src/media/types.d.ts +40 -0
  137. package/dist/src/media/types.d.ts.map +1 -0
  138. package/dist/src/preview-generator.d.ts +19 -0
  139. package/dist/src/preview-generator.d.ts.map +1 -0
  140. package/dist/src/seo-processor.d.ts +23 -0
  141. package/dist/src/seo-processor.d.ts.map +1 -0
  142. package/dist/src/source-finder/ast-extractors.d.ts +35 -0
  143. package/dist/src/source-finder/ast-extractors.d.ts.map +1 -0
  144. package/dist/src/source-finder/ast-parser.d.ts +16 -0
  145. package/dist/src/source-finder/ast-parser.d.ts.map +1 -0
  146. package/dist/src/source-finder/cache.d.ts +18 -0
  147. package/dist/src/source-finder/cache.d.ts.map +1 -0
  148. package/dist/src/source-finder/collection-finder.d.ts +29 -0
  149. package/dist/src/source-finder/collection-finder.d.ts.map +1 -0
  150. package/dist/src/source-finder/cross-file-tracker.d.ts +39 -0
  151. package/dist/src/source-finder/cross-file-tracker.d.ts.map +1 -0
  152. package/dist/src/source-finder/element-finder.d.ts +42 -0
  153. package/dist/src/source-finder/element-finder.d.ts.map +1 -0
  154. package/dist/src/source-finder/image-finder.d.ts +24 -0
  155. package/dist/src/source-finder/image-finder.d.ts.map +1 -0
  156. package/dist/src/source-finder/index.d.ts +9 -0
  157. package/dist/src/source-finder/index.d.ts.map +1 -0
  158. package/dist/src/source-finder/search-index.d.ts +27 -0
  159. package/dist/src/source-finder/search-index.d.ts.map +1 -0
  160. package/dist/src/source-finder/snippet-utils.d.ts +90 -0
  161. package/dist/src/source-finder/snippet-utils.d.ts.map +1 -0
  162. package/dist/src/source-finder/source-lookup.d.ts +16 -0
  163. package/dist/src/source-finder/source-lookup.d.ts.map +1 -0
  164. package/dist/src/source-finder/types.d.ts +167 -0
  165. package/dist/src/source-finder/types.d.ts.map +1 -0
  166. package/dist/src/source-finder/variable-extraction.d.ts +37 -0
  167. package/dist/src/source-finder/variable-extraction.d.ts.map +1 -0
  168. package/dist/src/tailwind-colors.d.ts +54 -0
  169. package/dist/src/tailwind-colors.d.ts.map +1 -0
  170. package/dist/src/tsconfig.tsbuildinfo +1 -0
  171. package/dist/src/types.d.ts +367 -0
  172. package/dist/src/types.d.ts.map +1 -0
  173. package/dist/src/utils.d.ts +61 -0
  174. package/dist/src/utils.d.ts.map +1 -0
  175. package/dist/src/vite-plugin.d.ts +14 -0
  176. package/dist/src/vite-plugin.d.ts.map +1 -0
  177. package/dist/types/tsconfig.tsbuildinfo +1 -0
  178. package/package.json +80 -0
  179. package/src/build-processor.ts +784 -0
  180. package/src/collection-scanner.ts +304 -0
  181. package/src/component-registry.ts +393 -0
  182. package/src/config.ts +74 -0
  183. package/src/dev-middleware.ts +525 -0
  184. package/src/dist/src/tsconfig.tsbuildinfo +1 -0
  185. package/src/editor/ai.ts +185 -0
  186. package/src/editor/api.ts +513 -0
  187. package/src/editor/color-utils.ts +556 -0
  188. package/src/editor/components/ai-chat.tsx +632 -0
  189. package/src/editor/components/ai-tooltip.tsx +179 -0
  190. package/src/editor/components/attribute-editor.tsx +596 -0
  191. package/src/editor/components/block-editor.tsx +546 -0
  192. package/src/editor/components/collections-browser.tsx +248 -0
  193. package/src/editor/components/color-toolbar.tsx +314 -0
  194. package/src/editor/components/confirm-dialog.tsx +69 -0
  195. package/src/editor/components/create-page-modal.tsx +163 -0
  196. package/src/editor/components/editable-highlights.tsx +260 -0
  197. package/src/editor/components/error-boundary.tsx +87 -0
  198. package/src/editor/components/fields.tsx +387 -0
  199. package/src/editor/components/frontmatter-fields.tsx +469 -0
  200. package/src/editor/components/highlight-overlay.ts +229 -0
  201. package/src/editor/components/image-overlay.tsx +230 -0
  202. package/src/editor/components/markdown-editor-overlay.tsx +505 -0
  203. package/src/editor/components/markdown-inline-editor.tsx +780 -0
  204. package/src/editor/components/media-library.tsx +297 -0
  205. package/src/editor/components/outline.tsx +402 -0
  206. package/src/editor/components/redirect-countdown.tsx +45 -0
  207. package/src/editor/components/seo-editor.tsx +498 -0
  208. package/src/editor/components/text-style-toolbar.tsx +362 -0
  209. package/src/editor/components/toast/toast-container.tsx +15 -0
  210. package/src/editor/components/toast/toast.tsx +49 -0
  211. package/src/editor/components/toast/types.ts +7 -0
  212. package/src/editor/components/toolbar.tsx +366 -0
  213. package/src/editor/config.ts +12 -0
  214. package/src/editor/constants.ts +106 -0
  215. package/src/editor/context.tsx +38 -0
  216. package/src/editor/dom.ts +357 -0
  217. package/src/editor/editor.ts +1510 -0
  218. package/src/editor/env.d.ts +4 -0
  219. package/src/editor/history.ts +355 -0
  220. package/src/editor/hooks/index.ts +19 -0
  221. package/src/editor/hooks/useAIHandlers.ts +345 -0
  222. package/src/editor/hooks/useBlockEditorHandlers.ts +206 -0
  223. package/src/editor/hooks/useElementDetection.ts +284 -0
  224. package/src/editor/hooks/useImageHoverDetection.ts +102 -0
  225. package/src/editor/hooks/useTextSelection.ts +187 -0
  226. package/src/editor/hooks/useTooltipState.ts +126 -0
  227. package/src/editor/hooks/utils.ts +101 -0
  228. package/src/editor/index.tsx +481 -0
  229. package/src/editor/lib/cn.ts +4 -0
  230. package/src/editor/manifest.ts +25 -0
  231. package/src/editor/markdown-api.ts +209 -0
  232. package/src/editor/signals.ts +1351 -0
  233. package/src/editor/storage.ts +266 -0
  234. package/src/editor/styles.css +465 -0
  235. package/src/editor/text-styling.ts +773 -0
  236. package/src/editor/themes.ts +210 -0
  237. package/src/editor/types.ts +591 -0
  238. package/src/error-collector.ts +106 -0
  239. package/src/handlers/component-ops.ts +463 -0
  240. package/src/handlers/markdown-ops.ts +202 -0
  241. package/src/handlers/request-utils.ts +151 -0
  242. package/src/handlers/source-writer.ts +649 -0
  243. package/src/html-processor.ts +1108 -0
  244. package/src/index.ts +284 -0
  245. package/src/manifest-writer.ts +371 -0
  246. package/src/media/contember.ts +84 -0
  247. package/src/media/local.ts +114 -0
  248. package/src/media/s3.ts +133 -0
  249. package/src/media/types.ts +33 -0
  250. package/src/preview-generator.ts +293 -0
  251. package/src/seo-processor.ts +567 -0
  252. package/src/source-finder/ast-extractors.ts +185 -0
  253. package/src/source-finder/ast-parser.ts +150 -0
  254. package/src/source-finder/cache.ts +76 -0
  255. package/src/source-finder/collection-finder.ts +335 -0
  256. package/src/source-finder/cross-file-tracker.ts +741 -0
  257. package/src/source-finder/element-finder.ts +387 -0
  258. package/src/source-finder/image-finder.ts +283 -0
  259. package/src/source-finder/index.ts +37 -0
  260. package/src/source-finder/search-index.ts +525 -0
  261. package/src/source-finder/snippet-utils.ts +668 -0
  262. package/src/source-finder/source-lookup.ts +200 -0
  263. package/src/source-finder/types.ts +210 -0
  264. package/src/source-finder/variable-extraction.ts +406 -0
  265. package/src/tailwind-colors.ts +874 -0
  266. package/src/tsconfig.json +25 -0
  267. package/src/types.ts +406 -0
  268. package/src/utils.ts +186 -0
  269. package/src/vite-plugin.ts +42 -0
@@ -0,0 +1,1108 @@
1
+ import { type HTMLElement as ParsedHTMLElement, parse } from 'node-html-parser'
2
+ import { processSeoFromHtml } from './seo-processor'
3
+ import { enhanceManifestWithSourceSnippets } from './source-finder'
4
+ import { extractColorClasses } from './tailwind-colors'
5
+ import type { Attribute, ComponentInstance, ImageMetadata, ManifestEntry, PageSeoData, SeoOptions } from './types'
6
+ import { generateStableId } from './utils'
7
+
8
+ /** Element node type from node-html-parser (the library's `HTMLElement`, imported here as `ParsedHTMLElement`). */
9
+ type HTMLNode = ParsedHTMLElement
10
+
11
/**
 * Tag names of inline text-formatting elements.
 *
 * Elements with these tags are treated as part of their parent's content:
 * they do not receive their own CMS ID and are kept as HTML when the parent
 * element is edited.
 */
export const INLINE_STYLE_TAGS = [
  // Emphasis and weight
  'strong', 'b', 'em', 'i',
  // Underline, strike-through and edit marks
  'u', 's', 'strike', 'del', 'ins',
  // Highlighting, size and vertical position
  'mark', 'small', 'sub', 'sup',
  // Semantic inline text
  'abbr', 'cite', 'code', 'kbd', 'samp', 'var', 'time', 'dfn', 'q',
] as const
40
+
41
/** Options accepted by `processHtml`. */
export interface ProcessHtmlOptions {
  /** Name of the HTML attribute that receives generated CMS IDs (e.g. on the collection wrapper element). */
  attributeName: string
  /** Tag allow-list, or `null`. NOTE(review): presumably `null` means "no restriction" — confirm against the marking pass. */
  includeTags: string[] | null
  /** Tags to leave out. NOTE(review): exact matching rules are not visible in this part of the file. */
  excludeTags: string[]
  /** NOTE(review): presumably controls whether elements without text are marked — confirm. */
  includeEmptyText: boolean
  /** NOTE(review): presumably toggles collection of manifest entries — confirm. */
  generateManifest: boolean
  /** Mark component root elements with `data-cms-component-id`. Defaults to `true`. */
  markComponents?: boolean
  /**
   * Directories whose source files count as components.
   * Defaults to `['src/components']`; an empty array disables this allow-list check.
   */
  componentDirs?: string[]
  /**
   * Directories whose source files are never treated as components.
   * Defaults to `['src/pages', 'src/layouts', 'src/layout']`.
   */
  excludeComponentDirs?: string[]
  /**
   * Flag `<span>` elements whose classes are all text-styling classes with
   * `data-cms-styled="true"`. Defaults to `true`.
   */
  markStyledSpans?: boolean
  /**
   * When true, only mark elements that have source file attributes (from Astro templates).
   * Defaults to `false`.
   */
  skipMarkdownContent?: boolean
  /**
   * When true, inline text styling elements (strong, b, em, i, etc.) are not marked
   * and stay part of their parent's HTML content. Defaults to `true`.
   */
  skipInlineStyleTags?: boolean
  /** Collection info used to mark the wrapper element containing markdown content. */
  collectionInfo?: {
    name: string
    slug: string
    /** First line of the markdown body (legacy way to find the wrapper element in build mode). */
    bodyFirstLine?: string
    /** Full markdown body text (used for more robust wrapper detection in build mode). */
    bodyText?: string
    /** Path to the markdown file (e.g., 'src/content/blog/my-post.md'). */
    contentPath?: string
  }
  /** SEO tracking options. */
  seo?: SeoOptions
}
73
+
74
/** Value resolved by `processHtml`. */
export interface ProcessHtmlResult {
  /** HTML output of the processing run. */
  html: string
  /** Manifest entries, keyed by ID. */
  entries: Record<string, ManifestEntry>
  /** Detected component instances, keyed by the ID written to `data-cms-component-id`. */
  components: Record<string, ComponentInstance>
  /** ID of the element wrapping collection markdown content, when one was found. */
  collectionWrapperId?: string
  /** SEO data extracted from the page. */
  seo?: PageSeoData
}
83
+
84
+ /**
85
+ * Tailwind text styling class patterns that indicate a styled span.
86
+ * These are classes that only affect text appearance, not layout.
87
+ */
88
+
89
/**
 * Known layout-affecting classes that must NOT be considered text styling.
 *
 * `isTextStyleClass` tests these before `TEXT_STYLE_PATTERNS`, because several
 * of them (e.g. `text-center`, `bg-no-repeat`, `bg-left-top`) would otherwise
 * match the generic "custom color" patterns there.
 */
const LAYOUT_CLASS_PATTERNS = [
  // Text alignment
  /^text-(left|center|right|justify|start|end)$/,
  // Text wrapping and overflow
  /^text-(wrap|nowrap|balance|pretty|ellipsis|clip)$/,
  // Vertical alignment
  /^align-/,
  // Background attachment, size, repeat, position
  /^bg-(fixed|local|scroll)$/,
  /^bg-(auto|cover|contain)$/,
  /^bg-(repeat|no-repeat|repeat-x|repeat-y|repeat-round|repeat-space)$/,
  /^bg-clip-/,
  /^bg-origin-/,
  /^bg-(top|bottom|left|right|center)$/,
  // Corner positions, vertical side first (bg-top-left, bg-bottom-right)
  /^bg-(top|bottom)-(left|right)$/,
  // Corner positions, horizontal side first as named in Tailwind v3 (bg-left-top, bg-right-bottom)
  /^bg-(left|right)-(top|bottom)$/,
]
106
+
107
/**
 * Patterns for Tailwind classes that only change how text looks
 * (weight, decoration, case, color, background color, size, spacing).
 * A class counts as text styling when at least one pattern matches it.
 */
const TEXT_STYLE_PATTERNS = [
  // --- Font weight and style ---
  /^font-(thin|extralight|light|normal|medium|semibold|bold|extrabold|black|\d+)$/,
  /^(italic|not-italic)$/,

  // --- Text decoration: line, style, color (any name, custom ones included), thickness, offset ---
  /^(underline|overline|line-through|no-underline)$/,
  /^decoration-(solid|double|dotted|dashed|wavy)$/,
  /^decoration-[\w-]+$/,
  /^decoration-(auto|from-font|0|1|2|4|8)$/,
  /^underline-offset-/,

  // --- Text transform ---
  /^(uppercase|lowercase|capitalize|normal-case)$/,

  // --- Text color ---
  // With shade: text-red-500, text-brand-primary-600, text-custom-purple-500
  /^text-(?:[a-z]+-)+\d+$/,
  // Keyword colors: text-white, text-black, text-inherit, text-current, text-transparent
  /^text-(white|black|inherit|current|transparent)$/,
  // Custom color without shade: text-brand-primary, text-sky-blue
  /^text-[a-z]+-[a-z]+(-[a-z]+)*$/,
  // Arbitrary value: text-[#ff0000]
  /^text-\[.+\]$/,

  // --- Background color ---
  // With shade: bg-red-500, bg-custom-purple-500
  /^bg-(?:[a-z]+-)+\d+$/,
  // Keyword colors: bg-white, bg-black, bg-inherit, bg-current, bg-transparent
  /^bg-(white|black|inherit|current|transparent)$/,
  // Custom color without shade: bg-brand-primary
  /^bg-[a-z]+-[a-z]+(-[a-z]+)*$/,
  // Arbitrary value: bg-[#ff0000]
  /^bg-\[.+\]$/,

  // --- Font size, letter spacing, line height ---
  /^text-(xs|sm|base|lg|xl|2xl|3xl|4xl|5xl|6xl|7xl|8xl|9xl)$/,
  /^tracking-/,
  /^leading-/,
]
147
+
148
/**
 * Collect the text of `node` and all of its descendants.
 *
 * Mirrors how the HTML renders: a `<br>` contributes a single space, a `<wbr>`
 * contributes nothing, and any other element contributes the text of its own
 * subtree. Children that are neither text nor element nodes are ignored.
 *
 * @param node Element whose subtree is read.
 * @returns Concatenated text in document order.
 */
function getTextContent(node: HTMLNode): string {
  return node.childNodes
    .map((child): string => {
      // nodeType 3 = text node
      if (child.nodeType === 3) return child.text || ''
      // Anything that is not an element (nodeType 1) adds no text
      if (child.nodeType !== 1) return ''

      const element = child as HTMLNode
      const tag = element.tagName?.toLowerCase?.()
      if (tag === 'br') return ' '
      if (tag === 'wbr') return ''
      return getTextContent(element)
    })
    .join('')
}
176
+
177
/**
 * Decide whether a single class name is a text styling class.
 *
 * Layout patterns take precedence: a class that matches any entry of
 * `LAYOUT_CLASS_PATTERNS` is rejected even if a text style pattern would
 * also match it.
 */
function isTextStyleClass(className: string): boolean {
  const matches = (pattern: RegExp): boolean => pattern.test(className)
  return !LAYOUT_CLASS_PATTERNS.some(matches) && TEXT_STYLE_PATTERNS.some(matches)
}
188
+
189
/**
 * Report whether a `class` attribute value consists solely of text styling classes.
 *
 * An empty, missing or whitespace-only value yields `false`; otherwise every
 * whitespace-separated class must pass `isTextStyleClass`.
 */
function hasOnlyTextStyleClasses(classAttr: string): boolean {
  const classNames = (classAttr ?? '').split(/\s+/).filter(name => name.length > 0)
  return classNames.length > 0 && classNames.every(name => isTextStyleClass(name))
}
201
+
202
+ /**
203
+ * Process HTML to inject CMS markers and extract manifest entries
204
+ */
205
+ export async function processHtml(
206
+ html: string,
207
+ fileId: string,
208
+ options: ProcessHtmlOptions,
209
+ getNextId: () => string,
210
+ sourcePath?: string,
211
+ ): Promise<ProcessHtmlResult> {
212
+ const {
213
+ attributeName,
214
+ includeTags,
215
+ excludeTags,
216
+ includeEmptyText,
217
+ generateManifest,
218
+ markComponents = true,
219
+ componentDirs = ['src/components'],
220
+ excludeComponentDirs = ['src/pages', 'src/layouts', 'src/layout'],
221
+ markStyledSpans = true,
222
+ skipMarkdownContent = false,
223
+ skipInlineStyleTags = true,
224
+ collectionInfo,
225
+ seo: seoOptions,
226
+ } = options
227
+
228
+ const root = parse(html, {
229
+ lowerCaseTagName: false,
230
+ comment: true,
231
+ blockTextElements: {
232
+ script: true,
233
+ noscript: true,
234
+ style: true,
235
+ pre: true,
236
+ },
237
+ })
238
+
239
+ const entries: Record<string, ManifestEntry> = {}
240
+ const components: Record<string, ComponentInstance> = {}
241
+ const sourceLocationMap = new Map<string, { file: string; line: number }>()
242
+ const markedComponentRoots = new Set<HTMLNode>()
243
+ let collectionWrapperId: string | undefined
244
+ const componentCountPerParent = new Map<string, Map<string, number>>()
245
+
246
+ // First pass: detect and mark component root elements
247
+ // A component root is detected by data-astro-source-file pointing to a component directory
248
+ if (markComponents) {
249
+ root.querySelectorAll('*').forEach((node) => {
250
+ const sourceFile = node.getAttribute('data-astro-source-file')
251
+ if (!sourceFile) return
252
+
253
+ // Check if this element's source is from a component file
254
+ // Exclude pages and layouts first
255
+ const isExcludedFile = excludeComponentDirs.some(dir => {
256
+ const normalizedDir = dir.replace(/^\/+|\/+$/g, '')
257
+ return sourceFile.startsWith(normalizedDir + '/')
258
+ || sourceFile.startsWith(normalizedDir + '\\')
259
+ || sourceFile.includes('/' + normalizedDir + '/')
260
+ || sourceFile.includes('\\' + normalizedDir + '\\')
261
+ })
262
+ if (isExcludedFile) return
263
+
264
+ // If componentDirs is specified, also check whitelist
265
+ if (componentDirs.length > 0) {
266
+ const isComponentFile = componentDirs.some(dir => {
267
+ const normalizedDir = dir.replace(/^\/+|\/+$/g, '')
268
+ return sourceFile.startsWith(normalizedDir + '/')
269
+ || sourceFile.startsWith(normalizedDir + '\\')
270
+ || sourceFile.includes('/' + normalizedDir + '/')
271
+ || sourceFile.includes('\\' + normalizedDir + '\\')
272
+ })
273
+ if (!isComponentFile) return
274
+ }
275
+
276
+ // Check if any ancestor is already marked as a component root from the same file
277
+ // (we only want to mark the outermost element from each component)
278
+ let parent = node.parentNode as HTMLNode | null
279
+ let ancestorFromSameComponent = false
280
+ while (parent) {
281
+ const parentSource = parent.getAttribute?.('data-astro-source-file')
282
+ if (parentSource === sourceFile) {
283
+ ancestorFromSameComponent = true
284
+ break
285
+ }
286
+ parent = parent.parentNode as HTMLNode | null
287
+ }
288
+
289
+ if (ancestorFromSameComponent) return
290
+
291
+ // Find the nearest ancestor with a different source file (the parent that invokes this component)
292
+ let invocationSourcePath: string | undefined
293
+ let ancestor = node.parentNode as HTMLNode | null
294
+ while (ancestor) {
295
+ const ancestorSource = ancestor.getAttribute?.('data-astro-source-file')
296
+ if (ancestorSource && ancestorSource !== sourceFile) {
297
+ invocationSourcePath = ancestorSource
298
+ break
299
+ }
300
+ ancestor = ancestor.parentNode as HTMLNode | null
301
+ }
302
+
303
+ // This is a component root - mark it
304
+ const id = getNextId()
305
+ node.setAttribute('data-cms-component-id', id)
306
+ markedComponentRoots.add(node)
307
+
308
+ // Extract component name from file path (e.g., "src/components/Welcome.astro" -> "Welcome")
309
+ const componentName = extractComponentName(sourceFile)
310
+ // Parse source loc - format is "line:col" e.g. "20:21"
311
+ // Support both our custom attribute and Astro's native attribute
312
+ const sourceLocAttr = node.getAttribute('data-astro-source-loc')
313
+ || node.getAttribute('data-astro-source-line')
314
+ || '1:0'
315
+ const sourceLine = parseInt(sourceLocAttr.split(':')[0] ?? '1', 10)
316
+
317
+ // Track invocation index (0-based count of same component name per parent file)
318
+ let invocationIndex: number | undefined
319
+ if (invocationSourcePath) {
320
+ if (!componentCountPerParent.has(invocationSourcePath)) {
321
+ componentCountPerParent.set(invocationSourcePath, new Map())
322
+ }
323
+ const counters = componentCountPerParent.get(invocationSourcePath)!
324
+ const current = counters.get(componentName) ?? 0
325
+ counters.set(componentName, current + 1)
326
+ invocationIndex = current
327
+ }
328
+
329
+ components[id] = {
330
+ id,
331
+ componentName,
332
+ file: fileId,
333
+ sourcePath: sourceFile,
334
+ sourceLine,
335
+ props: {}, // Props will be filled from component definitions
336
+ invocationSourcePath,
337
+ invocationIndex,
338
+ }
339
+ })
340
+ }
341
+
342
+ // Second pass: mark span elements with text-only styling classes as styled spans
343
+ // This allows the CMS editor to recognize pre-existing styled text
344
+ if (markStyledSpans) {
345
+ root.querySelectorAll('span').forEach((node) => {
346
+ // Skip if already marked
347
+ if (node.getAttribute('data-cms-styled')) return
348
+
349
+ const classAttr = node.getAttribute('class')
350
+ if (!classAttr) return
351
+
352
+ // Check if the span has only text styling classes
353
+ if (hasOnlyTextStyleClasses(classAttr)) {
354
+ node.setAttribute('data-cms-styled', 'true')
355
+ }
356
+ })
357
+ }
358
+
359
+ // Collection wrapper detection pass: find the element that wraps markdown content
360
+ // This needs to run BEFORE image marking so we can skip images inside markdown
361
+ let markdownWrapperNode: HTMLNode | null = null
362
+
363
+ // Two strategies:
364
+ // 1. Dev mode: look for elements with data-astro-source-file containing children without it
365
+ // 2. Build mode: find element whose first child content matches the start of markdown body
366
+ if (collectionInfo) {
367
+ const allElements = root.querySelectorAll('*')
368
+ let foundWrapper = false
369
+
370
+ // Strategy 1: Dev mode - look for source file attributes
371
+ for (const node of allElements) {
372
+ const sourceFile = node.getAttribute('data-astro-source-file')
373
+ if (!sourceFile) continue
374
+
375
+ // Check if this element has any direct child elements without source file attribute
376
+ // These would be markdown-rendered elements
377
+ const childElements = node.childNodes.filter(
378
+ (child): child is HTMLNode => child.nodeType === 1 && 'tagName' in child,
379
+ )
380
+ const hasMarkdownChildren = childElements.some(
381
+ (child) => !child.getAttribute?.('data-astro-source-file'),
382
+ )
383
+
384
+ if (hasMarkdownChildren) {
385
+ // Check if any ancestor already has been marked as a collection wrapper
386
+ // We want the innermost wrapper
387
+ let parent = node.parentNode as HTMLNode | null
388
+ let hasAncestorWrapper = false
389
+ while (parent) {
390
+ if (parent.getAttribute?.(attributeName)?.startsWith('cms-collection-')) {
391
+ hasAncestorWrapper = true
392
+ break
393
+ }
394
+ parent = parent.parentNode as HTMLNode | null
395
+ }
396
+
397
+ if (!hasAncestorWrapper) {
398
+ // Mark this as the collection wrapper using the standard attribute
399
+ const id = getNextId()
400
+ node.setAttribute(attributeName, id)
401
+ node.setAttribute('data-cms-markdown', 'true')
402
+ collectionWrapperId = id
403
+ markdownWrapperNode = node
404
+ foundWrapper = true
405
+ // Don't break - we want the deepest wrapper, so we'll overwrite
406
+ }
407
+ }
408
+ }
409
+
410
+ // Strategy 2: Build mode - find the deepest element containing all markdown body text
411
+ if (!foundWrapper && collectionInfo.bodyText) {
412
+ // Strip markdown syntax to get plain text for comparison
413
+ const bodyPlain = collectionInfo.bodyText
414
+ .replace(/^---[\s\S]*?---\n*/m, '') // Remove frontmatter
415
+ .replace(/!\[[^\]]*\]\([^)]+\)/g, '') // Remove images
416
+ .replace(/\[([^\]]*)\]\([^)]+\)/g, '$1') // Extract link text
417
+ .replace(/^#+\s+/gm, '') // Remove heading markers
418
+ .replace(/^\s*[-*+]\s+/gm, '') // Remove list markers
419
+ .replace(/^\s*\d+\.\s+/gm, '') // Remove ordered list markers
420
+ .replace(/^\s*>\s+/gm, '') // Remove blockquote markers
421
+ .replace(/`{1,3}[^`]*`{1,3}/g, (m) => m.replace(/`/g, '')) // Remove code backticks
422
+ .replace(/\*{1,2}([^*]+)\*{1,2}/g, '$1') // Remove bold/italic markers
423
+ .replace(/~{2}([^~]+)~{2}/g, '$1') // Remove strikethrough markers
424
+ .replace(/\n{2,}/g, '\n') // Collapse multiple newlines
425
+ .trim()
426
+
427
+ // Extract a few unique text snippets from different parts of the body
428
+ const lines = bodyPlain.split('\n').map(l => l.trim()).filter(l => l.length > 3)
429
+ const snippets: string[] = []
430
+ if (lines.length > 0) snippets.push(lines[0]!.substring(0, 60))
431
+ if (lines.length > 1) snippets.push(lines[lines.length - 1]!.substring(0, 60))
432
+ if (lines.length > 2) snippets.push(lines[Math.floor(lines.length / 2)]!.substring(0, 60))
433
+
434
+ if (snippets.length > 0) {
435
+ // Find the deepest element that contains all snippets
436
+ let bestWrapper: HTMLNode | null = null
437
+ let bestDepth = -1
438
+
439
+ const measureDepth = (node: HTMLNode): number => {
440
+ let depth = 0
441
+ let current = node.parentNode as HTMLNode | null
442
+ while (current) {
443
+ depth++
444
+ current = current.parentNode as HTMLNode | null
445
+ }
446
+ return depth
447
+ }
448
+
449
+ for (const node of allElements) {
450
+ const tag = node.tagName?.toLowerCase?.() ?? ''
451
+ if (['script', 'style', 'head', 'meta', 'link', 'html'].includes(tag)) continue
452
+ // Skip already-marked elements
453
+ if (node.getAttribute(attributeName)) continue
454
+
455
+ const nodeText = getTextContent(node).trim()
456
+ const containsAll = snippets.every(s => nodeText.includes(s))
457
+ if (containsAll) {
458
+ const depth = measureDepth(node)
459
+ if (depth > bestDepth) {
460
+ bestDepth = depth
461
+ bestWrapper = node
462
+ }
463
+ }
464
+ }
465
+
466
+ if (bestWrapper) {
467
+ const id = getNextId()
468
+ bestWrapper.setAttribute(attributeName, id)
469
+ bestWrapper.setAttribute('data-cms-markdown', 'true')
470
+ collectionWrapperId = id
471
+ markdownWrapperNode = bestWrapper
472
+ foundWrapper = true
473
+ }
474
+ }
475
+ }
476
+
477
+ // Strategy 3: Legacy fallback - match first line only (for when bodyText is not available)
478
+ if (!foundWrapper && collectionInfo.bodyFirstLine) {
479
+ const bodyStart = collectionInfo.bodyFirstLine
480
+ .replace(/^\*\*|\*\*$/g, '')
481
+ .replace(/\*\*/g, '')
482
+ .replace(/\*/g, '')
483
+ .replace(/^#+ /, '')
484
+ .replace(/^\s*[-*+]\s+/, '')
485
+ .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
486
+ .trim()
487
+ .substring(0, 50)
488
+
489
+ if (bodyStart.length > 3) {
490
+ const candidates: Array<{ node: HTMLNode; blockChildCount: number }> = []
491
+
492
+ for (const node of allElements) {
493
+ const tag = node.tagName?.toLowerCase?.() ?? ''
494
+ if (['script', 'style', 'head', 'meta', 'link'].includes(tag)) continue
495
+
496
+ const firstChild = node.childNodes.find(
497
+ (child): child is HTMLNode => child.nodeType === 1 && 'tagName' in child,
498
+ )
499
+
500
+ if (firstChild) {
501
+ const firstChildText = getTextContent(firstChild).trim().substring(0, 80)
502
+ if (firstChildText.includes(bodyStart)) {
503
+ const blockTags = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol', 'blockquote', 'pre', 'table', 'hr']
504
+ const blockChildCount = node.childNodes.filter(
505
+ (child): child is HTMLNode =>
506
+ child.nodeType === 1 && 'tagName' in child && blockTags.includes((child as HTMLNode).tagName?.toLowerCase?.() ?? ''),
507
+ ).length
508
+
509
+ candidates.push({ node, blockChildCount })
510
+ }
511
+ }
512
+ }
513
+
514
+ const unmarkedCandidates = candidates.filter(c => !c.node.getAttribute(attributeName))
515
+ if (unmarkedCandidates.length > 0) {
516
+ const best = unmarkedCandidates.reduce((a, b) => (b.blockChildCount > a.blockChildCount ? b : a))
517
+ if (best.blockChildCount >= 1) {
518
+ const id = getNextId()
519
+ best.node.setAttribute(attributeName, id)
520
+ best.node.setAttribute('data-cms-markdown', 'true')
521
+ collectionWrapperId = id
522
+ markdownWrapperNode = best.node
523
+ foundWrapper = true
524
+ }
525
+ }
526
+ }
527
+ }
528
+ }
529
+
530
+ // Helper function to check if a node is inside the markdown wrapper
531
+ const isInsideMarkdownWrapper = (node: HTMLNode): boolean => {
532
+ if (!markdownWrapperNode) return false
533
+ let current = node.parentNode as HTMLNode | null
534
+ while (current) {
535
+ if (current === markdownWrapperNode) return true
536
+ current = current.parentNode as HTMLNode | null
537
+ }
538
+ return false
539
+ }
540
+
541
+ // Image detection pass: mark img elements for CMS image replacement
542
+ // Store image entries separately to add to manifest later
543
+ // NOTE: Skip images inside markdown wrapper - they are edited via the markdown editor
544
+ interface ImageEntry {
545
+ metadata: ImageMetadata
546
+ sourceFile?: string
547
+ sourceLine?: number
548
+ }
549
+ const imageEntries = new Map<string, ImageEntry>()
550
+ root.querySelectorAll('img').forEach((node) => {
551
+ // Skip if already marked
552
+ if (node.getAttribute(attributeName)) return
553
+
554
+ // Skip images inside markdown wrapper - they are edited via the markdown editor
555
+ if (isInsideMarkdownWrapper(node)) return
556
+
557
+ const src = node.getAttribute('src')
558
+ if (!src) return // Skip images without src
559
+
560
+ // When skipMarkdownContent is true (collection pages), only mark images
561
+ // that have source file attributes (from Astro templates, not markdown)
562
+ if (skipMarkdownContent) {
563
+ // Check if the image or any ancestor has source file attribute
564
+ let hasSourceAttr = false
565
+ let current: HTMLNode | null = node
566
+ while (current) {
567
+ if (current.getAttribute?.('data-astro-source-file')) {
568
+ hasSourceAttr = true
569
+ break
570
+ }
571
+ current = current.parentNode as HTMLNode | null
572
+ }
573
+ if (!hasSourceAttr) return
574
+ }
575
+
576
+ const id = getNextId()
577
+ node.setAttribute(attributeName, id)
578
+ node.setAttribute('data-cms-img', 'true')
579
+
580
+ // Try to get source location from the image itself or ancestors
581
+ let sourceFile: string | undefined
582
+ let sourceLine: number | undefined
583
+ let current: HTMLNode | null = node
584
+ while (current && !sourceFile) {
585
+ const file = current.getAttribute?.('data-astro-source-file')
586
+ const line = current.getAttribute?.('data-astro-source-loc') || current.getAttribute?.('data-astro-source-line')
587
+ if (file) {
588
+ sourceFile = file
589
+ if (line) {
590
+ const lineNum = parseInt(line.split(':')[0] ?? '1', 10)
591
+ if (!Number.isNaN(lineNum)) {
592
+ sourceLine = lineNum
593
+ }
594
+ }
595
+ }
596
+ current = current.parentNode as HTMLNode | null
597
+ }
598
+
599
+ // Build image metadata
600
+ const metadata: ImageMetadata = {
601
+ src,
602
+ alt: node.getAttribute('alt') || '',
603
+ srcSet: node.getAttribute('srcset') || undefined,
604
+ sizes: node.getAttribute('sizes') || undefined,
605
+ }
606
+
607
+ // Store image info for manifest
608
+ imageEntries.set(id, {
609
+ metadata,
610
+ sourceFile,
611
+ sourceLine,
612
+ })
613
+ })
614
+
615
+ // Third pass: collect candidate text elements (don't mark yet)
616
+ // We collect candidates first to filter out pure containers before marking
617
+ interface TextCandidate {
618
+ node: HTMLNode
619
+ tag: string
620
+ sourceFile: string | undefined
621
+ sourceLine: string | undefined
622
+ }
623
+ const textCandidates: TextCandidate[] = []
624
+ const candidateNodes = new Set<HTMLNode>()
625
+
626
+ root.querySelectorAll('*').forEach((node) => {
627
+ const tag = node.tagName?.toLowerCase?.() ?? ''
628
+
629
+ if (excludeTags.includes(tag)) return
630
+ if (includeTags && !includeTags.includes(tag)) return
631
+ if (node.getAttribute(attributeName)) return // Already marked (images, collection wrapper)
632
+
633
+ // Skip elements inside markdown wrapper - they are edited via the markdown editor
634
+ if (isInsideMarkdownWrapper(node)) return
635
+
636
+ // Skip inline text styling elements (strong, b, em, i, etc.)
637
+ // These should be part of their parent's text content, not separately editable
638
+ // Only apply when includeTags is null (all tags) - if specific tags are listed, respect them
639
+ if (skipInlineStyleTags && includeTags === null && INLINE_STYLE_TAGS.includes(tag as typeof INLINE_STYLE_TAGS[number])) {
640
+ return
641
+ }
642
+
643
+ // Skip styled spans (spans with only text styling Tailwind classes)
644
+ // These are also inline text formatting and should be part of parent content
645
+ // Only apply when includeTags is null or doesn't include 'span'
646
+ if (skipInlineStyleTags && (includeTags === null || !includeTags.includes('span')) && tag === 'span') {
647
+ const classAttr = node.getAttribute('class')
648
+ if (classAttr && hasOnlyTextStyleClasses(classAttr)) {
649
+ return
650
+ }
651
+ }
652
+
653
+ const textContent = getTextContent(node).trim()
654
+ if (!includeEmptyText && !textContent) return
655
+
656
+ // Extract source location from Astro compiler attributes
657
+ const sourceFile = node.getAttribute('data-astro-source-file')
658
+ const sourceLine = node.getAttribute('data-astro-source-loc')
659
+ || node.getAttribute('data-astro-source-line')
660
+
661
+ // When skipMarkdownContent is true, only mark elements that have source file attributes
662
+ // (meaning they come from Astro templates, not rendered markdown content)
663
+ if (skipMarkdownContent && !sourceFile) {
664
+ return
665
+ }
666
+
667
+ textCandidates.push({ node, tag, sourceFile: sourceFile || undefined, sourceLine: sourceLine || undefined })
668
+ candidateNodes.add(node)
669
+ })
670
+
671
+ // Helper to check if a node has direct text (text not inside candidate descendants)
672
+ const hasDirectText = (node: HTMLNode): boolean => {
673
+ // Check for text nodes directly under this element (not inside candidate children)
674
+ for (const child of node.childNodes) {
675
+ if (child.nodeType === 3) {
676
+ // Text node
677
+ const text = (child.text || '').trim()
678
+ if (text) return true
679
+ } else if (child.nodeType === 1) {
680
+ // Element node - only recurse if it's not a candidate
681
+ const childEl = child as HTMLNode
682
+ if (!candidateNodes.has(childEl) && !childEl.getAttribute?.(attributeName)) {
683
+ if (hasDirectText(childEl)) return true
684
+ }
685
+ }
686
+ }
687
+ return false
688
+ }
689
+
690
+ // Helper to check if a node has any candidate or already-marked descendants
691
+ const hasCandidateDescendants = (node: HTMLNode): boolean => {
692
+ for (const child of node.childNodes) {
693
+ if (child.nodeType === 1) {
694
+ const childEl = child as HTMLNode
695
+ if (candidateNodes.has(childEl) || childEl.getAttribute?.(attributeName)) {
696
+ return true
697
+ }
698
+ if (hasCandidateDescendants(childEl)) return true
699
+ }
700
+ }
701
+ return false
702
+ }
703
+
704
+ // Filter out pure containers (no direct text, only candidate/marked children)
705
+ // and mark remaining candidates
706
+ for (const candidate of textCandidates) {
707
+ const { node, sourceFile, sourceLine } = candidate
708
+
709
+ // Check if this is a pure container (no direct text, only has candidate descendants)
710
+ const directText = hasDirectText(node)
711
+ const hasDescendants = hasCandidateDescendants(node)
712
+
713
+ // Skip pure containers - they have no direct text and all content comes from children
714
+ if (!directText && hasDescendants) {
715
+ candidateNodes.delete(node) // Remove from candidates so nested checks stay accurate
716
+ continue
717
+ }
718
+
719
+ // Mark this element
720
+ const id = getNextId()
721
+ node.setAttribute(attributeName, id)
722
+
723
+ if (sourceFile && sourceLine) {
724
+ const lineNum = parseInt(sourceLine.split(':')[0] ?? '1', 10)
725
+ if (!Number.isNaN(lineNum)) {
726
+ sourceLocationMap.set(id, { file: sourceFile, line: lineNum })
727
+ }
728
+ // Only remove source attributes if this is NOT a component root
729
+ // Component roots need these for identification
730
+ if (!markedComponentRoots.has(node)) {
731
+ node.removeAttribute('data-astro-source-file')
732
+ node.removeAttribute('data-astro-source-loc')
733
+ node.removeAttribute('data-astro-source-line')
734
+ }
735
+ }
736
+ }
737
+
738
+ // Fourth pass: build manifest entries for all marked elements
739
+ if (generateManifest) {
740
+ root.querySelectorAll(`[${attributeName}]`).forEach((node) => {
741
+ const id = node.getAttribute(attributeName)
742
+ if (!id) return
743
+
744
+ const tag = node.tagName?.toLowerCase?.() ?? ''
745
+
746
+ // Get direct child CMS elements (not deeply nested descendants)
747
+ const childCmsIds: string[] = []
748
+ for (const child of node.childNodes) {
749
+ if (child.nodeType === 1) {
750
+ const childEl = child as HTMLNode
751
+ const childId = childEl.getAttribute?.(attributeName)
752
+ if (childId) {
753
+ childCmsIds.push(childId)
754
+ }
755
+ }
756
+ }
757
+
758
+ // Build text with placeholders for child CMS elements
759
+ // Recursively process child nodes to handle nested CMS elements correctly
760
+ type ChildNode = { nodeType: number; text?: string; tagName?: string; childNodes?: ChildNode[]; getAttribute?: (name: string) => string | null }
761
+ const buildTextWithPlaceholders = (nodes: ChildNode[]): string => {
762
+ let text = ''
763
+ for (const child of nodes) {
764
+ if (child.nodeType === 3) {
765
+ // Text node
766
+ text += child.text || ''
767
+ } else if (child.nodeType === 1) {
768
+ // Element node
769
+ const tagName = child.tagName?.toLowerCase?.()
770
+
771
+ // Preserve <br> and <wbr> literally so text matches source snippets
772
+ if (tagName === 'br') {
773
+ text += '<br>'
774
+ continue
775
+ }
776
+ if (tagName === 'wbr') {
777
+ text += '<wbr>'
778
+ continue
779
+ }
780
+
781
+ const directCmsId = child.getAttribute?.(attributeName)
782
+
783
+ if (directCmsId) {
784
+ // Child has a direct CMS ID - use placeholder
785
+ text += `{{cms:${directCmsId}}}`
786
+ } else {
787
+ // Child doesn't have a CMS ID - recursively process its children
788
+ text += buildTextWithPlaceholders((child.childNodes || []) as ChildNode[])
789
+ }
790
+ }
791
+ }
792
+ return text
793
+ }
794
+
795
+ const textWithPlaceholders = buildTextWithPlaceholders((node.childNodes || []) as ChildNode[])
796
+
797
+ // Get source location from map (injected by Astro compiler)
798
+ const sourceLocation = sourceLocationMap.get(id)
799
+
800
+ // Find parent component if any
801
+ let parentComponentId: string | undefined
802
+ let parent = node.parentNode as HTMLNode | null
803
+ while (parent) {
804
+ const parentCompId = parent.getAttribute?.('data-cms-component-id')
805
+ if (parentCompId) {
806
+ parentComponentId = parentCompId
807
+ break
808
+ }
809
+ parent = parent.parentNode as HTMLNode | null
810
+ }
811
+
812
+ // Check if element contains inline style elements (strong, b, em, etc.) or styled spans
813
+ // If so, store the HTML content for source file updates
814
+ const inlineStyleSelector = INLINE_STYLE_TAGS.join(', ')
815
+ const hasInlineStyleElements = node.querySelector(inlineStyleSelector) !== null
816
+ const hasStyledSpans = node.querySelector('[data-cms-styled]') !== null
817
+ const htmlContent = (hasInlineStyleElements || hasStyledSpans) ? node.innerHTML : undefined
818
+
819
+ // Check if this is an image entry
820
+ const imageInfo = imageEntries.get(id)
821
+ const isImage = !!imageInfo
822
+
823
+ // Check if this is the collection wrapper
824
+ const isCollectionWrapper = id === collectionWrapperId
825
+
826
+ const entryText = isImage ? (imageInfo.metadata.alt || imageInfo.metadata.src) : textWithPlaceholders.trim()
827
+ // For images, use the source file we captured from ancestors if not in sourceLocationMap
828
+ const entrySourcePath = sourceLocation?.file || imageInfo?.sourceFile || sourcePath
829
+
830
+ // Generate stable ID based on content and context
831
+ const stableId = generateStableId(tag, entryText, entrySourcePath)
832
+
833
+ // Extract color classes for buttons and other elements
834
+ const classAttr = node.getAttribute('class')
835
+ const colorClasses = extractColorClasses(classAttr)
836
+
837
+ // Extract all relevant attributes for git diff tracking
838
+ const attributes = extractAllAttributes(node)
839
+
840
+ entries[id] = {
841
+ id,
842
+ tag,
843
+ text: entryText,
844
+ html: htmlContent,
845
+ sourcePath: entrySourcePath,
846
+ childCmsIds: childCmsIds.length > 0 ? childCmsIds : undefined,
847
+ sourceLine: sourceLocation?.line ?? imageInfo?.sourceLine,
848
+ sourceSnippet: undefined,
849
+ variableName: undefined,
850
+ parentComponentId,
851
+ // Add collection info for the wrapper entry
852
+ collectionName: isCollectionWrapper ? collectionInfo?.name : undefined,
853
+ collectionSlug: isCollectionWrapper ? collectionInfo?.slug : undefined,
854
+ contentPath: isCollectionWrapper ? collectionInfo?.contentPath : undefined,
855
+ // Robustness fields
856
+ stableId,
857
+ // Image metadata for image entries
858
+ imageMetadata: imageInfo?.metadata,
859
+ // Color classes for buttons/styled elements
860
+ colorClasses,
861
+ // All attributes with resolved values (isStatic will be updated later from source)
862
+ attributes,
863
+ }
864
+ })
865
+ }
866
+
867
+ // Clean up any remaining source attributes from component-marked elements
868
+ markedComponentRoots.forEach((node) => {
869
+ node.removeAttribute('data-astro-source-file')
870
+ node.removeAttribute('data-astro-source-loc')
871
+ node.removeAttribute('data-astro-source-line')
872
+ })
873
+
874
+ // Enhance manifest entries with actual source snippets from source files
875
+ // This allows the CMS to match and replace dynamic content in source files
876
+ const enhancedEntries = await enhanceManifestWithSourceSnippets(entries)
877
+
878
+ // Get the current HTML for SEO processing
879
+ let finalHtml = root.toString()
880
+
881
+ // Process SEO elements from the page
882
+ let seo: PageSeoData | undefined
883
+ if (seoOptions?.trackSeo !== false) {
884
+ const seoResult = await processSeoFromHtml(
885
+ finalHtml,
886
+ {
887
+ markTitle: seoOptions?.markTitle ?? true,
888
+ parseJsonLd: seoOptions?.parseJsonLd ?? true,
889
+ sourcePath,
890
+ },
891
+ getNextId,
892
+ )
893
+
894
+ seo = seoResult.seo
895
+ finalHtml = seoResult.html
896
+
897
+ // If title was marked with CMS ID, add it to entries
898
+ if (seoResult.titleId && seo.title) {
899
+ enhancedEntries[seoResult.titleId] = {
900
+ id: seoResult.titleId,
901
+ tag: 'title',
902
+ text: seo.title.content,
903
+ sourcePath: seo.title.sourcePath || sourcePath,
904
+ sourceLine: seo.title.sourceLine,
905
+ sourceSnippet: seo.title.sourceSnippet,
906
+ }
907
+ }
908
+ }
909
+
910
+ return {
911
+ html: finalHtml,
912
+ entries: enhancedEntries,
913
+ components,
914
+ collectionWrapperId,
915
+ seo,
916
+ }
917
+ }
918
+
919
/**
 * Extract the component name from a source file path.
 *
 * The name is the last path segment with a trailing `.astro`, `.tsx`, `.jsx`
 * or `.svelte` extension removed. Both `/` and `\` are accepted as path
 * separators so that Windows-style paths resolve to the file name as well.
 *
 * e.g., "src/components/Welcome.astro" -> "Welcome"
 * e.g., "src/components/ui/Button.astro" -> "Button"
 * e.g., "C:\\site\\src\\components\\Card.tsx" -> "Card"
 *
 * @param sourceFile - Path of the component source file
 * @returns The file name without a known component extension; an empty string
 * when the path is empty or ends with a separator. Unknown extensions are kept.
 */
export function extractComponentName(sourceFile: string): string {
	// Split on either separator: a path using backslashes would otherwise be
	// treated as one single segment and returned (almost) whole
	const segments = sourceFile.split(/[\\/]/)
	const fileName = segments[segments.length - 1] || ''
	// Strip any known component extension (.astro, .tsx, .jsx, .svelte)
	return fileName.replace(/\.(astro|tsx|jsx|svelte)$/, '')
}
930
+
931
/**
 * Normalize text so two strings can be compared loosely.
 *
 * Leading and trailing whitespace is removed, every run of whitespace is
 * collapsed to a single space, and the result is lowercased.
 *
 * @param text - Raw text to normalize
 * @returns The normalized, lowercased text
 */
export function cleanText(text: string): string {
	const trimmed = text.trim()
	// After trimming, splitting on whitespace runs and re-joining with a single
	// space collapses all inner whitespace
	const collapsed = trimmed.split(/\s+/).join(' ')
	return collapsed.toLowerCase()
}
937
+
938
/**
 * Extract all relevant attributes from an element for git diff tracking.
 *
 * Attributes are collected in a fixed order: common attributes (`id`, `title`,
 * `lang`, `tabindex`), then the attributes specific to the element's tag, then
 * ARIA attributes, and finally custom `data-*` attributes (internal `data-cms*`
 * and `data-astro*` attributes are excluded). Attributes that are absent or
 * have an empty value are omitted.
 *
 * Only the `value` field of each Attribute is populated here; boolean
 * attributes are recorded as the string `'true'`.
 * NOTE(review): the previous comment said every attribute starts out as
 * `isStatic: true`, but this function never sets that field - presumably it is
 * filled in later when the source code is analyzed; confirm against the caller.
 *
 * @param node - Element whose attributes are read
 * @returns A Record mapping attribute names to Attribute objects, or
 * `undefined` when no attribute was collected
 */
function extractAllAttributes(node: HTMLNode): Record<string, Attribute> | undefined {
	const tag = node.tagName?.toLowerCase?.()
	const result: Record<string, Attribute> = {}

	// Helper to add an attribute if it has a value
	const addAttr = (name: string, value: string | boolean | null | undefined) => {
		if (value !== null && value !== undefined && value !== '') {
			result[name] = {
				value: typeof value === 'boolean' ? String(value) : value,
			}
		}
	}

	// Record each named attribute with its literal value (skipped when absent or empty)
	const addValues = (...names: string[]) => {
		for (const name of names) addAttr(name, node.getAttribute(name))
	}

	// Record each named boolean attribute as 'true' when it is present at all
	const addFlags = (...names: string[]) => {
		for (const name of names) {
			if (node.hasAttribute(name)) addAttr(name, 'true')
		}
	}

	// Common attributes for all elements
	addValues('id', 'title', 'lang', 'tabindex')

	// Tag-specific attributes. Within each case, valued attributes come first and
	// boolean flags second so the key order of the result stays stable.
	switch (tag) {
		case 'a':
			addValues('href', 'target', 'rel')
			// `download` may carry a file name; a bare `download` is recorded as 'true'
			if (node.hasAttribute('download')) {
				addAttr('download', node.getAttribute('download') || 'true')
			}
			break
		case 'button':
			addValues('type', 'form', 'formaction', 'formmethod')
			addFlags('disabled')
			break
		case 'input':
			addValues(
				'type',
				'name',
				'placeholder',
				'value',
				'pattern',
				'inputmode',
				'autocomplete',
				'min',
				'max',
				'step',
				'minlength',
				'maxlength',
			)
			addFlags('required', 'disabled', 'readonly')
			break
		case 'form':
			addValues('action', 'method', 'enctype', 'target', 'name')
			addFlags('novalidate')
			break
		case 'video':
		case 'audio':
			addValues('src', 'poster', 'preload')
			addFlags('controls', 'autoplay', 'muted', 'loop', 'playsinline')
			break
		case 'iframe':
			addValues('src', 'allow', 'sandbox', 'loading', 'width', 'height', 'name')
			break
		case 'select':
			addValues('name', 'size')
			addFlags('multiple', 'required', 'disabled')
			break
		case 'textarea':
			addValues('name', 'placeholder', 'rows', 'cols', 'minlength', 'maxlength', 'wrap')
			addFlags('required', 'disabled', 'readonly')
			break
		case 'img':
			addValues('src', 'alt', 'width', 'height', 'loading', 'decoding')
			break
	}

	// ARIA attributes (for any element)
	addValues(
		'role',
		'aria-label',
		'aria-labelledby',
		'aria-describedby',
		'aria-controls',
		'aria-owns',
		'aria-current',
		'aria-live',
	)

	// For boolean-like ARIA attributes, preserve the actual value (including "false")
	// Only default to "true" when the attribute is present with no value (e.g., `aria-hidden` without `="..."`)
	const booleanLikeAria = [
		'aria-hidden',
		'aria-expanded',
		'aria-pressed',
		'aria-selected',
		'aria-disabled',
		'aria-required',
		'aria-invalid',
		'aria-atomic',
		'aria-busy',
		'aria-haspopup',
	]
	for (const name of booleanLikeAria) {
		if (!node.hasAttribute(name)) continue
		const val = node.getAttribute(name)
		addAttr(name, val == null || val === '' ? 'true' : val)
	}

	// Custom data-* attributes.
	// Tokenize the raw attribute string one attribute at a time (name, then an
	// optional double-quoted, single-quoted, or unquoted value). Consuming whole
	// attributes means `data-...=` text that merely appears inside another
	// attribute's value (e.g. href="/p?data-id=5") or inside a longer attribute
	// name (e.g. x-data-foo) is never mistaken for a data attribute.
	const rawAttributes: string = node.rawAttrs || ''
	const attrTokens = rawAttributes.matchAll(/([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/g)
	for (const token of attrTokens) {
		const attrName = token[1] ?? ''
		if (!/^data-[\w-]+$/.test(attrName)) continue
		// Skip internal CMS attributes
		if (attrName.startsWith('data-cms') || attrName.startsWith('data-astro')) continue
		// Valueless data attributes have no captured value and are skipped by addAttr
		addAttr(attrName, token[2] ?? token[3] ?? token[4])
	}

	return Object.keys(result).length > 0 ? result : undefined
}