@nuasite/cms-marker 0.0.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,240 @@
1
+ # @nuasite/cms-marker
2
+
3
+ An Astro integration that automatically marks HTML elements with unique identifiers and generates a manifest mapping content to source files and line numbers. Perfect for building CMS editors that need to track where content originates in your codebase.
4
+
5
+ ## Features
6
+
7
+ - **Automatic Marking**: Adds `data-cms-id` attributes to HTML elements during build
8
+ - **Source Location Tracking**: Maps content back to exact line numbers in `.astro` source files
9
+ - **Variable Detection**: Finds content defined as variables in frontmatter
10
+ - **Nested Content Support**: Handles placeholders for nested CMS-editable elements
11
+ - **Dev & Build Modes**: Works in both development and production builds
12
+ - **Manifest Generation**: Creates JSON manifest with all CMS-editable content
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ bun add -D @nuasite/cms-marker
18
+ # or: npm install -D @nuasite/cms-marker
19
+ ```
20
+
21
+ ## Usage
22
+
23
+ Add the integration to your `astro.config.mjs`:
24
+
25
+ ```js
26
+ import cmsMarker from '@nuasite/cms-marker'
27
+ import { defineConfig } from 'astro/config'
28
+
29
+ export default defineConfig({
30
+ integrations: [
31
+ cmsMarker({
32
+ // Optional configuration
33
+ attributeName: 'data-cms-id',
34
+ includeTags: null, // null = all tags, or specify array like ['h1', 'p', 'a']
35
+ excludeTags: ['html', 'head', 'body', 'script', 'style'],
36
+ includeEmptyText: false,
37
+ }),
38
+ ],
39
+ })
40
+ ```
41
+
42
+ ## How It Works
43
+
44
+ ### 1. HTML Marking
45
+
46
+ The integration processes your HTML and adds unique IDs:
47
+
48
+ ```html
49
+ <!-- Before -->
50
+ <h1>Welcome to my site</h1>
51
+ <p>This is some content</p>
52
+
53
+ <!-- After -->
54
+ <h1 data-cms-id="cms-0">Welcome to my site</h1>
55
+ <p data-cms-id="cms-1">This is some content</p>
56
+ ```
57
+
58
+ ### 2. Source Location Tracking
59
+
60
+ It searches your `.astro` source files to find where content originates:
61
+
62
+ ```astro
63
+ <!-- src/components/Hero.astro -->
64
+ ---
65
+ const title = "Welcome to my site";
66
+ ---
67
+ <h1>{title}</h1> <!-- Line 4 -->
68
+ ```
69
+
70
+ ### 3. Manifest Generation
71
+
72
+ Creates a JSON manifest mapping IDs to source locations:
73
+
74
+ ```json
75
+ {
76
+ "cms-0": {
77
+ "id": "cms-0",
78
+ "file": "index.html",
79
+ "tag": "h1",
80
+ "text": "Welcome to my site",
81
+ "sourcePath": "src/components/Hero.astro",
82
+ "sourceLine": 4
83
+ }
84
+ }
85
+ ```
86
+
87
+ ## Configuration Options
88
+
89
+ ### `attributeName`
90
+
91
+ - **Type**: `string`
92
+ - **Default**: `'data-cms-id'`
93
+ - The HTML attribute name to use for marking elements.
94
+
95
+ ### `includeTags`
96
+
97
+ - **Type**: `string[] | null`
98
+ - **Default**: `null`
99
+ - If `null`, all tags are included. Otherwise, only specified tags are marked.
100
+
101
+ ### `excludeTags`
102
+
103
+ - **Type**: `string[]`
104
+ - **Default**: `['html', 'head', 'body', 'script', 'style']`
105
+ - Tags to exclude from marking.
106
+
107
+ ### `includeEmptyText`
108
+
109
+ - **Type**: `boolean`
110
+ - **Default**: `false`
111
+ - Whether to mark elements with no text content.
112
+
113
+ ## Manifest Entry Structure
114
+
115
+ Each entry in the manifest contains:
116
+
117
+ ```typescript
118
+ export interface ManifestEntry {
119
+ id: string // The CMS ID (e.g., "cms-0")
120
+ file: string // Output HTML file (e.g., "index.html")
121
+ tag: string // HTML tag name (e.g., "h1")
122
+ text: string // Text content with placeholders for nested elements
123
+ sourcePath?: string // Source .astro file path
124
+ sourceLine?: number // Line number in source file
125
+ sourceSnippet?: string // Source code snippet
126
+ sourceType?: 'static' | 'variable' | 'prop' | 'computed' // Type of source
127
+ variableName?: string // Variable name if source is a variable
128
+ childCmsIds?: string[] // IDs of nested CMS elements
129
+ }
130
+ ```
131
+
132
+ ## Supported Patterns
133
+
134
+ ### ✅ Static Text
135
+
136
+ ```astro
137
+ <h1>Hello World</h1>
138
+ ```
139
+
140
+ ### ✅ Simple Variables
141
+
142
+ ```astro
143
+ ---
144
+ const title = "My Title";
145
+ ---
146
+ <h1>{title}</h1>
147
+ ```
148
+
149
+ ### ✅ Variables with Type Annotations
150
+
151
+ ```astro
152
+ ---
153
+ const path: string = "src/pages";
154
+ ---
155
+ <pre>{path}</pre>
156
+ ```
157
+
158
+ ### ✅ Object Properties
159
+
160
+ ```astro
161
+ ---
162
+ const content = {
163
+ title: "Welcome",
164
+ subtitle: "Get Started"
165
+ };
166
+ ---
167
+ <h1>{content.title}</h1>
168
+ <h2>{content.subtitle}</h2>
169
+ ```
170
+
171
+ ### ✅ Nested CMS Elements
172
+
173
+ ```astro
174
+ <h1>Start <span>nested</span> end</h1>
175
+ <!-- Manifest text: "Start {{cms:cms-1}} end" -->
176
+ ```
177
+
178
+ ### ✅ Escaped Quotes and Special Characters
179
+
180
+ ```astro
181
+ ---
182
+ const text = 'What\'s up';
183
+ const message = "Hello & goodbye";
184
+ ---
185
+ <p>{text}</p>
186
+ <span>{message}</span>
187
+ ```
188
+
189
+ ### ⚠️ Partial Support
190
+
191
+ Some patterns have limited support and may not always resolve to exact source locations:
192
+
193
+ - Complex variable expressions
194
+ - Props passed from parent components
195
+ - Template literals with expressions
196
+ - Computed values
197
+
198
+ In these cases, the manifest will still include the entry but `sourceLine` may be `undefined`.
199
+
200
+ ## Development
201
+
202
+ ```bash
203
+ # Install dependencies
204
+ bun install
205
+
206
+ # Run tests
207
+ bun test
208
+
209
+ # Run tests in watch mode
210
+ bun test --watch
211
+ ```
212
+
213
+ ## Testing
214
+
215
+ The package includes comprehensive tests (27 tests, all passing) covering:
216
+
217
+ - HTML processing and ID assignment
218
+ - Tag inclusion/exclusion rules
219
+ - Manifest generation
220
+ - Source location finding
221
+ - Variable reference detection
222
+ - Escaped quotes and HTML entities
223
+ - Multiple identical tags disambiguation
224
+ - Edge cases and error handling
225
+
226
+ Run tests with:
227
+
228
+ ```bash
229
+ bun test
230
+ ```
231
+
232
+ See `src/tests/` for all test cases.
233
+
234
+ ## License
235
+
236
+ Apache-2.0
237
+
238
+ ## Contributing
239
+
240
+ Contributions are welcome! Please feel free to submit issues and pull requests.
package/package.json ADDED
@@ -0,0 +1,42 @@
1
+ {
2
+ "name": "@nuasite/cms-marker",
3
+ "description": "Mark html tags with a unique identifier and generate a JSON file with the mapping.",
4
+ "files": [
5
+ "dist/**",
6
+ "src/**",
7
+ "README.md",
8
+ "package.json"
9
+ ],
10
+ "homepage": "https://github.com/nuasite/nuasite/blob/main/packages/cms-marker/README.md",
11
+ "repository": {
12
+ "type": "git",
13
+ "url": "git+https://github.com/nuasite/nuasite.git",
14
+ "directory": "packages/cms-marker"
15
+ },
16
+ "license": "Apache-2.0",
17
+ "version": "0.0.42",
18
+ "module": "src/index.ts",
19
+ "types": "src/index.ts",
20
+ "type": "module",
21
+ "dependencies": {
22
+ "@astrojs/compiler": "catalog:astro",
23
+ "astro": "catalog:astro",
24
+ "node-html-parser": "catalog:parsers"
25
+ },
26
+ "devDependencies": {
27
+ "@types/bun": "catalog:build"
28
+ },
29
+ "peerDependencies": {
30
+ "typescript": "catalog:build",
31
+ "vite": "catalog:build"
32
+ },
33
+ "scripts": {
34
+ "test": "bun test"
35
+ },
36
+ "keywords": [
37
+ "devtools",
38
+ "nuasite",
39
+ "tooling",
40
+ "withastro"
41
+ ]
42
+ }
@@ -0,0 +1,193 @@
1
+ import { parse as parseAstro } from '@astrojs/compiler'
2
+ import fs from 'node:fs/promises'
3
+ import path from 'node:path'
4
+ import type { Plugin } from 'vite'
5
+
6
/** Options accepted by `createAstroTransformPlugin`. */
export interface AstroTransformOptions {
	// Accepted for API compatibility; the transform plugin in this file does not
	// read it, because component marking is intentionally disabled at this level.
	markComponents?: boolean
}
9
+
10
/**
 * Build the Vite plugin that stamps HTML elements in `.astro` templates with
 * `data-astro-source-file` and `data-astro-source-line` attributes.
 *
 * The plugin is registered with `enforce: 'pre'` so that it runs before
 * Astro's own transforms.
 *
 * NOTE: Components are deliberately left untouched here. Editing component
 * tags in the raw `.astro` source breaks Astro's JSX-like parser, so component
 * marking is done on the HTML output instead (dev-middleware and
 * build-processor).
 *
 * @param options Accepted for API compatibility; not read at this level.
 * @returns A Vite plugin whose `transform` hook returns `null` whenever the
 * file is skipped, unchanged, or could not be processed.
 */
export function createAstroTransformPlugin(options: AstroTransformOptions = {}): Plugin {
	// Component marking is intentionally disabled at the transform level,
	// which is why `options.markComponents` is never consulted.

	// Only project-owned .astro files are rewritten.
	const isEligible = (id: string): boolean => id.endsWith('.astro') && !id.includes('node_modules')

	const plugin: Plugin = {
		name: 'astro-cms-source-injector',
		enforce: 'pre', // Run before Astro's own transforms

		async transform(code: string, id: string) {
			if (!isEligible(id)) {
				return null
			}

			try {
				// The file is re-read from disk; the `code` argument is not used.
				const source = await fs.readFile(id, 'utf-8')
				const sourcePath = path.relative(process.cwd(), id)
				const { ast } = await parseAstro(source, { position: true })

				if (!ast) {
					return null
				}

				const annotated = injectSourceAttributes(source, ast, sourcePath)

				// Returning null tells Vite that nothing changed.
				return annotated === source ? null : { code: annotated, map: null }
			} catch (error) {
				// Best effort: a failure here must not break the build.
				console.warn(`[astro-cms-marker] Failed to transform ${id}:`, error)
				return null
			}
		},
	}

	return plugin
}
62
+
63
/** Structural subset of an `@astrojs/compiler` AST node that the injector reads. */
interface SourceAstNode {
	type?: string
	name?: string
	position?: { start: { line: number; column: number } }
	children?: SourceAstNode[]
}

/** A pending text insertion; `line` and `column` are 0-based indexes into the split source. */
interface SourceInsertion {
	line: number
	column: number
	insertion: string
}

/** Elements that never receive source attributes (their children still do). */
const SOURCE_SKIPPED_TAGS = new Set(['html', 'head', 'body', 'script', 'style', 'slot', 'fragment'])

/**
 * Matches `<` plus a full tag name at the start of a string. `-`, `:` and `.`
 * are accepted inside the name so the attribute is never spliced into the
 * middle of a hyphenated or namespaced tag.
 */
const OPENING_TAG_PATTERN = /^<(\w[\w:.-]*)/

/**
 * Inject source location attributes into HTML elements.
 *
 * Walks the AST, and for every `element` node located after the frontmatter
 * inserts ` data-astro-source-file="…" data-astro-source-line="…"` directly
 * after the tag name in the original text.
 *
 * NOTE: Component marking is NOT done here - it breaks Astro's parser.
 *
 * @param code Raw `.astro` source text.
 * @param ast Root node produced by the Astro parser with positions enabled;
 * only `children`, `type`, `name` and `position.start` are read.
 * @param filePath Path written verbatim into `data-astro-source-file`.
 * @returns The source with attributes inserted, or text equal to `code` when
 * nothing qualified.
 */
function injectSourceAttributes(code: string, ast: any, filePath: string): string {
	const lines = code.split('\n')
	const insertions: SourceInsertion[] = []

	// Locate the closing frontmatter fence: the second line consisting solely of `---`.
	let fencesSeen = 0
	let frontmatterEnd = -1
	for (let i = 0; i < lines.length; i++) {
		if (lines[i]?.trim() !== '---') {
			continue
		}
		fencesSeen++
		if (fencesSeen === 2) {
			frontmatterEnd = i
			break
		}
	}

	// AST lines are 1-based. With no frontmatter every line belongs to the
	// template; otherwise only lines after the closing fence do.
	const templateStartLine = frontmatterEnd === -1 ? 0 : frontmatterEnd + 1

	const visit = (node: SourceAstNode | null | undefined): void => {
		if (!node) {
			return
		}

		// Only process regular HTML elements, NOT components.
		if (node.type === 'element' && node.position) {
			const { start } = node.position
			const tagName = node.name?.toLowerCase()
			const isSkipped = tagName !== undefined && SOURCE_SKIPPED_TAGS.has(tagName)

			if (start.line > templateStartLine && !isSkipped) {
				const lineIndex = start.line - 1
				const lineText = lines[lineIndex]

				// An out-of-range line or a column that does not point at `<tag`
				// only disqualifies this element. Descendants are still visited
				// below, because each one is validated against the text on its own.
				if (lineText !== undefined) {
					const tagStartCol = start.column - 1
					const tagMatch = OPENING_TAG_PATTERN.exec(lineText.slice(tagStartCol))
					if (tagMatch) {
						insertions.push({
							line: lineIndex,
							column: tagStartCol + tagMatch[0].length,
							insertion: ` data-astro-source-file="${filePath}" data-astro-source-line="${start.line}"`,
						})
					}
				}
			}
		}

		if (Array.isArray(node.children)) {
			for (const child of node.children) {
				visit(child)
			}
		}
	}

	// Start walking from the root's children.
	const rootChildren: unknown = ast?.children
	if (Array.isArray(rootChildren)) {
		for (const child of rootChildren) {
			visit(child as SourceAstNode)
		}
	}

	// Apply bottom-up and right-to-left so earlier insertions never shift the
	// coordinates of the ones still pending.
	insertions.sort((a, b) => (a.line !== b.line ? b.line - a.line : b.column - a.column))

	const modifiedLines = [...lines]
	for (const mod of insertions) {
		const line = modifiedLines[mod.line]

		// Validate line exists - if not, there's a bug in AST positions or bounds checking
		if (line === undefined) {
			console.error(
				`[astro-cms-marker] Invalid modification at line ${mod.line + 1} in ${filePath}. ` +
					`This indicates a bug in @astrojs/compiler AST positions or bounds checking. Skipping modification.`,
			)
			continue
		}

		// Validate column is within line bounds
		if (mod.column < 0 || mod.column > line.length) {
			console.error(
				`[astro-cms-marker] Invalid column ${mod.column} at line ${mod.line + 1} in ${filePath}. ` +
					`Line length is ${line.length}. Skipping modification.`,
			)
			continue
		}

		modifiedLines[mod.line] = line.slice(0, mod.column) + mod.insertion + line.slice(mod.column)
	}

	return modifiedLines.join('\n')
}
@@ -0,0 +1,164 @@
1
+ import type { AstroIntegrationLogger } from 'astro'
2
+ import fs from 'node:fs/promises'
3
+ import path from 'node:path'
4
+ import { fileURLToPath } from 'node:url'
5
+ import { processHtml } from './html-processor'
6
+ import type { ManifestWriter } from './manifest-writer'
7
+ import type { CmsMarkerOptions } from './types'
8
+
9
+ // Concurrency limit for parallel processing
10
+ const MAX_CONCURRENT = 10
11
+
12
/**
 * Derive the page path (URL path) for a built HTML file.
 *
 * Examples, relative to `outDir`:
 *   about/index.html -> /about
 *   index.html       -> /
 *   blog/post.html   -> /blog/post
 *
 * @param htmlPath Path of the HTML file.
 * @param outDir Build output directory the page path is made relative to.
 * @returns The page path, always starting with `/` and using `/` separators.
 */
function getPagePath(htmlPath: string, outDir: string): string {
	const segments = path.relative(outDir, htmlPath).split(path.sep)
	const fileName = segments.pop() ?? ''

	// `index.html` maps to its directory; any other page keeps its base name.
	if (fileName !== 'index.html') {
		// Strip only the trailing extension, so a name that merely contains
		// ".html" (e.g. "a.html-guide.html") is not mangled.
		segments.push(fileName.replace(/\.html$/, ''))
	}

	return '/' + segments.join('/')
}
35
+
36
/**
 * Process a single HTML file: mark it, record its manifest data, and write
 * the transformed HTML back over the original file.
 *
 * @param filePath Path of the HTML file to process.
 * @param outDir Build output directory; used to derive the relative file name
 * and the page path.
 * @param config Fully resolved integration options.
 * @param manifestWriter Collector that receives this page's entries and components.
 * @param idCounter Counter shared by all pages. Each generated ID takes the
 * current value and increments it, so IDs stay unique across files that are
 * processed concurrently.
 * @returns The number of manifest entries produced for this file.
 */
async function processFile(
	filePath: string,
	outDir: string,
	config: Required<CmsMarkerOptions>,
	manifestWriter: ManifestWriter,
	idCounter: { value: number },
): Promise<number> {
	const relPath = path.relative(outDir, filePath)
	const pagePath = getPagePath(filePath, outDir)
	const html = await fs.readFile(filePath, 'utf-8')

	// Read-and-increment happens in one synchronous expression, so two pages
	// in flight at the same time can never be handed the same ID.
	const idGenerator = () => `cms-${idCounter.value++}`

	const result = await processHtml(
		html,
		relPath,
		{
			attributeName: config.attributeName,
			includeTags: config.includeTags,
			excludeTags: config.excludeTags,
			includeEmptyText: config.includeEmptyText,
			generateManifest: config.generateManifest,
			markComponents: config.markComponents,
			componentDirs: config.componentDirs,
		},
		idGenerator,
	)

	// Note: Source locations are now extracted from Astro's compiler attributes
	// in html-processor.ts, so we don't need the expensive findSourceLocation calls

	// Add to manifest writer (handles per-page manifest writes)
	manifestWriter.addPage(pagePath, result.entries, result.components)

	// Write transformed HTML back
	await fs.writeFile(filePath, result.html, 'utf-8')

	return Object.keys(result.entries).length
}
80
+
81
/**
 * Run `processFile` over every file, at most `MAX_CONCURRENT` at a time.
 *
 * Files are taken in consecutive slices; each slice is processed in parallel
 * and must finish before the next one starts.
 *
 * @returns The total number of manifest entries across all files.
 */
async function processFilesInBatches(
	files: string[],
	outDir: string,
	config: Required<CmsMarkerOptions>,
	manifestWriter: ManifestWriter,
	idCounter: { value: number },
): Promise<number> {
	let totalEntries = 0
	let offset = 0

	while (offset < files.length) {
		const chunk = files.slice(offset, offset + MAX_CONCURRENT)
		const counts = await Promise.all(
			chunk.map(file => processFile(file, outDir, config, manifestWriter, idCounter)),
		)

		for (const count of counts) {
			totalEntries += count
		}

		offset += MAX_CONCURRENT
	}

	return totalEntries
}
104
+
105
/**
 * Process the build output: mark every HTML file under the output directory,
 * then finalize the manifest and report a summary.
 *
 * @param dir Output directory as a file URL.
 * @param config Fully resolved integration options.
 * @param manifestWriter Manifest collector; it is given the output directory
 * and finalized here.
 * @param idCounter Counter shared by all pages for generating IDs.
 * @param logger Optional Astro logger; without it the summary goes to `console.log`.
 */
export async function processBuildOutput(
	dir: URL,
	config: Required<CmsMarkerOptions>,
	manifestWriter: ManifestWriter,
	idCounter: { value: number },
	logger?: AstroIntegrationLogger,
): Promise<void> {
	const outDir = fileURLToPath(dir)
	manifestWriter.setOutDir(outDir)

	const htmlFiles = await findHtmlFiles(outDir)
	if (htmlFiles.length === 0) {
		logger?.info('No HTML files found to process')
		return
	}

	const startedAt = Date.now()

	// Process all files in parallel batches
	await processFilesInBatches(htmlFiles, outDir, config, manifestWriter, idCounter)

	// Finalize manifest (writes global manifest and waits for all per-page writes)
	const { totalPages, totalEntries, totalComponents } = await manifestWriter.finalize()

	const elapsedMs = Date.now() - startedAt
	const msg = `Processed ${totalPages} pages with ${totalEntries} entries and ${totalComponents} components in ${elapsedMs}ms`

	if (!logger) {
		console.log(`[astro-cms-marker] ${msg}`)
		return
	}
	logger.info(msg)
}
142
+
143
/**
 * Recursively find all HTML files in a directory (parallel version).
 *
 * Sub-directories are scanned concurrently, so hits are discovered in an
 * order that depends on filesystem timing. The list is sorted before it is
 * returned so callers see the same order on every run.
 *
 * @param dir Directory to scan.
 * @returns Paths of all regular files ending in `.html`, sorted ascending.
 */
async function findHtmlFiles(dir: string): Promise<string[]> {
	const found: string[] = []

	async function scan(currentDir: string): Promise<void> {
		const entries = await fs.readdir(currentDir, { withFileTypes: true })

		await Promise.all(entries.map(async (entry) => {
			const fullPath = path.join(currentDir, entry.name)
			if (entry.isDirectory()) {
				await scan(fullPath)
			} else if (entry.isFile() && fullPath.endsWith('.html')) {
				found.push(fullPath)
			}
		}))
	}

	await scan(dir)

	// Stable order, independent of the order in which directory reads completed.
	return found.sort()
}