@abraca/convert 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,169 @@
1
+ // ── Manifest: docId <-> filesystem path mapping ─────────────────────────────
2
+ //
3
+ // The manifest tracks which doc on the server maps to which file on
4
+ // disk, plus per-doc upload (binary attachment) records and content
5
+ // hashes for change detection.
6
+ //
7
+ // Filesystem I/O is INJECTABLE so Node hosts (memfs for tests, fs/promises
8
+ // for Electron, @tauri-apps/plugin-fs for Tauri) can all use this code
9
+ // without the package taking a hard dep on any one. There is no implicit
10
+ // fallback: if no adapter has been installed, every manifest read/write
11
+ // throws. Hosts and tests must call `setFsAdapter()` first.
12
+
13
/**
 * Filesystem surface the host injects via `setFsAdapter()`.
 *
 * The text-file and `mkdir` members are required; the binary, `exists` and
 * `remove` members are optional, so a host only supplies what it supports.
 */
export interface FsAdapter {
  /** Read the file at `path` as text. */
  readTextFile: (path: string) => Promise<string>
  /** Write `contents` as text to the file at `path`. */
  writeTextFile: (path: string, contents: string) => Promise<void>
  /** Create the directory at `path`; `recursive` asks for missing parents too. */
  mkdir: (path: string, options?: { recursive?: boolean }) => Promise<void>
  /** Optional: read the file at `path` as raw bytes. */
  readBinaryFile?: (path: string) => Promise<Uint8Array>
  /** Optional: write raw bytes to the file at `path`. */
  writeBinaryFile?: (path: string, contents: Uint8Array) => Promise<void>
  /** Optional: report whether `path` exists. */
  exists?: (path: string) => Promise<boolean>
  /** Optional: delete the entry at `path`. */
  remove?: (path: string) => Promise<void>
}
22
+
23
/** Adapter registered by the host; stays `null` until `setFsAdapter()` runs. */
let _adapter: FsAdapter | null = null

/**
 * Register the filesystem adapter used by the manifest functions.
 * A later call replaces the previously registered adapter.
 */
export function setFsAdapter(adapter: FsAdapter): void {
  _adapter = adapter
}

/** Return the currently registered adapter, or `null` when none is set. */
export function getFsAdapter(): FsAdapter | null {
  return _adapter
}

/**
 * Resolve with the registered adapter.
 *
 * @throws Error (as a rejected promise) when no adapter has been registered.
 */
async function loadFsApi(): Promise<FsAdapter> {
  if (!_adapter) {
    const message = [
      '@abraca/convert: no FsAdapter installed. ',
      'Call setFsAdapter({readTextFile, writeTextFile, mkdir, …}) at boot. ',
      'Tauri hosts can pass `@tauri-apps/plugin-fs` directly; Node hosts can pass ',
      'a wrapper over `fs/promises`; tests can use the in-memory adapter from ',
      'tests/file-blocks.test.ts as a template.',
    ].join('')
    throw new Error(message)
  }
  return _adapter
}
45
+
46
+ // ── Types ────────────────────────────────────────────────────────────────────
47
+
48
/** Manifest record for one uploaded (binary attachment) file of a doc. */
export interface UploadManifestEntry {
  /** Identifier of the upload. */
  uploadId: string
  /** File name of the attachment. */
  filename: string
  /** Location on disk, e.g. "_files/abc123/photo.jpg". */
  relativePath: string
  /** Content hash used for change detection. */
  contentHash: string
}
54
+
55
/** Manifest record tying one server doc to one file on disk. */
export interface ManifestEntry {
  /** Id of the doc; also the key of this entry in the manifest. */
  docId: string
  /** Location on disk, e.g. "projects/my-project/_index.md". */
  relativePath: string
  /** Hash of the last-written/read markdown. */
  contentHash: string
  /** Timestamp of the last write (numeric; unit not fixed by this module). */
  lastWrittenAt: number
  /** Timestamp of the last read (numeric; unit not fixed by this module). */
  lastReadAt: number
  /** Upload records belonging to this doc. */
  uploads: UploadManifestEntry[]
}
63
+
64
/** On-disk manifest document (format version 2). */
export interface FsSyncManifest {
  /** Format version; only the literal `2` is accepted. */
  version: 2
  /** Id of the space this manifest belongs to. */
  spaceId: string
  /** Timestamp of the last save; `0` for a manifest that was never saved. */
  lastSyncAt: number
  /** Per-doc records, keyed by docId. */
  entries: Record<string, ManifestEntry>
}
70
+
71
+ // ── CRUD ─────────────────────────────────────────────────────────────────────
72
+
73
/** Name of the bookkeeping directory created inside the sync directory. */
const MANIFEST_DIR = '.abracadabra'
/** File name of the manifest inside `MANIFEST_DIR`. */
const MANIFEST_FILE = 'manifest.json'

/** Bookkeeping directory: `<syncDir>/.abracadabra`. */
export function manifestDir(syncDir: string): string {
  return `${syncDir}/${MANIFEST_DIR}`
}

/** Manifest file: `<syncDir>/.abracadabra/manifest.json`. */
function manifestPath(syncDir: string): string {
  return `${manifestDir(syncDir)}/${MANIFEST_FILE}`
}

/** Trash directory: `<syncDir>/.abracadabra/trash`. */
export function trashDir(syncDir: string): string {
  return `${manifestDir(syncDir)}/trash`
}

/** Orphans directory: `<syncDir>/.abracadabra/orphans`. */
export function orphansDir(syncDir: string): string {
  return `${manifestDir(syncDir)}/orphans`
}

/** Conflicts directory: `<syncDir>/.abracadabra/conflicts`. */
export function conflictsDir(syncDir: string): string {
  return `${manifestDir(syncDir)}/conflicts`
}
95
+
96
/**
 * Build a fresh version-2 manifest for `spaceId`, with no entries and
 * `lastSyncAt` set to 0.
 */
export function createEmptyManifest(spaceId: string): FsSyncManifest {
  const blank: FsSyncManifest = { version: 2, spaceId, lastSyncAt: 0, entries: {} }
  return blank
}
104
+
105
/**
 * Minimal structural check for a parsed manifest: a non-null object with
 * `version === 2` and an `entries` value that is a non-array object.
 */
function isUsableManifest(value: unknown): value is FsSyncManifest {
  if (typeof value !== 'object' || value === null) return false
  const candidate = value as { version?: unknown, entries?: unknown }
  const { entries } = candidate
  return candidate.version === 2
    && typeof entries === 'object'
    && entries !== null
    && !Array.isArray(entries)
}

/**
 * Load the manifest stored under `syncDir`.
 *
 * Falls back to a fresh empty manifest for `spaceId` when the file is missing,
 * is not valid JSON, is not version 2, or has no usable `entries` object
 * (returning such a file would make later entry lookups throw).
 *
 * NOTE(review): a stored manifest is returned even when its `spaceId` differs
 * from the `spaceId` argument — confirm that this is intended.
 *
 * @param syncDir Root directory of the sync.
 * @param spaceId Space id used only for the fresh fallback manifest.
 * @throws Error when no filesystem adapter has been installed.
 */
export async function loadManifest(syncDir: string, spaceId: string): Promise<FsSyncManifest> {
  const fs = await loadFsApi()
  try {
    const raw = await fs.readTextFile(manifestPath(syncDir))
    const parsed: unknown = JSON.parse(raw)
    if (isUsableManifest(parsed)) return parsed
  }
  catch {
    // missing or corrupt — return fresh
  }
  return createEmptyManifest(spaceId)
}
117
+
118
/**
 * Persist `manifest` as pretty-printed JSON under `syncDir`, stamping it with
 * the current time as `lastSyncAt`.
 *
 * The caller's object is updated only after the write succeeds, so a failed
 * save does not leave the in-memory manifest claiming a sync that never
 * reached disk.
 *
 * @param syncDir  Root directory of the sync.
 * @param manifest Manifest to write; its `lastSyncAt` is updated on success.
 * @throws Error when no adapter is installed, or whatever the adapter's
 *         `writeTextFile` rejects with.
 */
export async function saveManifest(syncDir: string, manifest: FsSyncManifest): Promise<void> {
  const fs = await loadFsApi()
  try {
    await fs.mkdir(manifestDir(syncDir), { recursive: true })
  }
  catch {
    // Best-effort (e.g. directory already exists). If the directory is really
    // unusable, the write below fails and reports the problem.
  }
  const savedAt = Date.now()
  // Serialize a stamped copy so the caller's object stays untouched on failure.
  const snapshot: FsSyncManifest = { ...manifest, lastSyncAt: savedAt }
  await fs.writeTextFile(manifestPath(syncDir), JSON.stringify(snapshot, null, 2))
  manifest.lastSyncAt = savedAt
}
130
+
131
+ // ── Lookups ──────────────────────────────────────────────────────────────────
132
+
133
/**
 * Return the entry stored for `docId`, or `undefined` when there is none.
 *
 * Only the manifest's own keys are consulted, so ids that happen to match an
 * inherited `Object.prototype` member (e.g. "toString") are reported as
 * missing rather than returning that member.
 */
export function lookupByDocId(manifest: FsSyncManifest, docId: string): ManifestEntry | undefined {
  const { entries } = manifest
  return Object.prototype.hasOwnProperty.call(entries, docId) ? entries[docId] : undefined
}
136
+
137
/**
 * Return the first entry (in manifest key order) whose `relativePath` equals
 * `relativePath` exactly, or `undefined` when none matches.
 */
export function lookupByPath(manifest: FsSyncManifest, relativePath: string): ManifestEntry | undefined {
  return Object.values(manifest.entries).find(candidate => candidate.relativePath === relativePath)
}
143
+
144
/**
 * Return the first entry (in manifest key order) whose `contentHash` equals
 * `contentHash`, or `undefined` when none matches.
 */
export function lookupByHash(manifest: FsSyncManifest, contentHash: string): ManifestEntry | undefined {
  return Object.values(manifest.entries).find(candidate => candidate.contentHash === contentHash)
}
150
+
151
/** Store `entry` in the manifest under its own `docId`, replacing any previous entry. */
export function setEntry(manifest: FsSyncManifest, entry: ManifestEntry): void {
  const { docId } = entry
  manifest.entries[docId] = entry
}
154
+
155
/**
 * Delete the entry stored for `docId` and return it.
 *
 * Only the manifest's own keys are considered, so an id matching an inherited
 * `Object.prototype` member (e.g. "toString") yields `undefined` rather than
 * that member.
 *
 * @returns The removed entry, or `undefined` when `docId` was not present.
 */
export function removeEntry(manifest: FsSyncManifest, docId: string): ManifestEntry | undefined {
  const { entries } = manifest
  if (!Object.prototype.hasOwnProperty.call(entries, docId)) return undefined
  const removed = entries[docId]
  delete entries[docId]
  return removed
}
160
+
161
+ // ── Build reverse lookup (path -> docId) ─────────────────────────────────────
162
+
163
/**
 * Build a `relativePath -> docId` map from the manifest entries.
 * When several entries share a path, the one latest in key order wins.
 */
export function buildReverseLookup(manifest: FsSyncManifest): Map<string, string> {
  const pairs = Object.entries(manifest.entries).map(
    ([docId, entry]): [string, string] => [entry.relativePath, docId],
  )
  return new Map<string, string>(pairs)
}
@@ -0,0 +1,207 @@
1
+ // ── Collision-safe path building for FS sync ─────────────────────────────────
2
+
3
+ import type { FsSyncManifest } from './manifest.ts'
4
+
5
/** One node of the document tree, as consumed by the path helpers. */
export interface FsTreeEntry {
  /** Display label; slugified to produce the file or directory name. */
  label: string
  /** Id of the parent doc, or `null` for a root-level doc. */
  parentId: string | null
  /** Position among siblings; new siblings take the current maximum + 1. */
  order: number
  /** Optional type tag; not interpreted by the path helpers. */
  type?: string
  /** Optional free-form metadata; not interpreted by the path helpers. */
  meta?: any
}
12
+
13
/**
 * Turn a document label into a filesystem-safe slug (no extension).
 *
 * Lowercases the label, drops every character other than a-z, 0-9,
 * whitespace and "-", turns whitespace runs into "-", collapses repeated "-"
 * and trims one leading/trailing "-". An empty result becomes "untitled".
 *
 * e.g. "My Project!" -> "my-project"
 */
export function labelToFilename(label: string): string {
  const lowered = label.toLowerCase()
  const allowedOnly = lowered.replace(/[^a-z0-9\s-]/g, '')
  const dashed = allowedOnly.replace(/\s+/g, '-').replace(/-+/g, '-')
  const trimmed = dashed.replace(/^-|-$/g, '')
  return trimmed || 'untitled'
}
27
+
28
/**
 * Best-effort inverse of the slugging: drop a trailing collision suffix
 * ("~" followed by exactly four characters from a-z / 0-9), then turn every
 * "-" into a space.
 *
 * e.g. "my-project" -> "my project", "my-project~a3f2" -> "my project"
 */
export function fsFilenameToLabel(filename: string): string {
  const withoutSuffix = filename.replace(/~[a-z0-9]{4}$/, '')
  return withoutSuffix.replace(/-/g, ' ')
}
37
+
38
/**
 * Report whether any tree entry names `docId` as its parent. Docs with
 * children are stored using the `_index.md` convention.
 */
export function hasChildren(docId: string, treeData: Record<string, FsTreeEntry>): boolean {
  return Object.values(treeData).some(node => node.parentId === docId)
}
47
+
48
/**
 * Pick a filename (without extension) for `docId` that no other manifest
 * entry already occupies.
 *
 * If the desired path is free, `desiredFilename` is returned unchanged.
 * Otherwise "~" plus the first 4 characters of `docId` is appended. Should
 * that path be taken as well (another doc with the same id prefix collided
 * earlier), the id prefix is lengthened one character at a time, ending with
 * the full `docId`, so two docs are never mapped onto the same file.
 *
 * NOTE(review): suffixes longer than 4 characters occur only in that
 * double-collision case; label recovery that strips exactly 4 suffix
 * characters will leave them in place.
 *
 * @param desiredFilename Slug the doc would like to use.
 * @param docId           Id of the doc being placed; its own entry is ignored.
 * @param parentPath      Directory part of the path ("" for the root).
 * @param manifest        Manifest whose entries define the occupied paths.
 * @param isIndex         Whether the doc is stored as `<name>/_index.md`.
 */
function resolveCollision(
  desiredFilename: string,
  docId: string,
  parentPath: string,
  manifest: FsSyncManifest,
  isIndex: boolean
): string {
  const dirPrefix = parentPath ? `${parentPath}/` : ''
  const tail = isIndex ? '/_index.md' : '.md'
  const pathFor = (name: string): string => `${dirPrefix}${name}${tail}`

  // Paths claimed by every doc other than this one.
  const taken = new Set<string>()
  for (const [entryDocId, entry] of Object.entries(manifest.entries)) {
    if (entryDocId !== docId) taken.add(entry.relativePath)
  }

  if (!taken.has(pathFor(desiredFilename))) return desiredFilename

  // Collision — append a disambiguator, growing it until the path is free.
  for (let length = 4; length < docId.length; length++) {
    const candidate = `${desiredFilename}~${docId.substring(0, length)}`
    if (!taken.has(pathFor(candidate))) return candidate
  }
  return `${desiredFilename}~${docId}`
}
75
+
76
/**
 * Compute a doc's path relative to the sync root.
 *
 * - Ancestor labels are slugified and joined into the directory part.
 * - A doc with children is written as `<name>/_index.md`, a leaf as `<name>.md`.
 * - The doc's own name goes through collision resolution against the manifest.
 * - A doc missing from `treeData` falls back to `<docId>.md`.
 *
 * The ancestor walk stops at a root, at a parent missing from `treeData`, or
 * when an id repeats (cycle guard).
 */
export function buildRelativePath(
  docId: string,
  treeData: Record<string, FsTreeEntry>,
  manifest: FsSyncManifest
): string {
  const self = treeData[docId]
  if (!self) return `${docId}.md` // fallback for unknown docs

  // Collect slugs leaf-first: index 0 is the doc itself, then its ancestors.
  const leafFirst: string[] = []
  const seen = new Set<string>()
  let cursorId = docId
  let cursor: FsTreeEntry | undefined = self
  while (cursor && !seen.has(cursorId)) {
    seen.add(cursorId)
    leafFirst.push(labelToFilename(cursor.label))
    if (!cursor.parentId) break
    cursorId = cursor.parentId
    cursor = treeData[cursorId]
  }

  const [ownName, ...ancestors] = leafFirst
  const parentPath = ancestors.reverse().join('/')
  const isIndex = hasChildren(docId, treeData)
  const finalName = resolveCollision(ownName!, docId, parentPath, manifest, isIndex)

  const dirPrefix = `${parentPath}${parentPath ? '/' : ''}`
  return isIndex ? `${dirPrefix}${finalName}/_index.md` : `${dirPrefix}${finalName}.md`
}
122
+
123
/**
 * Get the directory portion of a doc's relative path.
 *
 * - `_index.md` docs: the directory containing `_index.md`
 *   (e.g. "projects/my-project/_index.md" -> "projects/my-project").
 * - Leaf docs: the parent directory
 *   (e.g. "projects/my-project/task.md" -> "projects/my-project").
 * - Paths without any "/" yield "".
 *
 * Only the trailing "/_index.md" is removed; an earlier occurrence of the
 * same text inside the path is left untouched.
 */
export function getDocDir(relativePath: string): string {
  const indexSuffix = '/_index.md'
  if (relativePath.endsWith(indexSuffix)) {
    return relativePath.slice(0, -indexSuffix.length)
  }
  const lastSlash = relativePath.lastIndexOf('/')
  return lastSlash >= 0 ? relativePath.substring(0, lastSlash) : ''
}
137
+
138
/**
 * Determine the parent docId for a file at `relativePath` by walking its
 * directory segments from the root and matching each against the tree.
 *
 * A segment matches a tree entry under the current parent when the entry's
 * slugified label equals the segment. If that fails and the segment has the
 * form `<slug>~<idPrefix>` (the collision-suffix shape), it matches the entry
 * whose slug is `<slug>` and whose docId starts with `<idPrefix>`.
 *
 * For `_index.md` files the doc's own directory is not part of the parent
 * chain. When a segment cannot be resolved, the walk stops and the last
 * resolved parent is returned.
 *
 * @returns The parent docId, or `null` for a root-level doc.
 */
export function resolveParentFromPath(
  relativePath: string,
  treeData: Record<string, FsTreeEntry>
): string | null {
  // Strip filename to get directory parts
  const parts = relativePath.split('/')
  parts.pop()

  // For _index.md, also remove the doc's own directory name
  if (relativePath.endsWith('/_index.md') && parts.length > 0) {
    parts.pop()
  }

  if (parts.length === 0) return null // root-level doc

  const nodes = Object.entries(treeData)
  let parentId: string | null = null
  for (const segment of parts) {
    let found = nodes.find(
      ([, e]) => labelToFilename(e.label) === segment && e.parentId === parentId
    )

    if (!found) {
      // Retry treating the segment as "<slug>~<idPrefix>".
      const tilde = segment.lastIndexOf('~')
      if (tilde > 0 && tilde < segment.length - 1) {
        const slug = segment.slice(0, tilde)
        const idPrefix = segment.slice(tilde + 1)
        found = nodes.find(
          ([id, e]) => e.parentId === parentId && id.startsWith(idPrefix) && labelToFilename(e.label) === slug
        )
      }
    }

    // Can't resolve further — return last known parent
    if (!found) break
    parentId = found[0]
  }
  return parentId
}
171
+
172
/**
 * Simple string hash (same as current useFsSync): a 32-bit rolling
 * `hash * 31 + codeUnit` over the UTF-16 code units, rendered in base 36.
 * The result can be negative (leading "-"). Not a cryptographic hash.
 */
export function simpleHash(str: string): string {
  let acc = 0
  for (let index = 0; index < str.length; index += 1) {
    // Math.imul keeps the multiplication in 32-bit integer arithmetic.
    acc = (Math.imul(31, acc) + str.charCodeAt(index)) | 0
  }
  return acc.toString(36)
}
182
+
183
/**
 * Copy a tree map into a flat `docId -> entry` record.
 *
 * `treeMap` only needs a Map-style `forEach((value, key) => …)`. Values that
 * are falsy or not of type "object" are skipped; the rest are stored as-is
 * without further validation.
 */
export function getTreeData(treeMap: any): Record<string, FsTreeEntry> {
  const flat: Record<string, FsTreeEntry> = {}
  treeMap.forEach((value: any, id: string) => {
    if (!value || typeof value !== 'object') return
    flat[id] = value as FsTreeEntry
  })
  return flat
}
195
+
196
/**
 * Order value for a new child of `parentId`: the highest `order` among the
 * existing siblings plus one. Returns 0 when there are no siblings or none
 * has an order above -1.
 */
export function nextOrder(treeData: Record<string, FsTreeEntry>, parentId: string | null): number {
  const highest = Object.values(treeData).reduce(
    (best, node) => (node.parentId === parentId && node.order > best ? node.order : best),
    -1,
  )
  return highest + 1
}
@@ -0,0 +1,322 @@
1
+ import * as Y from 'yjs'
2
+
3
+ // ── HTML → Y.js converter ───────────────────────────────────────────────────
4
+ //
5
+ // Uses DOMParser (browser builtin) to parse HTML, then walks the DOM tree
6
+ // and writes TipTap-compatible Y.XmlElement nodes into the fragment.
7
+ //
8
+ // Follows the same attach-before-fill pattern as markdownToYjs.ts:
9
+ // always attach nodes to the doc before inserting text into them.
10
+
11
/** Inline marks in effect for a piece of text; an absent key means "not set". */
interface ActiveMarks {
  /** Set inside <strong> / <b>. */
  bold?: true
  /** Set inside <em> / <i>. */
  italic?: true
  /** Set inside <code>. */
  code?: true
  /** Set inside <s> / <del> / <strike>. */
  strike?: true
  /** Set inside an <a> that has a non-empty href. */
  link?: { href: string }
}
18
+
19
/** Return the node's `textContent`, or "" when it is null/undefined. */
function getTextContent(node: Node): string {
  const { textContent } = node
  return textContent ?? ''
}
22
+
23
/**
 * Extract the language from the element's first `language-*` class
 * (e.g. "language-ts" -> "ts"); "" when no such class is present.
 */
function langFromClass(el: Element): string {
  const prefix = 'language-'
  const match = Array.from(el.classList).find(cls => cls.startsWith(prefix))
  return match === undefined ? '' : match.slice(prefix.length)
}
29
+
30
/**
 * Flatten a stack of mark layers into one marks object.
 * A boolean mark is set when any layer sets it; for `link`, the last layer
 * that carries one wins.
 */
function mergeMarks(stack: ActiveMarks[]): ActiveMarks {
  const flags = ['bold', 'italic', 'code', 'strike'] as const
  return stack.reduce<ActiveMarks>((combined, layer) => {
    for (const flag of flags) {
      if (layer[flag]) combined[flag] = true
    }
    if (layer.link) combined.link = layer.link
    return combined
  }, {})
}
45
+
46
/** A stretch of text together with the marks that apply to all of it. */
interface TextRun {
  /** The text of the run. */
  text: string
  /** Marks in effect for the whole run. */
  marks: ActiveMarks
}
50
+
51
/**
 * Walk an inline subtree depth-first and collect `{ text, marks }` runs.
 *
 * Text nodes yield one run (skipped when empty) carrying the merged marks of
 * `markStack`. Elements add the mark implied by their tag (strong/b, em/i,
 * code, s/del/strike, a[href]) for their descendants. Other node types yield
 * nothing.
 */
function collectInlineRuns(node: Node, markStack: ActiveMarks[]): TextRun[] {
  if (node.nodeType === Node.TEXT_NODE) {
    const text = node.textContent ?? ''
    return text ? [{ text, marks: mergeMarks(markStack) }] : []
  }
  if (node.nodeType !== Node.ELEMENT_NODE) return []

  const el = node as Element
  const own: ActiveMarks = {}
  switch (el.tagName.toLowerCase()) {
    case 'strong':
    case 'b':
      own.bold = true
      break
    case 'em':
    case 'i':
      own.italic = true
      break
    case 'code':
      own.code = true
      break
    case 's':
    case 'del':
    case 'strike':
      own.strike = true
      break
    case 'a': {
      const href = el.getAttribute('href')
      if (href) own.link = { href }
      break
    }
  }

  // Push a new layer only when this element actually contributes a mark.
  const stackForChildren = Object.keys(own).length ? [...markStack, own] : markStack
  return Array.from(el.childNodes).flatMap(child => collectInlineRuns(child, stackForChildren))
}
83
+
84
/**
 * Fill an already-attached Y.XmlElement with inline text runs.
 *
 * One Y.XmlText is created per run and all of them are inserted into
 * `paraEl` first; only then is each given its text (attach-before-fill).
 * Runs with marks are inserted with those marks as formatting attributes.
 * Does nothing for an empty `runs` array.
 */
function fillInlineRuns(paraEl: Y.XmlElement, runs: TextRun[]): void {
  if (runs.length === 0) return

  const textNodes = runs.map(() => new Y.XmlText())
  paraEl.insert(0, textNodes)

  const flags = ['bold', 'italic', 'code', 'strike'] as const
  runs.forEach(({ text, marks }, index) => {
    const attrs: Record<string, boolean | object> = {}
    for (const flag of flags) {
      if (marks[flag]) attrs[flag] = true
    }
    if (marks.link) attrs['link'] = marks.link

    const target = textNodes[index]!
    if (Object.keys(attrs).length) {
      target.insert(0, text, attrs)
    } else {
      target.insert(0, text)
    }
  })
}
103
+
104
/**
 * Convert a single block-level DOM element to Y.XmlElement(s) and append the
 * result to the end of `container`.
 *
 * Every new Y node is inserted into its parent before it receives attributes,
 * children or text (attach-before-fill).
 *
 * Handled elements:
 * - `div[data-type="file-block"]` -> `fileBlock` (non-empty attributes only)
 * - `h1`–`h6` -> `heading` with a numeric `level`
 * - `p` -> `paragraph`, `hr` -> `horizontalRule`
 * - `pre` -> `codeBlock` (language from the inner `<code>` class)
 * - `blockquote` -> `blockquote` wrapping one paragraph with all inline text
 * - `ul` / `ol` -> `bulletList` / `orderedList`, or `taskList` when any direct
 *   `<li>` contains a checkbox input
 * - `table` -> `table` built from the table's own rows; each cell becomes
 *   `tableHeader` (`<th>`) or `tableCell` (`<td>`) individually
 * - anything else -> one `paragraph` if it has non-whitespace text
 *
 * @param el        DOM element to convert.
 * @param container Y.js node that receives the converted block(s).
 */
function convertBlockElement(el: Element, container: Y.XmlElement | Y.XmlFragment): void {
  const tag = el.tagName.toLowerCase()

  // fileBlock: DOMSerializer renders <div data-type="file-block" uploadid="..." ...>
  // HTML lowercases attribute names; read in lowercase, store in camelCase for Y.js
  if (tag === 'div' && el.getAttribute('data-type') === 'file-block') {
    const fileBlockEl = new Y.XmlElement('fileBlock')
    container.insert(container.length, [fileBlockEl])
    const uploadId = el.getAttribute('uploadid') ?? ''
    const docId = el.getAttribute('docid') ?? ''
    const filename = el.getAttribute('filename') ?? ''
    const mimeType = el.getAttribute('mimetype') ?? ''
    if (uploadId) fileBlockEl.setAttribute('uploadId', uploadId)
    if (docId) fileBlockEl.setAttribute('docId', docId)
    if (filename) fileBlockEl.setAttribute('filename', filename)
    if (mimeType) fileBlockEl.setAttribute('mimeType', mimeType)
    return
  }

  // Headings
  const headingMatch = tag.match(/^h([1-6])$/)
  if (headingMatch) {
    const level = parseInt(headingMatch[1]!, 10)
    const headingEl = new Y.XmlElement('heading')
    container.insert(container.length, [headingEl])
    // The level is stored as a number; the cast only satisfies the Y.js typing.
    headingEl.setAttribute('level', level as unknown as string)
    fillInlineRuns(headingEl, collectInlineRuns(el, []))
    return
  }

  if (tag === 'p') {
    const paraEl = new Y.XmlElement('paragraph')
    container.insert(container.length, [paraEl])
    fillInlineRuns(paraEl, collectInlineRuns(el, []))
    return
  }

  if (tag === 'hr') {
    const hrEl = new Y.XmlElement('horizontalRule')
    container.insert(container.length, [hrEl])
    return
  }

  if (tag === 'pre') {
    const codeEl = el.querySelector('code')
    const codeBlock = new Y.XmlElement('codeBlock')
    container.insert(container.length, [codeBlock])
    const lang = codeEl ? langFromClass(codeEl) : ''
    if (lang) codeBlock.setAttribute('language', lang)
    const xt = new Y.XmlText()
    codeBlock.insert(0, [xt])
    // Plain text only: marks inside code blocks are not preserved.
    xt.insert(0, (codeEl ?? el).textContent ?? '')
    return
  }

  if (tag === 'blockquote') {
    const bqEl = new Y.XmlElement('blockquote')
    container.insert(container.length, [bqEl])
    // Wrap text in a paragraph
    const paraEl = new Y.XmlElement('paragraph')
    bqEl.insert(0, [paraEl])
    fillInlineRuns(paraEl, collectInlineRuns(el, []))
    return
  }

  if (tag === 'ul' || tag === 'ol') {
    // Check if any li contains a checkbox → taskList
    const items = Array.from(el.querySelectorAll(':scope > li'))
    const hasCheckbox = items.some(li => li.querySelector('input[type="checkbox"]'))

    if (hasCheckbox) {
      const taskListEl = new Y.XmlElement('taskList')
      container.insert(container.length, [taskListEl])
      const taskItemEls = items.map(() => new Y.XmlElement('taskItem'))
      taskListEl.insert(0, taskItemEls)
      items.forEach((li, i) => {
        const checkbox = li.querySelector('input[type="checkbox"]') as HTMLInputElement | null
        const checked = checkbox?.checked ?? false
        // Stored as a boolean; the cast only satisfies the Y.js typing.
        taskItemEls[i]!.setAttribute('checked', checked as unknown as string)
        const paraEl = new Y.XmlElement('paragraph')
        taskItemEls[i]!.insert(0, [paraEl])
        // Remove the checkbox (and only the checkbox) before text extraction
        const clone = li.cloneNode(true) as Element
        clone.querySelector('input[type="checkbox"]')?.remove()
        fillInlineRuns(paraEl, collectInlineRuns(clone, []))
      })
    } else {
      const listType = tag === 'ul' ? 'bulletList' : 'orderedList'
      const listEl = new Y.XmlElement(listType)
      container.insert(container.length, [listEl])
      const listItemEls = items.map(() => new Y.XmlElement('listItem'))
      listEl.insert(0, listItemEls)
      items.forEach((li, i) => {
        const paraEl = new Y.XmlElement('paragraph')
        listItemEls[i]!.insert(0, [paraEl])
        fillInlineRuns(paraEl, collectInlineRuns(li, []))
      })
    }
    return
  }

  if (tag === 'table') {
    const tableEl = new Y.XmlElement('table')
    container.insert(container.length, [tableEl])

    // Only this table's own rows: rows of a nested table must not be hoisted
    // into the outer table.
    const rows = Array.from(el.querySelectorAll(
      ':scope > tr, :scope > thead > tr, :scope > tbody > tr, :scope > tfoot > tr'
    ))
    const rowEls = rows.map(() => new Y.XmlElement('tableRow'))
    tableEl.insert(0, rowEls)

    rows.forEach((row, ri) => {
      // Only the row's own cells, each typed by its own tag, so a row mixing
      // <th> and <td> keeps the distinction.
      const cells = Array.from(row.querySelectorAll(':scope > th, :scope > td'))
      const cellEls = cells.map(
        cell => new Y.XmlElement(cell.tagName.toLowerCase() === 'th' ? 'tableHeader' : 'tableCell')
      )
      rowEls[ri]!.insert(0, cellEls)
      cells.forEach((cell, ci) => {
        const paraEl = new Y.XmlElement('paragraph')
        cellEls[ci]!.insert(0, [paraEl])
        fillInlineRuns(paraEl, collectInlineRuns(cell, []))
      })
    })
    return
  }

  // Fallback: treat as paragraph (div, section, article, etc.)
  const text = getTextContent(el).trim()
  if (text) {
    const paraEl = new Y.XmlElement('paragraph')
    container.insert(container.length, [paraEl])
    fillInlineRuns(paraEl, collectInlineRuns(el, []))
  }
}
237
+
238
+ // ── Public API ───────────────────────────────────────────────────────────────
239
+
240
/**
 * Parse an HTML string and write it into a Y.XmlFragment in the shape
 * TipTap's Collaboration extension reads.
 *
 * Inside one transaction it inserts a `documentHeader` (holding the title)
 * and a `documentMeta` at the start of the fragment, then appends one
 * converted block per recognised top-level element of `<body>`. The first
 * `<h1>` supplies the title and is not repeated as a block. If nothing was
 * converted, an empty paragraph is appended.
 *
 * Title precedence: first `<h1>` text, then `<title>`, then `fallbackTitle`.
 * A fragment that is not attached to a Y.Doc is left untouched (warning only).
 *
 * @param fragment      The target `Y.Doc.getXmlFragment('default')`
 * @param html          Raw HTML string
 * @param fallbackTitle Used when no <title> or <h1> is found
 */
export function populateYDocFromHtml(
  fragment: Y.XmlFragment,
  html: string,
  fallbackTitle = 'Untitled'
): void {
  const ydoc = fragment.doc
  if (!ydoc) {
    console.warn('[htmlToYjs] fragment has no doc — skipping population')
    return
  }

  const doc = new DOMParser().parseFromString(html, 'text/html')

  // Determine title
  const titleFromHead = doc.title?.trim() || fallbackTitle
  const firstH1 = doc.body.querySelector('h1')
  const title = firstH1?.textContent?.trim() || titleFromHead

  // Top-level tags that are converted; everything else is skipped.
  const blockTags = new Set([
    'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'ul', 'ol', 'pre', 'blockquote', 'table', 'hr',
    'div', 'section', 'article', 'header', 'footer', 'main', 'aside'
  ])

  ydoc.transact(() => {
    // Attach header + meta first, then fill the header text.
    const headerEl = new Y.XmlElement('documentHeader')
    const metaEl = new Y.XmlElement('documentMeta')
    fragment.insert(0, [headerEl, metaEl])

    const headerXt = new Y.XmlText()
    headerEl.insert(0, [headerXt])
    headerXt.insert(0, title)

    // Recognised body children, minus the h1 already used as the title.
    const blocks = Array.from(doc.body.children).filter(
      child => blockTags.has(child.tagName.toLowerCase()) && child !== firstH1
    )
    for (const block of blocks) {
      convertBlockElement(block, fragment)
    }

    // Ensure at least one paragraph exists
    if (blocks.length === 0) {
      fragment.insert(fragment.length, [new Y.XmlElement('paragraph')])
    }
  })
}
299
+
300
/**
 * Append the blocks of an HTML string to the end of an existing
 * Y.XmlFragment, in one transaction.
 *
 * Unlike `populateYDocFromHtml`, no documentHeader/documentMeta is inserted,
 * no `<h1>` is skipped and no placeholder paragraph is added. A fragment that
 * is not attached to a Y.Doc is left untouched (warning only).
 */
export function appendHtmlToFragment(fragment: Y.XmlFragment, html: string): void {
  const ydoc = fragment.doc
  if (!ydoc) {
    console.warn('[htmlToYjs] appendHtmlToFragment: fragment has no doc — skipping')
    return
  }

  const doc = new DOMParser().parseFromString(html, 'text/html')
  // Top-level tags that are converted; everything else is skipped.
  const blockTags = new Set([
    'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'ul', 'ol', 'pre', 'blockquote', 'table', 'hr',
    'div', 'section', 'article', 'header', 'footer', 'main', 'aside'
  ])

  ydoc.transact(() => {
    Array.from(doc.body.children)
      .filter(child => blockTags.has(child.tagName.toLowerCase()))
      .forEach(child => convertBlockElement(child, fragment))
  })
}