@abraca/convert 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/abracadabra-convert.cjs +3237 -0
- package/dist/abracadabra-convert.cjs.map +1 -0
- package/dist/abracadabra-convert.esm.js +3163 -0
- package/dist/abracadabra-convert.esm.js.map +1 -0
- package/dist/index.d.ts +356 -0
- package/package.json +41 -0
- package/src/diff.ts +302 -0
- package/src/file-blocks/manifest.ts +169 -0
- package/src/file-blocks/paths.ts +207 -0
- package/src/html-to-yjs.ts +322 -0
- package/src/index.ts +103 -0
- package/src/markdown-to-yjs.ts +1208 -0
- package/src/spec/index.ts +7 -0
- package/src/spec/marks.ts +92 -0
- package/src/spec/nodes.ts +333 -0
- package/src/spec/universal-meta.ts +147 -0
- package/src/types.ts +89 -0
- package/src/yjs-to-markdown.ts +820 -0
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
// ── Manifest: docId <-> filesystem path mapping ─────────────────────────────
|
|
2
|
+
//
|
|
3
|
+
// The manifest tracks which doc on the server maps to which file on
|
|
4
|
+
// disk, plus per-doc upload (binary attachment) records and content
|
|
5
|
+
// hashes for change detection.
|
|
6
|
+
//
|
|
7
|
+
// Filesystem I/O is INJECTABLE so Node hosts (memfs for tests, fs/promises
|
|
8
|
+
// for Electron, @tauri-apps/plugin-fs for Tauri) can all use this code
|
|
9
|
+
// without the package taking a hard dep on any one. If no adapter has
|
|
10
|
+
// been wired, filesystem operations throw with setup instructions, so
|
|
11
|
+
// every host (and every test) must call `setFsAdapter()` first.
|
|
12
|
+
|
|
13
|
+
/**
 * Filesystem surface the host must supply via `setFsAdapter()`.
 *
 * `readTextFile`, `writeTextFile` and `mkdir` are required — they are what the
 * manifest load/save code in this module calls. The remaining members are
 * optional so that minimal adapters still type-check.
 */
export interface FsAdapter {
  /** Resolve with the text stored at `path`. */
  readTextFile: (path: string) => Promise<string>
  /** Store `contents` as text at `path`. */
  writeTextFile: (path: string, contents: string) => Promise<void>
  /** Create the directory `path`; `recursive` is passed as `true` by `saveManifest`. */
  mkdir: (path: string, options?: { recursive?: boolean }) => Promise<void>
  /** Optional: resolve with the raw bytes stored at `path`. */
  readBinaryFile?: (path: string) => Promise<Uint8Array>
  /** Optional: store raw bytes at `path`. */
  writeBinaryFile?: (path: string, contents: Uint8Array) => Promise<void>
  /** Optional: report whether `path` exists. */
  exists?: (path: string) => Promise<boolean>
  /** Optional: delete `path`. */
  remove?: (path: string) => Promise<void>
}
|
|
22
|
+
|
|
23
|
+
// Module-level slot for the host-provided adapter; stays `null` until
// `setFsAdapter()` has been called.
let _adapter: FsAdapter | null = null

/**
 * Register `adapter` as the filesystem backend for this module.
 * A later call replaces whatever was installed before.
 */
export function setFsAdapter(adapter: FsAdapter): void {
  _adapter = adapter
}

/**
 * Return the adapter currently registered, or `null` when none has been
 * installed yet.
 */
export function getFsAdapter(): FsAdapter | null {
  return _adapter
}
|
|
34
|
+
|
|
35
|
+
/**
 * Resolve with the installed adapter.
 *
 * @throws Error (as a rejected promise) with setup instructions when
 *   `setFsAdapter()` has not been called.
 */
async function loadFsApi(): Promise<FsAdapter> {
  if (!_adapter) {
    const message = [
      '@abraca/convert: no FsAdapter installed. ',
      'Call setFsAdapter({readTextFile, writeTextFile, mkdir, …}) at boot. ',
      'Tauri hosts can pass `@tauri-apps/plugin-fs` directly; Node hosts can pass ',
      'a wrapper over `fs/promises`; tests can use the in-memory adapter from ',
      'tests/file-blocks.test.ts as a template.',
    ].join('')
    throw new Error(message)
  }
  return _adapter
}
|
|
45
|
+
|
|
46
|
+
// ── Types ────────────────────────────────────────────────────────────────────
|
|
47
|
+
|
|
48
|
+
/** Record of one binary attachment that belongs to a synced document. */
export interface UploadManifestEntry {
  /** Identifier of the upload. */
  uploadId: string
  /** File name of the attachment. */
  filename: string
  /** Location of the attachment, e.g. "_files/abc123/photo.jpg". */
  relativePath: string
  /** Content hash recorded for the attachment. */
  contentHash: string
}
|
|
54
|
+
|
|
55
|
+
/** Manifest record tying one document to one file on disk. */
export interface ManifestEntry {
  /** Identifier of the document; also the key under `FsSyncManifest.entries`. */
  docId: string
  /** File location, e.g. "projects/my-project/_index.md". */
  relativePath: string
  /** Hash of the markdown that was last written or read. */
  contentHash: string
  /** Timestamp of the last write (numeric; presumably epoch ms — confirm with callers). */
  lastWrittenAt: number
  /** Timestamp of the last read (numeric; presumably epoch ms — confirm with callers). */
  lastReadAt: number
  /** Attachment records associated with this document. */
  uploads: UploadManifestEntry[]
}
|
|
63
|
+
|
|
64
|
+
/** Root object persisted as the manifest file of a sync directory. */
export interface FsSyncManifest {
  /** Schema version; `loadManifest` only accepts files carrying `2`. */
  version: 2
  /** Identifier of the space this manifest was created for. */
  spaceId: string
  /** `Date.now()` value stamped by the most recent `saveManifest` (0 when fresh). */
  lastSyncAt: number
  /** All tracked documents, keyed by docId. */
  entries: Record<string, ManifestEntry>
}
|
|
70
|
+
|
|
71
|
+
// ── CRUD ─────────────────────────────────────────────────────────────────────
|
|
72
|
+
|
|
73
|
+
// Bookkeeping directory kept inside every sync root, and the manifest file in it.
const MANIFEST_DIR = '.abracadabra'
const MANIFEST_FILE = 'manifest.json'

/** Full path of the manifest file for `syncDir`. */
function manifestPath(syncDir: string): string {
  return [syncDir, MANIFEST_DIR, MANIFEST_FILE].join('/')
}

/** Path of the bookkeeping directory inside `syncDir`. */
export function manifestDir(syncDir: string): string {
  return [syncDir, MANIFEST_DIR].join('/')
}

/** Path of the `trash` folder inside the bookkeeping directory. */
export function trashDir(syncDir: string): string {
  return [syncDir, MANIFEST_DIR, 'trash'].join('/')
}

/** Path of the `orphans` folder inside the bookkeeping directory. */
export function orphansDir(syncDir: string): string {
  return [syncDir, MANIFEST_DIR, 'orphans'].join('/')
}

/** Path of the `conflicts` folder inside the bookkeeping directory. */
export function conflictsDir(syncDir: string): string {
  return [syncDir, MANIFEST_DIR, 'conflicts'].join('/')
}
|
|
95
|
+
|
|
96
|
+
/**
 * Build a manifest for `spaceId` that tracks no documents and has never been
 * saved (`lastSyncAt` is 0).
 */
export function createEmptyManifest(spaceId: string): FsSyncManifest {
  const fresh: FsSyncManifest = { version: 2, spaceId, lastSyncAt: 0, entries: {} }
  return fresh
}
|
|
104
|
+
|
|
105
|
+
/**
 * Structural check applied to parsed manifest JSON: it must be an object with
 * `version === 2` and a plain-object `entries` map. Without the `entries`
 * check, a truncated file would be accepted and crash later lookups.
 */
function isUsableManifest(value: unknown): value is FsSyncManifest {
  if (typeof value !== 'object' || value === null) return false
  const candidate = value as { version?: unknown, entries?: unknown }
  return (
    candidate.version === 2
    && typeof candidate.entries === 'object'
    && candidate.entries !== null
    && !Array.isArray(candidate.entries)
  )
}

/**
 * Load the manifest stored under `syncDir`.
 *
 * Returns the parsed file when it passes `isUsableManifest`. In every other
 * case — unreadable file, invalid JSON, wrong version, malformed `entries` —
 * a fresh empty manifest for `spaceId` is returned instead.
 *
 * NOTE(review): the `spaceId` stored in an existing file is not compared with
 * the `spaceId` argument; confirm that is intended.
 *
 * @throws Error (rejected promise) when no FsAdapter has been installed.
 */
export async function loadManifest(syncDir: string, spaceId: string): Promise<FsSyncManifest> {
  const fs = await loadFsApi()
  try {
    const raw = await fs.readTextFile(manifestPath(syncDir))
    const parsed: unknown = JSON.parse(raw)
    if (isUsableManifest(parsed)) return parsed
  }
  catch {
    // missing or corrupt — fall through to a fresh manifest
  }
  return createEmptyManifest(spaceId)
}
|
|
117
|
+
|
|
118
|
+
/**
 * Persist `manifest` as pretty-printed JSON under `syncDir`.
 *
 * Side effect: `manifest.lastSyncAt` is overwritten with `Date.now()` before
 * serialising, so the caller's object is mutated.
 *
 * @throws Error (rejected promise) when no FsAdapter has been installed, or
 *   whatever the adapter's `writeTextFile` rejects with.
 */
export async function saveManifest(syncDir: string, manifest: FsSyncManifest): Promise<void> {
  const fs = await loadFsApi()

  // Best-effort directory creation: failures are ignored here and will
  // surface through the write below if the directory is really unusable.
  try {
    await fs.mkdir(manifestDir(syncDir), { recursive: true })
  }
  catch {
    // already exists
  }

  manifest.lastSyncAt = Date.now()
  const serialized = JSON.stringify(manifest, null, 2)
  await fs.writeTextFile(manifestPath(syncDir), serialized)
}
|
|
130
|
+
|
|
131
|
+
// ── Lookups ──────────────────────────────────────────────────────────────────
|
|
132
|
+
|
|
133
|
+
/**
 * Return the entry stored for `docId`, or `undefined` when there is none.
 *
 * Only own properties of `manifest.entries` count: a plain index lookup would
 * hand back inherited `Object.prototype` members for ids such as
 * "constructor" or "toString".
 */
export function lookupByDocId(manifest: FsSyncManifest, docId: string): ManifestEntry | undefined {
  return Object.prototype.hasOwnProperty.call(manifest.entries, docId)
    ? manifest.entries[docId]
    : undefined
}
|
|
136
|
+
|
|
137
|
+
/**
 * Return the first entry (in `Object.values` order) whose `relativePath`
 * equals `relativePath` exactly, or `undefined` when none matches.
 */
export function lookupByPath(manifest: FsSyncManifest, relativePath: string): ManifestEntry | undefined {
  const candidates = Object.values(manifest.entries)
  return candidates.find(candidate => candidate.relativePath === relativePath)
}
|
|
143
|
+
|
|
144
|
+
/**
 * Return the first entry (in `Object.values` order) whose `contentHash`
 * equals `contentHash`, or `undefined` when none matches.
 */
export function lookupByHash(manifest: FsSyncManifest, contentHash: string): ManifestEntry | undefined {
  const candidates = Object.values(manifest.entries)
  return candidates.find(candidate => candidate.contentHash === contentHash)
}
|
|
150
|
+
|
|
151
|
+
/**
 * Store `entry` in `manifest.entries` under its own `docId`, replacing any
 * entry already stored for that id. Mutates `manifest`.
 */
export function setEntry(manifest: FsSyncManifest, entry: ManifestEntry): void {
  const { docId } = entry
  manifest.entries[docId] = entry
}
|
|
154
|
+
|
|
155
|
+
/**
 * Delete the entry stored for `docId` and return it, or return `undefined`
 * when the manifest holds no such entry. Mutates `manifest`.
 *
 * Only own properties of `manifest.entries` are considered, so ids that
 * collide with `Object.prototype` members (e.g. "toString") are reported as
 * absent instead of returning the inherited function.
 */
export function removeEntry(manifest: FsSyncManifest, docId: string): ManifestEntry | undefined {
  if (!Object.prototype.hasOwnProperty.call(manifest.entries, docId)) return undefined
  const removed = manifest.entries[docId]
  delete manifest.entries[docId]
  return removed
}
|
|
160
|
+
|
|
161
|
+
// ── Build reverse lookup (path -> docId) ─────────────────────────────────────
|
|
162
|
+
|
|
163
|
+
/**
 * Build a `relativePath -> docId` map from the manifest. When two entries
 * share a path, the one that comes later in `Object.entries` order wins.
 */
export function buildReverseLookup(manifest: FsSyncManifest): Map<string, string> {
  const pairs = Object.entries(manifest.entries).map(
    ([docId, entry]): [string, string] => [entry.relativePath, docId]
  )
  return new Map<string, string>(pairs)
}
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
// ── Collision-safe path building for FS sync ─────────────────────────────────
|
|
2
|
+
|
|
3
|
+
import type { FsSyncManifest } from './manifest.ts'
|
|
4
|
+
|
|
5
|
+
/** One node of the document tree, as consumed by the path helpers in this file. */
export interface FsTreeEntry {
  /** Human-readable title; slugified by `labelToFilename` to build paths. */
  label: string
  /** Id of the parent node, or `null` for a root-level node. */
  parentId: string | null
  /** Position among siblings; `nextOrder` returns the largest sibling value plus one. */
  order: number
  /** Optional node type; not read by the helpers in this file. */
  type?: string
  /** Optional free-form metadata; not read by the helpers in this file. */
  meta?: any
}
|
|
12
|
+
|
|
13
|
+
/**
 * Slugify a document label into a filename stem (no extension).
 *
 * Steps: lowercase; drop every character that is not `a-z`, `0-9`, whitespace
 * or `-`; turn whitespace runs into `-`; collapse repeated `-`; trim one
 * leading/trailing `-`. An empty result becomes "untitled".
 *
 * e.g. "My Project!" -> "my-project". Characters outside ASCII letters and
 * digits are removed, so a label made only of such characters yields
 * "untitled".
 */
export function labelToFilename(label: string): string {
  const lowered = label.toLowerCase()
  const allowedOnly = lowered.replace(/[^a-z0-9\s-]/g, '')
  const dashed = allowedOnly.replace(/\s+/g, '-')
  const collapsed = dashed.replace(/-+/g, '-')
  const trimmed = collapsed.replace(/^-|-$/g, '')
  return trimmed || 'untitled'
}
|
|
27
|
+
|
|
28
|
+
/**
 * Best-effort inverse of the slugging: drop a trailing collision suffix
 * (`~` followed by exactly four characters from `a-z0-9`) and turn every
 * `-` into a space.
 * e.g. "my-project" -> "my project", "my-project~a3f2" -> "my project"
 */
export function fsFilenameToLabel(filename: string): string {
  const withoutSuffix = filename.replace(/~[a-z0-9]{4}$/, '')
  return withoutSuffix.split('-').join(' ')
}
|
|
37
|
+
|
|
38
|
+
/**
 * Report whether any tree entry names `docId` as its parent. Such documents
 * are written using the `_index.md` convention.
 */
export function hasChildren(docId: string, treeData: Record<string, FsTreeEntry>): boolean {
  return Object.values(treeData).some(node => node.parentId === docId)
}
|
|
47
|
+
|
|
48
|
+
/**
 * Pick the filename stem (no extension) a document should use so that its
 * path does not clash with a path recorded for another document in the
 * manifest.
 *
 * - No clash: `desiredFilename` is returned unchanged.
 * - Clash: `~` plus the first 4 characters of `docId` is appended.
 * - If that suffixed path is itself taken (two ids sharing a 4-character
 *   prefix), the prefix is lengthened one character at a time until the path
 *   is free, ending with the full `docId`.
 *
 * Note: `fsFilenameToLabel` only strips 4-character suffixes, so the longer
 * fallback suffixes remain visible when a filename is turned back into a label.
 *
 * @param desiredFilename Slugified stem the caller would like to use.
 * @param docId Document being placed; its own manifest entry is ignored.
 * @param parentPath Directory portion relative to the sync root ('' for root).
 * @param manifest Manifest whose recorded paths are treated as taken.
 * @param isIndex Whether the document is stored as `<stem>/_index.md`.
 */
function resolveCollision(
  desiredFilename: string,
  docId: string,
  parentPath: string,
  manifest: FsSyncManifest,
  isIndex: boolean
): string {
  const prefix = parentPath ? `${parentPath}/` : ''
  const tail = isIndex ? '/_index.md' : '.md'
  const pathFor = (stem: string): string => `${prefix}${stem}${tail}`

  // Paths already claimed by documents other than this one.
  const claimed = new Set<string>()
  for (const [entryDocId, entry] of Object.entries(manifest.entries)) {
    if (entryDocId !== docId) claimed.add(entry.relativePath)
  }

  if (!claimed.has(pathFor(desiredFilename))) return desiredFilename

  // Grow the disambiguator until the resulting path is unclaimed.
  for (let length = 4; length < docId.length; length++) {
    const candidate = `${desiredFilename}~${docId.substring(0, length)}`
    if (!claimed.has(pathFor(candidate))) return candidate
  }
  return `${desiredFilename}~${docId}`
}
|
|
75
|
+
|
|
76
|
+
/**
 * Compute a document's path relative to the sync root.
 *
 * - Unknown `docId` (absent from `treeData`): falls back to `<docId>.md`.
 * - Otherwise the ancestor chain is walked via `parentId`, each label is
 *   slugified with `labelToFilename`, and the slugs form the directories.
 *   The walk stops at a root node, at a parent id missing from `treeData`,
 *   or when an id repeats (cycle guard).
 * - Documents with children are written as `<dir>/<name>/_index.md`,
 *   leaves as `<dir>/<name>.md`.
 * - The document's own name is passed through `resolveCollision`.
 *
 * NOTE(review): ancestor directories use the plain slug of each ancestor
 * label; only the document's own name gets collision handling here.
 */
export function buildRelativePath(
  docId: string,
  treeData: Record<string, FsTreeEntry>,
  manifest: FsSyncManifest
): string {
  const self = treeData[docId]
  if (!self) return `${docId}.md` // fallback for unknown docs

  // Collect slugs leaf-first: [doc, parent, grandparent, …]
  const leafFirst: string[] = []
  const seen = new Set<string>()
  let cursorId = docId
  let cursor: FsTreeEntry | undefined = self
  while (cursor && !seen.has(cursorId)) {
    seen.add(cursorId)
    leafFirst.push(labelToFilename(cursor.label))
    if (!cursor.parentId) break
    cursorId = cursor.parentId
    cursor = treeData[cursorId]
  }

  const ownName = leafFirst[0]!
  const parentPath = leafFirst.slice(1).reverse().join('/')

  const asIndex = hasChildren(docId, treeData)
  const finalName = resolveCollision(ownName, docId, parentPath, manifest, asIndex)

  const base = parentPath ? `${parentPath}/${finalName}` : finalName
  return asIndex ? `${base}/_index.md` : `${base}.md`
}
|
|
122
|
+
|
|
123
|
+
/**
 * Get the directory portion of a document's relative path.
 *
 * - `_index.md` documents: the directory that contains `_index.md`,
 *   e.g. "projects/my-project/_index.md" -> "projects/my-project".
 * - Leaf documents: the parent directory,
 *   e.g. "projects/my-project/task.md" -> "projects/my-project".
 * - Paths without any `/` yield ''.
 */
export function getDocDir(relativePath: string): string {
  const indexSuffix = '/_index.md'
  if (relativePath.endsWith(indexSuffix)) {
    // Cut the trailing suffix explicitly; String.replace with a string
    // pattern would remove the FIRST occurrence, which is wrong when an
    // earlier segment also happens to be named "_index.md".
    return relativePath.slice(0, -indexSuffix.length)
  }
  const lastSlash = relativePath.lastIndexOf('/')
  return lastSlash >= 0 ? relativePath.substring(0, lastSlash) : ''
}
|
|
137
|
+
|
|
138
|
+
/**
 * Work out which tree node is the parent of the document stored at
 * `relativePath`.
 *
 * The filename is dropped (and, for `…/_index.md`, the document's own
 * directory too). The remaining directory segments are then matched from the
 * root down: at each level the first tree entry whose slugified label equals
 * the segment and whose parent is the node matched so far is taken.
 *
 * Root-level nodes are recognised by any falsy `parentId` (null, undefined,
 * ''), matching how `buildRelativePath` detects the top of the chain.
 *
 * @returns The id of the deepest directory that could be matched, or `null`
 *   for root-level documents or when not even the first segment matches.
 */
export function resolveParentFromPath(
  relativePath: string,
  treeData: Record<string, FsTreeEntry>
): string | null {
  const dirs = relativePath.split('/')
  dirs.pop() // remove filename

  // For _index.md, also remove the doc's own directory name
  if (relativePath.endsWith('/_index.md') && dirs.length > 0) {
    dirs.pop()
  }

  if (dirs.length === 0) return null // root-level doc

  // Slugs are computed on demand and cached, so each label is slugified at
  // most once even though every path segment scans the whole tree.
  const slugCache = new Map<string, string>()
  const slugOf = (id: string, label: string): string => {
    let slug = slugCache.get(id)
    if (slug === undefined) {
      slug = labelToFilename(label)
      slugCache.set(id, slug)
    }
    return slug
  }

  const nodes = Object.entries(treeData)
  let parentId: string | null = null
  for (const segment of dirs) {
    const match = nodes.find(
      ([id, node]) => slugOf(id, node.label) === segment && (node.parentId || null) === parentId
    )
    // Can't resolve further — return last known parent
    if (!match) break
    parentId = match[0]
  }
  return parentId
}
|
|
171
|
+
|
|
172
|
+
/**
 * Cheap, non-cryptographic string hash: a 32-bit rolling `hash * 31 + code`
 * over the UTF-16 code units, rendered in base 36 (negative values keep
 * their leading `-`). The empty string hashes to "0".
 */
export function simpleHash(str: string): string {
  let acc = 0
  let index = 0
  while (index < str.length) {
    // Math.imul keeps the multiplication in 32-bit space; `| 0` wraps the sum.
    acc = (Math.imul(acc, 31) + str.charCodeAt(index)) | 0
    index += 1
  }
  return acc.toString(36)
}
|
|
182
|
+
|
|
183
|
+
/**
 * Copy a map-like tree (anything exposing `forEach((value, key) => …)`) into
 * a plain record keyed by id. Values that are falsy or not of type "object"
 * are left out; kept values are stored by reference, not cloned.
 */
export function getTreeData(treeMap: any): Record<string, FsTreeEntry> {
  const flat: Record<string, FsTreeEntry> = {}
  treeMap.forEach((value: any, id: string) => {
    if (!value || typeof value !== 'object') return
    flat[id] = value as FsTreeEntry
  })
  return flat
}
|
|
195
|
+
|
|
196
|
+
/**
 * Compute the `order` value for a new child of `parentId`: one more than the
 * largest `order` among entries with exactly that `parentId`, or 0 when the
 * parent has no children yet.
 */
export function nextOrder(treeData: Record<string, FsTreeEntry>, parentId: string | null): number {
  const highest = Object.values(treeData)
    .filter(node => node.parentId === parentId)
    .reduce((best, node) => (node.order > best ? node.order : best), -1)
  return highest + 1
}
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
import * as Y from 'yjs'
|
|
2
|
+
|
|
3
|
+
// ── HTML → Y.js converter ───────────────────────────────────────────────────
|
|
4
|
+
//
|
|
5
|
+
// Uses DOMParser (browser builtin) to parse HTML, then walks the DOM tree
|
|
6
|
+
// and writes TipTap-compatible Y.XmlElement nodes into the fragment.
|
|
7
|
+
//
|
|
8
|
+
// Follows the same attach-before-fill pattern as markdown-to-yjs.ts:
|
|
9
|
+
// always attach nodes to the doc before inserting text into them.
|
|
10
|
+
|
|
11
|
+
/**
 * Inline formatting in effect for a piece of text. A flag is either present
 * (`true`) or absent; `link` carries the target of the enclosing anchor.
 */
interface ActiveMarks {
  /** Set inside `<strong>` / `<b>`. */
  bold?: true
  /** Set inside `<em>` / `<i>`. */
  italic?: true
  /** Set inside `<code>`. */
  code?: true
  /** Set inside `<s>` / `<del>` / `<strike>`. */
  strike?: true
  /** Set inside `<a href="…">` with a non-empty href. */
  link?: { href: string }
}
|
|
18
|
+
|
|
19
|
+
/** Return the node's `textContent`, substituting '' when it is null or undefined. */
function getTextContent(node: Node): string {
  const { textContent } = node
  if (textContent === null || textContent === undefined) return ''
  return textContent
}
|
|
22
|
+
|
|
23
|
+
/**
 * Extract the language name from the first `language-xxx` class on `el`.
 * Returns '' when the element has no such class.
 */
function langFromClass(el: Element): string {
  const prefix = 'language-'
  const match = Array.from(el.classList).find(cls => cls.startsWith(prefix))
  return match === undefined ? '' : match.slice(prefix.length)
}
|
|
29
|
+
|
|
30
|
+
/**
 * Flatten a stack of mark layers (outermost first) into a single marks
 * object. Boolean marks are set when any layer sets them; for `link`, the
 * last layer that defines one wins. The input layers are not modified.
 */
function mergeMarks(stack: ActiveMarks[]): ActiveMarks {
  return stack.reduce<ActiveMarks>((combined, layer) => {
    if (layer.bold) combined.bold = true
    if (layer.italic) combined.italic = true
    if (layer.code) combined.code = true
    if (layer.strike) combined.strike = true
    if (layer.link) combined.link = layer.link
    return combined
  }, {})
}
|
|
45
|
+
|
|
46
|
+
/** A stretch of text together with the inline marks that apply to all of it. */
interface TextRun {
  /** The literal text of the run. */
  text: string
  /** Marks in effect for the whole run. */
  marks: ActiveMarks
}
|
|
50
|
+
|
|
51
|
+
/**
 * Recursively flatten a DOM subtree into `{ text, marks }` runs.
 *
 * - Text nodes yield one run carrying the merged marks of `markStack`
 *   (empty text yields nothing).
 * - Elements contribute the marks implied by their tag (`strong`/`b`,
 *   `em`/`i`, `code`, `s`/`del`/`strike`, `a[href]`) to their descendants.
 * - Any other node type (comments, …) yields nothing.
 *
 * @param node Root of the subtree to walk.
 * @param markStack Mark layers contributed by ancestors, outermost first.
 */
function collectInlineRuns(node: Node, markStack: ActiveMarks[]): TextRun[] {
  if (node.nodeType === Node.TEXT_NODE) {
    const text = node.textContent ?? ''
    return text ? [{ text, marks: mergeMarks(markStack) }] : []
  }

  if (node.nodeType !== Node.ELEMENT_NODE) return []

  const el = node as Element
  const ownMarks: ActiveMarks = {}
  switch (el.tagName.toLowerCase()) {
    case 'strong':
    case 'b':
      ownMarks.bold = true
      break
    case 'em':
    case 'i':
      ownMarks.italic = true
      break
    case 'code':
      ownMarks.code = true
      break
    case 's':
    case 'del':
    case 'strike':
      ownMarks.strike = true
      break
    case 'a': {
      const href = el.getAttribute('href')
      if (href) ownMarks.link = { href }
      break
    }
  }

  // Only grow the stack when this element actually adds a mark.
  const childStack = Object.keys(ownMarks).length ? [...markStack, ownMarks] : markStack

  const collected: TextRun[] = []
  for (const child of Array.from(el.childNodes)) {
    for (const run of collectInlineRuns(child, childStack)) collected.push(run)
  }
  return collected
}
|
|
83
|
+
|
|
84
|
+
/**
 * Write `runs` into `paraEl` as one Y.XmlText child per run.
 *
 * All Y.XmlText nodes are inserted into `paraEl` first and filled afterwards,
 * so text is only ever written into nodes that are already attached. Marks
 * become formatting attributes on the inserted text; a run without marks is
 * inserted without an attributes object. Does nothing for an empty list.
 */
function fillInlineRuns(paraEl: Y.XmlElement, runs: TextRun[]): void {
  if (runs.length === 0) return

  const textNodes = runs.map(() => new Y.XmlText())
  paraEl.insert(0, textNodes)

  for (let i = 0; i < runs.length; i++) {
    const { text, marks } = runs[i]!
    const target = textNodes[i]!

    const attrs: Record<string, boolean | object> = {}
    if (marks.bold) attrs['bold'] = true
    if (marks.italic) attrs['italic'] = true
    if (marks.code) attrs['code'] = true
    if (marks.strike) attrs['strike'] = true
    if (marks.link) attrs['link'] = marks.link

    if (Object.keys(attrs).length > 0) {
      target.insert(0, text, attrs)
    } else {
      target.insert(0, text)
    }
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * Tags that are converted as blocks of their own when they appear as direct
 * children of a generic wrapper element (div, section, …).
 */
const NESTED_BLOCK_TAGS = new Set([
  'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  'ul', 'ol', 'pre', 'blockquote', 'table', 'hr',
  'div', 'section', 'article', 'header', 'footer', 'main', 'aside'
])

/**
 * Convert a single block-level DOM element to Y.XmlElement(s) and append them
 * to `container`.
 *
 * Every Y node is inserted into its parent before it is filled, so text is
 * only written into attached nodes.
 *
 * Handled shapes:
 * - `div[data-type="file-block"]` -> `fileBlock` (attributes copied, no children)
 * - `h1`–`h6` -> `heading` with a numeric `level`
 * - `p` -> `paragraph`; `hr` -> `horizontalRule`
 * - `pre` -> `codeBlock` (language from a nested `code.language-xxx`)
 * - `blockquote` -> `blockquote` wrapping one paragraph of all its inline text
 * - `ul` / `ol` -> `taskList` when any item contains a checkbox, otherwise
 *   `bulletList` / `orderedList`; each item holds one paragraph
 * - `table` -> `table` / `tableRow` / `tableHeader` | `tableCell`
 * - anything else is a generic wrapper: if it has block-level children they
 *   are converted individually (inline content between them becomes its own
 *   paragraph); otherwise its text becomes a single paragraph.
 */
function convertBlockElement(el: Element, container: Y.XmlElement | Y.XmlFragment): void {
  const tag = el.tagName.toLowerCase()

  // fileBlock: DOMSerializer renders <div data-type="file-block" uploadid="..." ...>
  // HTML lowercases attribute names; read in lowercase, store in camelCase for Y.js
  if (tag === 'div' && el.getAttribute('data-type') === 'file-block') {
    const fileBlockEl = new Y.XmlElement('fileBlock')
    container.insert(container.length, [fileBlockEl])
    const uploadId = el.getAttribute('uploadid') ?? ''
    const docId = el.getAttribute('docid') ?? ''
    const filename = el.getAttribute('filename') ?? ''
    const mimeType = el.getAttribute('mimetype') ?? ''
    if (uploadId) fileBlockEl.setAttribute('uploadId', uploadId)
    if (docId) fileBlockEl.setAttribute('docId', docId)
    if (filename) fileBlockEl.setAttribute('filename', filename)
    if (mimeType) fileBlockEl.setAttribute('mimeType', mimeType)
    return
  }

  // Headings
  const headingMatch = tag.match(/^h([1-6])$/)
  if (headingMatch) {
    const level = Number.parseInt(headingMatch[1]!, 10)
    const headingEl = new Y.XmlElement('heading')
    container.insert(container.length, [headingEl])
    // The level is stored as a number; the cast only satisfies the Y typings.
    headingEl.setAttribute('level', level as unknown as string)
    fillInlineRuns(headingEl, collectInlineRuns(el, []))
    return
  }

  if (tag === 'p') {
    const paraEl = new Y.XmlElement('paragraph')
    container.insert(container.length, [paraEl])
    fillInlineRuns(paraEl, collectInlineRuns(el, []))
    return
  }

  if (tag === 'hr') {
    const hrEl = new Y.XmlElement('horizontalRule')
    container.insert(container.length, [hrEl])
    return
  }

  if (tag === 'pre') {
    const codeEl = el.querySelector('code')
    const codeBlock = new Y.XmlElement('codeBlock')
    container.insert(container.length, [codeBlock])
    const lang = codeEl ? langFromClass(codeEl) : ''
    if (lang) codeBlock.setAttribute('language', lang)
    const xt = new Y.XmlText()
    codeBlock.insert(0, [xt])
    // Prefer the inner <code> text; fall back to the <pre> itself.
    xt.insert(0, (codeEl ?? el).textContent ?? '')
    return
  }

  if (tag === 'blockquote') {
    const bqEl = new Y.XmlElement('blockquote')
    container.insert(container.length, [bqEl])
    // Wrap text in a paragraph
    const paraEl = new Y.XmlElement('paragraph')
    bqEl.insert(0, [paraEl])
    fillInlineRuns(paraEl, collectInlineRuns(el, []))
    return
  }

  if (tag === 'ul' || tag === 'ol') {
    // Check if any li contains a checkbox → taskList
    const items = Array.from(el.querySelectorAll(':scope > li'))
    const hasCheckbox = items.some(li => li.querySelector('input[type="checkbox"]'))

    if (hasCheckbox) {
      const taskListEl = new Y.XmlElement('taskList')
      container.insert(container.length, [taskListEl])
      const taskItemEls = items.map(() => new Y.XmlElement('taskItem'))
      taskListEl.insert(0, taskItemEls)
      items.forEach((li, i) => {
        const checkbox = li.querySelector('input[type="checkbox"]') as HTMLInputElement | null
        const checked = checkbox?.checked ?? false
        // Stored as a boolean; the cast only satisfies the Y typings.
        taskItemEls[i]!.setAttribute('checked', checked as unknown as string)
        const paraEl = new Y.XmlElement('paragraph')
        taskItemEls[i]!.insert(0, [paraEl])
        // Remove checkbox from text extraction
        const clone = li.cloneNode(true) as Element
        clone.querySelector('input')?.remove()
        fillInlineRuns(paraEl, collectInlineRuns(clone, []))
      })
    } else {
      const listType = tag === 'ul' ? 'bulletList' : 'orderedList'
      const listEl = new Y.XmlElement(listType)
      container.insert(container.length, [listEl])
      const listItemEls = items.map(() => new Y.XmlElement('listItem'))
      listEl.insert(0, listItemEls)
      items.forEach((li, i) => {
        const paraEl = new Y.XmlElement('paragraph')
        listItemEls[i]!.insert(0, [paraEl])
        fillInlineRuns(paraEl, collectInlineRuns(li, []))
      })
    }
    return
  }

  if (tag === 'table') {
    const tableEl = new Y.XmlElement('table')
    container.insert(container.length, [tableEl])

    const rows = Array.from(el.querySelectorAll('tr'))
    const rowEls = rows.map(() => new Y.XmlElement('tableRow'))
    tableEl.insert(0, rowEls)

    rows.forEach((row, ri) => {
      const cells = Array.from(row.querySelectorAll('th, td'))
      // A single <th> makes every cell of the row a header cell.
      const isHeader = cells.some(c => c.tagName.toLowerCase() === 'th')
      const cellType = isHeader ? 'tableHeader' : 'tableCell'
      const cellEls = cells.map(() => new Y.XmlElement(cellType))
      rowEls[ri]!.insert(0, cellEls)
      cells.forEach((cell, ci) => {
        const paraEl = new Y.XmlElement('paragraph')
        cellEls[ci]!.insert(0, [paraEl])
        fillInlineRuns(paraEl, collectInlineRuns(cell, []))
      })
    })
    return
  }

  // Fallback: generic wrapper (div, section, article, etc.)
  const childNodes = Array.from(el.childNodes)
  const isNestedBlock = (n: Node): n is Element =>
    n.nodeType === Node.ELEMENT_NODE
    && NESTED_BLOCK_TAGS.has((n as Element).tagName.toLowerCase())

  if (!childNodes.some(isNestedBlock)) {
    // Purely inline wrapper: treat as one paragraph, skipping empty ones.
    const text = getTextContent(el).trim()
    if (text) {
      const paraEl = new Y.XmlElement('paragraph')
      container.insert(container.length, [paraEl])
      fillInlineRuns(paraEl, collectInlineRuns(el, []))
    }
    return
  }

  // Wrapper with block children: convert each block on its own so headings,
  // lists, file blocks, … keep their structure. Inline content sitting
  // between blocks is gathered and emitted as its own paragraph.
  let pendingRuns: TextRun[] = []
  const flushPending = (): void => {
    if (pendingRuns.some(run => run.text.trim())) {
      const paraEl = new Y.XmlElement('paragraph')
      container.insert(container.length, [paraEl])
      fillInlineRuns(paraEl, pendingRuns)
    }
    pendingRuns = []
  }

  for (const child of childNodes) {
    if (isNestedBlock(child)) {
      flushPending()
      convertBlockElement(child, container)
    } else {
      pendingRuns.push(...collectInlineRuns(child, []))
    }
  }
  flushPending()
}
|
|
237
|
+
|
|
238
|
+
// ── Public API ───────────────────────────────────────────────────────────────
|
|
239
|
+
|
|
240
|
+
/**
 * Parse `html` and write it into `fragment` in the shape TipTap's
 * Collaboration extension reads: a `documentHeader` holding the title, an
 * empty `documentMeta`, then one node per supported top-level block.
 *
 * Title precedence: text of the first `<h1>` in the body, then `<title>`,
 * then `fallbackTitle`. An `<h1>` used as the title is not repeated in the
 * body when it is a direct child of `<body>`.
 *
 * Only element children of `<body>` whose tag is in the supported block list
 * are converted; top-level text nodes and other elements are skipped. When
 * nothing is converted, a single empty paragraph is appended.
 *
 * All writes happen in one Y transaction. If the fragment is not attached to
 * a document, a warning is logged and nothing is written.
 *
 * @param fragment The target `Y.Doc.getXmlFragment('default')`
 * @param html Raw HTML string
 * @param fallbackTitle Used when no <title> or <h1> is found
 */
export function populateYDocFromHtml(
  fragment: Y.XmlFragment,
  html: string,
  fallbackTitle = 'Untitled'
): void {
  const ydoc = fragment.doc
  if (!ydoc) {
    console.warn('[htmlToYjs] fragment has no doc — skipping population')
    return
  }

  const parsed = new DOMParser().parseFromString(html, 'text/html')

  // Determine title
  const documentTitle = parsed.title?.trim() || fallbackTitle
  const firstH1 = parsed.body.querySelector('h1')
  const h1Title = firstH1 ? firstH1.textContent?.trim() : undefined
  const title = h1Title || documentTitle

  const supportedTags = new Set([
    'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'ul', 'ol', 'pre', 'blockquote', 'table', 'hr',
    'div', 'section', 'article', 'header', 'footer', 'main', 'aside'
  ])

  ydoc.transact(() => {
    const headerEl = new Y.XmlElement('documentHeader')
    const metaEl = new Y.XmlElement('documentMeta')
    fragment.insert(0, [headerEl, metaEl])

    // Attach first, then fill.
    const headerText = new Y.XmlText()
    headerEl.insert(0, [headerText])
    headerText.insert(0, title)

    // Supported top-level blocks, minus the <h1> already used as title.
    const blocks = Array.from(parsed.body.children).filter((child) => {
      const tag = child.tagName.toLowerCase()
      if (!supportedTags.has(tag)) return false
      return !(tag === 'h1' && child === firstH1)
    })

    for (const block of blocks) {
      convertBlockElement(block, fragment)
    }

    // Ensure at least one paragraph exists
    if (blocks.length === 0) {
      fragment.insert(fragment.length, [new Y.XmlElement('paragraph')])
    }
  })
}
|
|
299
|
+
|
|
300
|
+
/**
 * Parse `html` and append its supported top-level blocks to the end of an
 * existing fragment. Unlike `populateYDocFromHtml`, no
 * documentHeader/documentMeta is inserted and no `<h1>` is skipped.
 *
 * Only element children of `<body>` whose tag is in the supported block list
 * are converted; everything else at the top level is ignored. All writes
 * happen in one Y transaction. If the fragment is not attached to a document,
 * a warning is logged and nothing is written.
 */
export function appendHtmlToFragment(fragment: Y.XmlFragment, html: string): void {
  const ydoc = fragment.doc
  if (!ydoc) {
    console.warn('[htmlToYjs] appendHtmlToFragment: fragment has no doc — skipping')
    return
  }

  const parsed = new DOMParser().parseFromString(html, 'text/html')
  const supportedTags = new Set([
    'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'ul', 'ol', 'pre', 'blockquote', 'table', 'hr',
    'div', 'section', 'article', 'header', 'footer', 'main', 'aside'
  ])

  ydoc.transact(() => {
    Array.from(parsed.body.children)
      .filter(child => supportedTags.has(child.tagName.toLowerCase()))
      .forEach((block) => {
        convertBlockElement(block, fragment)
      })
  })
}
|