@knpkv/confluence-to-markdown 0.2.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +60 -0
- package/LICENSE +21 -0
- package/README.md +282 -14
- package/dist/ConfluenceAuth.d.ts +76 -0
- package/dist/ConfluenceAuth.d.ts.map +1 -0
- package/dist/ConfluenceAuth.js +356 -0
- package/dist/ConfluenceAuth.js.map +1 -0
- package/dist/ConfluenceClient.d.ts +26 -2
- package/dist/ConfluenceClient.d.ts.map +1 -1
- package/dist/ConfluenceClient.js +98 -92
- package/dist/ConfluenceClient.js.map +1 -1
- package/dist/ConfluenceConfig.d.ts +4 -24
- package/dist/ConfluenceConfig.d.ts.map +1 -1
- package/dist/ConfluenceConfig.js +45 -7
- package/dist/ConfluenceConfig.js.map +1 -1
- package/dist/ConfluenceError.d.ts +89 -6
- package/dist/ConfluenceError.d.ts.map +1 -1
- package/dist/ConfluenceError.js +88 -5
- package/dist/ConfluenceError.js.map +1 -1
- package/dist/GitError.d.ts +103 -0
- package/dist/GitError.d.ts.map +1 -0
- package/dist/GitError.js +85 -0
- package/dist/GitError.js.map +1 -0
- package/dist/GitService.d.ts +175 -0
- package/dist/GitService.d.ts.map +1 -0
- package/dist/GitService.js +431 -0
- package/dist/GitService.js.map +1 -0
- package/dist/LocalFileSystem.d.ts +29 -4
- package/dist/LocalFileSystem.d.ts.map +1 -1
- package/dist/LocalFileSystem.js +80 -6
- package/dist/LocalFileSystem.js.map +1 -1
- package/dist/MarkdownConverter.d.ts +49 -2
- package/dist/MarkdownConverter.d.ts.map +1 -1
- package/dist/MarkdownConverter.js +73 -111
- package/dist/MarkdownConverter.js.map +1 -1
- package/dist/SchemaConverterError.d.ts +108 -0
- package/dist/SchemaConverterError.d.ts.map +1 -0
- package/dist/SchemaConverterError.js +84 -0
- package/dist/SchemaConverterError.js.map +1 -0
- package/dist/Schemas.d.ts +225 -1
- package/dist/Schemas.d.ts.map +1 -1
- package/dist/Schemas.js +155 -6
- package/dist/Schemas.js.map +1 -1
- package/dist/SyncEngine.d.ts +30 -20
- package/dist/SyncEngine.d.ts.map +1 -1
- package/dist/SyncEngine.js +566 -117
- package/dist/SyncEngine.js.map +1 -1
- package/dist/ast/BlockNode.d.ts +468 -0
- package/dist/ast/BlockNode.d.ts.map +1 -0
- package/dist/ast/BlockNode.js +319 -0
- package/dist/ast/BlockNode.js.map +1 -0
- package/dist/ast/Document.d.ts +244 -0
- package/dist/ast/Document.d.ts.map +1 -0
- package/dist/ast/Document.js +69 -0
- package/dist/ast/Document.js.map +1 -0
- package/dist/ast/InlineNode.d.ts +477 -0
- package/dist/ast/InlineNode.d.ts.map +1 -0
- package/dist/ast/InlineNode.js +263 -0
- package/dist/ast/InlineNode.js.map +1 -0
- package/dist/ast/MacroNode.d.ts +267 -0
- package/dist/ast/MacroNode.d.ts.map +1 -0
- package/dist/ast/MacroNode.js +164 -0
- package/dist/ast/MacroNode.js.map +1 -0
- package/dist/ast/index.d.ts +10 -0
- package/dist/ast/index.d.ts.map +1 -0
- package/dist/ast/index.js +14 -0
- package/dist/ast/index.js.map +1 -0
- package/dist/bin.js +33 -149
- package/dist/bin.js.map +1 -1
- package/dist/commands/auth.d.ts +15 -0
- package/dist/commands/auth.d.ts.map +1 -0
- package/dist/commands/auth.js +86 -0
- package/dist/commands/auth.js.map +1 -0
- package/dist/commands/clone.d.ts +12 -0
- package/dist/commands/clone.d.ts.map +1 -0
- package/dist/commands/clone.js +93 -0
- package/dist/commands/clone.js.map +1 -0
- package/dist/commands/delete.d.ts +13 -0
- package/dist/commands/delete.d.ts.map +1 -0
- package/dist/commands/delete.js +48 -0
- package/dist/commands/delete.js.map +1 -0
- package/dist/commands/errorHandler.d.ts +14 -0
- package/dist/commands/errorHandler.d.ts.map +1 -0
- package/dist/commands/errorHandler.js +33 -0
- package/dist/commands/errorHandler.js.map +1 -0
- package/dist/commands/git.d.ts +22 -0
- package/dist/commands/git.d.ts.map +1 -0
- package/dist/commands/git.js +72 -0
- package/dist/commands/git.js.map +1 -0
- package/dist/commands/index.d.ts +11 -0
- package/dist/commands/index.d.ts.map +1 -0
- package/dist/commands/index.js +11 -0
- package/dist/commands/index.js.map +1 -0
- package/dist/commands/layers.d.ts +31 -0
- package/dist/commands/layers.d.ts.map +1 -0
- package/dist/commands/layers.js +137 -0
- package/dist/commands/layers.js.map +1 -0
- package/dist/commands/new.d.ts +9 -0
- package/dist/commands/new.d.ts.map +1 -0
- package/dist/commands/new.js +80 -0
- package/dist/commands/new.js.map +1 -0
- package/dist/commands/pageTree.d.ts +18 -0
- package/dist/commands/pageTree.d.ts.map +1 -0
- package/dist/commands/pageTree.js +20 -0
- package/dist/commands/pageTree.js.map +1 -0
- package/dist/commands/shared.d.ts +15 -0
- package/dist/commands/shared.d.ts.map +1 -0
- package/dist/commands/shared.js +27 -0
- package/dist/commands/shared.js.map +1 -0
- package/dist/commands/sync.d.ts +15 -0
- package/dist/commands/sync.d.ts.map +1 -0
- package/dist/commands/sync.js +101 -0
- package/dist/commands/sync.js.map +1 -0
- package/dist/index.d.ts +10 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +14 -0
- package/dist/index.js.map +1 -1
- package/dist/internal/NodeLayers.d.ts +7 -0
- package/dist/internal/NodeLayers.d.ts.map +1 -0
- package/dist/internal/NodeLayers.js +19 -0
- package/dist/internal/NodeLayers.js.map +1 -0
- package/dist/internal/frontmatter.d.ts +10 -0
- package/dist/internal/frontmatter.d.ts.map +1 -1
- package/dist/internal/frontmatter.js +16 -0
- package/dist/internal/frontmatter.js.map +1 -1
- package/dist/internal/gitCommands.d.ts +78 -0
- package/dist/internal/gitCommands.d.ts.map +1 -0
- package/dist/internal/gitCommands.js +156 -0
- package/dist/internal/gitCommands.js.map +1 -0
- package/dist/internal/hashUtils.d.ts +42 -1
- package/dist/internal/hashUtils.d.ts.map +1 -1
- package/dist/internal/hashUtils.js +38 -2
- package/dist/internal/hashUtils.js.map +1 -1
- package/dist/internal/oauthServer.d.ts +55 -0
- package/dist/internal/oauthServer.d.ts.map +1 -0
- package/dist/internal/oauthServer.js +110 -0
- package/dist/internal/oauthServer.js.map +1 -0
- package/dist/internal/pathUtils.d.ts +21 -4
- package/dist/internal/pathUtils.d.ts.map +1 -1
- package/dist/internal/pathUtils.js +24 -13
- package/dist/internal/pathUtils.js.map +1 -1
- package/dist/internal/tokenStorage.d.ts +75 -0
- package/dist/internal/tokenStorage.d.ts.map +1 -0
- package/dist/internal/tokenStorage.js +149 -0
- package/dist/internal/tokenStorage.js.map +1 -0
- package/dist/internal/userCache.d.ts +42 -0
- package/dist/internal/userCache.d.ts.map +1 -0
- package/dist/internal/userCache.js +51 -0
- package/dist/internal/userCache.js.map +1 -0
- package/dist/parsers/ConfluenceParser.d.ts +26 -0
- package/dist/parsers/ConfluenceParser.d.ts.map +1 -0
- package/dist/parsers/ConfluenceParser.js +792 -0
- package/dist/parsers/ConfluenceParser.js.map +1 -0
- package/dist/parsers/MarkdownParser.d.ts +26 -0
- package/dist/parsers/MarkdownParser.d.ts.map +1 -0
- package/dist/parsers/MarkdownParser.js +873 -0
- package/dist/parsers/MarkdownParser.js.map +1 -0
- package/dist/parsers/index.d.ts +8 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/index.js +8 -0
- package/dist/parsers/index.js.map +1 -0
- package/dist/schemas/ConfluenceSchema.d.ts +21 -0
- package/dist/schemas/ConfluenceSchema.d.ts.map +1 -0
- package/dist/schemas/ConfluenceSchema.js +38 -0
- package/dist/schemas/ConfluenceSchema.js.map +1 -0
- package/dist/schemas/ConversionSchema.d.ts +35 -0
- package/dist/schemas/ConversionSchema.d.ts.map +1 -0
- package/dist/schemas/ConversionSchema.js +208 -0
- package/dist/schemas/ConversionSchema.js.map +1 -0
- package/dist/schemas/MarkdownSchema.d.ts +21 -0
- package/dist/schemas/MarkdownSchema.d.ts.map +1 -0
- package/dist/schemas/MarkdownSchema.js +38 -0
- package/dist/schemas/MarkdownSchema.js.map +1 -0
- package/dist/schemas/hast/HastFromHtml.d.ts +27 -0
- package/dist/schemas/hast/HastFromHtml.d.ts.map +1 -0
- package/dist/schemas/hast/HastFromHtml.js +107 -0
- package/dist/schemas/hast/HastFromHtml.js.map +1 -0
- package/dist/schemas/hast/HastSchema.d.ts +195 -0
- package/dist/schemas/hast/HastSchema.d.ts.map +1 -0
- package/dist/schemas/hast/HastSchema.js +183 -0
- package/dist/schemas/hast/HastSchema.js.map +1 -0
- package/dist/schemas/hast/index.d.ts +9 -0
- package/dist/schemas/hast/index.d.ts.map +1 -0
- package/dist/schemas/hast/index.js +3 -0
- package/dist/schemas/hast/index.js.map +1 -0
- package/dist/schemas/index.d.ts +14 -0
- package/dist/schemas/index.d.ts.map +1 -0
- package/dist/schemas/index.js +16 -0
- package/dist/schemas/index.js.map +1 -0
- package/dist/schemas/mdast/MdastFromMarkdown.d.ts +30 -0
- package/dist/schemas/mdast/MdastFromMarkdown.d.ts.map +1 -0
- package/dist/schemas/mdast/MdastFromMarkdown.js +79 -0
- package/dist/schemas/mdast/MdastFromMarkdown.js.map +1 -0
- package/dist/schemas/mdast/MdastSchema.d.ts +385 -0
- package/dist/schemas/mdast/MdastSchema.d.ts.map +1 -0
- package/dist/schemas/mdast/MdastSchema.js +266 -0
- package/dist/schemas/mdast/MdastSchema.js.map +1 -0
- package/dist/schemas/mdast/index.d.ts +10 -0
- package/dist/schemas/mdast/index.d.ts.map +1 -0
- package/dist/schemas/mdast/index.js +4 -0
- package/dist/schemas/mdast/index.js.map +1 -0
- package/dist/schemas/mdast/mdastToString.d.ts +13 -0
- package/dist/schemas/mdast/mdastToString.d.ts.map +1 -0
- package/dist/schemas/mdast/mdastToString.js +85 -0
- package/dist/schemas/mdast/mdastToString.js.map +1 -0
- package/dist/schemas/nodes/block/BlockSchema.d.ts +43 -0
- package/dist/schemas/nodes/block/BlockSchema.d.ts.map +1 -0
- package/dist/schemas/nodes/block/BlockSchema.js +634 -0
- package/dist/schemas/nodes/block/BlockSchema.js.map +1 -0
- package/dist/schemas/nodes/block/index.d.ts +7 -0
- package/dist/schemas/nodes/block/index.d.ts.map +1 -0
- package/dist/schemas/nodes/block/index.js +7 -0
- package/dist/schemas/nodes/block/index.js.map +1 -0
- package/dist/schemas/nodes/index.d.ts +9 -0
- package/dist/schemas/nodes/index.d.ts.map +1 -0
- package/dist/schemas/nodes/index.js +12 -0
- package/dist/schemas/nodes/index.js.map +1 -0
- package/dist/schemas/nodes/inline/InlineSchema.d.ts +48 -0
- package/dist/schemas/nodes/inline/InlineSchema.d.ts.map +1 -0
- package/dist/schemas/nodes/inline/InlineSchema.js +436 -0
- package/dist/schemas/nodes/inline/InlineSchema.js.map +1 -0
- package/dist/schemas/nodes/inline/index.d.ts +7 -0
- package/dist/schemas/nodes/inline/index.d.ts.map +1 -0
- package/dist/schemas/nodes/inline/index.js +7 -0
- package/dist/schemas/nodes/inline/index.js.map +1 -0
- package/dist/schemas/nodes/macro/MacroSchema.d.ts +27 -0
- package/dist/schemas/nodes/macro/MacroSchema.d.ts.map +1 -0
- package/dist/schemas/nodes/macro/MacroSchema.js +162 -0
- package/dist/schemas/nodes/macro/MacroSchema.js.map +1 -0
- package/dist/schemas/nodes/macro/index.d.ts +7 -0
- package/dist/schemas/nodes/macro/index.d.ts.map +1 -0
- package/dist/schemas/nodes/macro/index.js +7 -0
- package/dist/schemas/nodes/macro/index.js.map +1 -0
- package/dist/schemas/preprocessing/ConfluencePreprocessor.d.ts +24 -0
- package/dist/schemas/preprocessing/ConfluencePreprocessor.d.ts.map +1 -0
- package/dist/schemas/preprocessing/ConfluencePreprocessor.js +351 -0
- package/dist/schemas/preprocessing/ConfluencePreprocessor.js.map +1 -0
- package/dist/schemas/preprocessing/index.d.ts +8 -0
- package/dist/schemas/preprocessing/index.d.ts.map +1 -0
- package/dist/schemas/preprocessing/index.js +2 -0
- package/dist/schemas/preprocessing/index.js.map +1 -0
- package/dist/serializers/ConfluenceSerializer.d.ts +30 -0
- package/dist/serializers/ConfluenceSerializer.d.ts.map +1 -0
- package/dist/serializers/ConfluenceSerializer.js +551 -0
- package/dist/serializers/ConfluenceSerializer.js.map +1 -0
- package/dist/serializers/MarkdownSerializer.d.ts +34 -0
- package/dist/serializers/MarkdownSerializer.d.ts.map +1 -0
- package/dist/serializers/MarkdownSerializer.js +355 -0
- package/dist/serializers/MarkdownSerializer.js.map +1 -0
- package/dist/serializers/index.d.ts +8 -0
- package/dist/serializers/index.d.ts.map +1 -0
- package/dist/serializers/index.js +8 -0
- package/dist/serializers/index.js.map +1 -0
- package/package.json +27 -16
- package/src/ConfluenceAuth.ts +571 -0
- package/src/ConfluenceClient.ts +188 -156
- package/src/ConfluenceConfig.ts +63 -7
- package/src/ConfluenceError.ts +110 -14
- package/src/GitError.ts +92 -0
- package/src/GitService.ts +859 -0
- package/src/LocalFileSystem.ts +179 -9
- package/src/MarkdownConverter.ts +126 -122
- package/src/SchemaConverterError.ts +108 -0
- package/src/Schemas.ts +223 -6
- package/src/SyncEngine.ts +745 -162
- package/src/ast/BlockNode.ts +425 -0
- package/src/ast/Document.ts +90 -0
- package/src/ast/InlineNode.ts +323 -0
- package/src/ast/MacroNode.ts +245 -0
- package/src/ast/index.ts +83 -0
- package/src/bin.ts +50 -249
- package/src/commands/auth.ts +117 -0
- package/src/commands/clone.ts +145 -0
- package/src/commands/delete.ts +57 -0
- package/src/commands/errorHandler.ts +32 -0
- package/src/commands/git.ts +114 -0
- package/src/commands/index.ts +10 -0
- package/src/commands/layers.ts +211 -0
- package/src/commands/new.ts +99 -0
- package/src/commands/pageTree.ts +40 -0
- package/src/commands/shared.ts +35 -0
- package/src/commands/sync.ts +129 -0
- package/src/index.ts +21 -1
- package/src/internal/NodeLayers.ts +21 -0
- package/src/internal/frontmatter.ts +21 -0
- package/src/internal/gitCommands.ts +229 -0
- package/src/internal/hashUtils.ts +65 -3
- package/src/internal/oauthServer.ts +199 -0
- package/src/internal/pathUtils.ts +34 -17
- package/src/internal/tokenStorage.ts +240 -0
- package/src/internal/userCache.ts +90 -0
- package/src/parsers/ConfluenceParser.ts +950 -0
- package/src/parsers/MarkdownParser.ts +1198 -0
- package/src/parsers/index.ts +8 -0
- package/src/schemas/ConfluenceSchema.ts +56 -0
- package/src/schemas/ConversionSchema.ts +318 -0
- package/src/schemas/MarkdownSchema.ts +56 -0
- package/src/schemas/hast/HastFromHtml.ts +153 -0
- package/src/schemas/hast/HastSchema.ts +274 -0
- package/src/schemas/hast/index.ts +35 -0
- package/src/schemas/index.ts +20 -0
- package/src/schemas/mdast/MdastFromMarkdown.ts +118 -0
- package/src/schemas/mdast/MdastSchema.ts +566 -0
- package/src/schemas/mdast/index.ts +59 -0
- package/src/schemas/mdast/mdastToString.ts +102 -0
- package/src/schemas/nodes/block/BlockSchema.ts +773 -0
- package/src/schemas/nodes/block/index.ts +13 -0
- package/src/schemas/nodes/index.ts +20 -0
- package/src/schemas/nodes/inline/InlineSchema.ts +523 -0
- package/src/schemas/nodes/inline/index.ts +14 -0
- package/src/schemas/nodes/macro/MacroSchema.ts +226 -0
- package/src/schemas/nodes/macro/index.ts +6 -0
- package/src/schemas/preprocessing/ConfluencePreprocessor.ts +446 -0
- package/src/schemas/preprocessing/index.ts +8 -0
- package/src/serializers/ConfluenceSerializer.ts +717 -0
- package/src/serializers/MarkdownSerializer.ts +493 -0
- package/src/serializers/index.ts +8 -0
- package/test/GitService.test.ts +209 -0
- package/test/MarkdownConverter.test.ts +37 -3
- package/test/Schemas.test.ts +97 -2
- package/test/ast/BlockNode.test.ts +265 -0
- package/test/ast/Document.test.ts +126 -0
- package/test/ast/InlineNode.test.ts +161 -0
- package/test/fixtures/integration-test.html.fixture +103 -0
- package/test/fixtures/integration-test.md.expected +257 -0
- package/test/integration.test.ts +269 -0
- package/test/oauthServer.test.ts +50 -0
- package/test/parsers/ConfluenceParser.test.ts +283 -0
- package/test/schemas/ConfluencePreprocessor.test.ts +180 -0
- package/test/schemas/ConversionSchema.test.ts +159 -0
- package/test/schemas/HastSchema.test.ts +138 -0
- package/test/schemas/MdastSchema.test.ts +145 -0
- package/test/schemas/nodes/block/BlockSchema.test.ts +173 -0
- package/test/schemas/nodes/inline/InlineSchema.test.ts +198 -0
- package/test/schemas/nodes/macro/MacroSchema.test.ts +142 -0
- package/test/tokenStorage.test.ts +99 -0
|
@@ -0,0 +1,950 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parser for Confluence storage format (HTML) to AST.
|
|
3
|
+
*
|
|
4
|
+
* @module
|
|
5
|
+
*/
|
|
6
|
+
import * as Effect from "effect/Effect"
|
|
7
|
+
import * as Schema from "effect/Schema"
|
|
8
|
+
import rehypeParse from "rehype-parse"
|
|
9
|
+
import { unified } from "unified"
|
|
10
|
+
import {
|
|
11
|
+
CodeBlock,
|
|
12
|
+
Heading,
|
|
13
|
+
Image,
|
|
14
|
+
Paragraph,
|
|
15
|
+
Table,
|
|
16
|
+
TableCell,
|
|
17
|
+
TableRow,
|
|
18
|
+
type TaskItem,
|
|
19
|
+
type TaskList,
|
|
20
|
+
ThematicBreak,
|
|
21
|
+
UnsupportedBlock
|
|
22
|
+
} from "../ast/BlockNode.js"
|
|
23
|
+
import { type Document, type DocumentNode, makeDocument } from "../ast/Document.js"
|
|
24
|
+
import {
|
|
25
|
+
ColoredText,
|
|
26
|
+
DateTime,
|
|
27
|
+
Emoticon,
|
|
28
|
+
Emphasis,
|
|
29
|
+
Highlight,
|
|
30
|
+
InlineCode,
|
|
31
|
+
type InlineNode,
|
|
32
|
+
LineBreak,
|
|
33
|
+
Link,
|
|
34
|
+
Strikethrough,
|
|
35
|
+
Strong,
|
|
36
|
+
Subscript,
|
|
37
|
+
Superscript,
|
|
38
|
+
Text,
|
|
39
|
+
Underline,
|
|
40
|
+
UnsupportedInline,
|
|
41
|
+
UserMention
|
|
42
|
+
} from "../ast/InlineNode.js"
|
|
43
|
+
import { type ExpandMacro, type InfoPanel, PanelTypes, type TocMacro } from "../ast/MacroNode.js"
|
|
44
|
+
import { ParseError } from "../SchemaConverterError.js"
|
|
45
|
+
import { PreprocessedHtmlFromConfluence } from "../schemas/preprocessing/index.js"
|
|
46
|
+
|
|
47
|
+
// Minimal structural hast typings, declared locally so this module does not
// need a dependency on the hast type package.

/** Fields shared by every hast node that owns a list of child nodes. */
interface HastParent {
  children: Array<HastNode>
}

/** Text leaf: carries the literal character data in `value`. */
interface HastText {
  type: "text"
  value: string
}

/** Element node: a tag name, optional property bag and child nodes. */
interface HastElement extends HastParent {
  type: "element"
  tagName: string
  properties?: Record<string, unknown>
}

/** Root of a parsed tree or fragment. */
interface HastRoot extends HastParent {
  type: "root"
}

/**
 * Any node that can appear in the tree. The trailing `{ type: string }`
 * member admits node kinds not modelled here (for example comments), which
 * is why callers cast after checking `type`.
 */
type HastNode = HastText | HastElement | HastRoot | { type: string }
|
|
66
|
+
|
|
67
|
+
/**
 * Turn Confluence storage-format HTML into a Document AST.
 *
 * The input is first decoded through `PreprocessedHtmlFromConfluence`, then
 * parsed as an HTML fragment with rehype, and finally converted node by node.
 * The untouched input string is handed to `makeDocument` alongside the
 * converted nodes so the original markup stays available for roundtripping.
 *
 * Both the preprocessing step and the HTML parse step report failures as a
 * `ParseError` with `source: "confluence"` and the first 200 characters of
 * the input as `rawContent`.
 *
 * @example
 * ```typescript
 * import { parseConfluenceHtml } from "@knpkv/confluence-to-markdown/parsers/ConfluenceParser"
 * import { Effect } from "effect"
 *
 * Effect.gen(function* () {
 *   const doc = yield* parseConfluenceHtml("<h1>Title</h1><p>Content</p>")
 *   console.log(doc.children.length) // 2
 * })
 * ```
 *
 * @category Parsers
 */
export const parseConfluenceHtml = (html: string): Effect.Effect<Document, ParseError> => {
  // Single place where failures of either stage are shaped into a ParseError.
  const toParseError = (message: string): ParseError =>
    new ParseError({
      source: "confluence",
      message,
      rawContent: html.slice(0, 200)
    })

  return Effect.gen(function*() {
    // Stage 1: rewrite Confluence macros (the schema also validates size)
    const cleanedHtml = yield* Effect.mapError(
      Schema.decode(PreprocessedHtmlFromConfluence)(html),
      (cause) => toParseError(`Preprocessing error: ${cause.message}`)
    )

    // Stage 2: HTML text -> hast tree
    const tree = yield* Effect.try({
      try: () => unified().use(rehypeParse, { fragment: true }).parse(cleanedHtml) as HastRoot,
      catch: (cause) => toParseError(`HTML parse error: ${cause instanceof Error ? cause.message : String(cause)}`)
    })

    // Stage 3: hast tree -> document nodes, keeping the original HTML
    const blocks = yield* hastToDocumentNodes(tree)
    return makeDocument(blocks, html)
  })
}
|
|
112
|
+
|
|
113
|
+
/**
 * Walk the direct children of a hast root and convert each one to a
 * document node.
 *
 * - Root-level comments are decoded with `parseCommentNode`.
 * - A `div` carrying a `dataCfMarker` property is treated as a wrapper around
 *   a `cf:` comment: its first comment child is decoded and the div itself is
 *   not converted. A marker div with no comment child yields nothing.
 * - Every other element goes through `hastElementToNode`.
 * - Any other node type (for example root-level text) is skipped.
 *
 * Conversions that produce `null` are left out of the result.
 */
const hastToDocumentNodes = (root: HastRoot): Effect.Effect<Array<DocumentNode>, ParseError> =>
  Effect.gen(function*() {
    const collected: Array<DocumentNode> = []

    for (const entry of root.children) {
      let parsed: DocumentNode | null = null

      switch (entry.type) {
        case "comment": {
          const payload = (entry as { type: "comment"; value: string }).value
          parsed = yield* parseCommentNode(payload)
          break
        }
        case "element": {
          const el = entry as HastElement
          const isMarkerDiv = el.tagName === "div" && el.properties?.["dataCfMarker"] !== undefined
          if (!isMarkerDiv) {
            parsed = yield* hastElementToNode(el)
            break
          }
          const embedded = el.children.find((c) => c.type === "comment")
          if (embedded) {
            const payload = (embedded as { type: "comment"; value: string }).value
            parsed = yield* parseCommentNode(payload)
          }
          break
        }
        default:
          break
      }

      if (parsed !== null) collected.push(parsed)
    }

    return collected
  })
|
|
143
|
+
|
|
144
|
+
/**
 * Decode a `cf:` comment payload into a document node.
 *
 * Recognised payloads are:
 * - `cf:decision:localId;state;content|localId;state;content`
 * - `cf:layout-start` and `cf:layout-end`
 * - `cf:section:index;type;breakoutMode;breakoutWidth;cellCount`
 * - `cf:section-end:index`
 * - `cf:cell:sectionIndex;cellIndex`
 *
 * Each recognised payload is kept verbatim: the result is an
 * `UnsupportedBlock` whose `rawHtml` is the payload wrapped back into an HTML
 * comment, so it can be emitted again on roundtrip. Any other comment
 * produces `null`.
 */
const parseCommentNode = (comment: string): Effect.Effect<DocumentNode | null, ParseError> =>
  Effect.sync(() => {
    const isRoundtripMarker =
      // decision list
      /^cf:decision:(.*)$/.test(comment) ||
      // layout boundaries
      comment === "cf:layout-start" ||
      comment === "cf:layout-end" ||
      // section open / close
      /^cf:section:(\d+);([^;]*);([^;]*);([^;]*);(\d+)$/.test(comment) ||
      comment.startsWith("cf:section-end:") ||
      // layout cell
      /^cf:cell:(\d+);(\d+)$/.test(comment)

    if (!isRoundtripMarker) return null

    return new UnsupportedBlock({
      rawHtml: `<!--${comment}-->`,
      source: "confluence"
    })
  })
|
|
197
|
+
|
|
198
|
+
/**
 * Convert a hast element into a block-level or macro document node.
 *
 * Elements are matched in this order: headings (`h1`-`h6`), paragraphs,
 * `pre` code blocks, `hr`, images, tables, task lists, ordinary lists,
 * block quotes, panel macro divs, `details` (expand macro), `nav` TOC macros,
 * elements flagged with `dataUnsupportedMacro`, generic containers
 * (`div`/`section`/`article`), ignored tags, and inline tags found at block
 * level (wrapped in a paragraph). Anything else is preserved as an
 * `UnsupportedBlock` holding the element's serialized HTML.
 *
 * @param element - The hast element to convert.
 * @returns The converted node, or `null` when the element produces no output
 *   (an `img` with neither attachment nor `src`, an ignored tag, a container
 *   with no convertible element children, or an inline tag that converts to
 *   `null`).
 */
const hastElementToNode = (element: HastElement): Effect.Effect<DocumentNode | null, ParseError> =>
  Effect.gen(function*() {
    const tagName = element.tagName.toLowerCase()

    // Base-10 integer from an attribute value; missing, empty or non-numeric
    // input yields undefined so NaN never reaches a node.
    const toInt = (raw: string | undefined): number | undefined => {
      if (!raw) return undefined
      const parsed = parseInt(raw, 10)
      return Number.isNaN(parsed) ? undefined : parsed
    }

    // Heading
    if (/^h[1-6]$/.test(tagName)) {
      const levelStr = tagName[1]
      if (!levelStr) return null
      // The regex above guarantees a single digit in 1..6
      const level = parseInt(levelStr, 10) as 1 | 2 | 3 | 4 | 5 | 6
      const children = yield* hastChildrenToInline(element.children)
      return new Heading({ level, children })
    }

    // Paragraph (with optional alignment and indent)
    if (tagName === "p") {
      const children = yield* hastChildrenToInline(element.children)
      const style = element.properties?.style as string | undefined
      let alignment: "left" | "center" | "right" | undefined
      let indent: number | undefined

      if (style) {
        const alignMatch = style.match(/text-align:\s*(left|center|right)/)
        if (alignMatch?.[1]) {
          alignment = alignMatch[1] as "left" | "center" | "right"
        }
        // Indent is read from a pixel margin-left value
        const marginMatch = style.match(/margin-left:\s*(\d+(?:\.\d+)?)\s*px/)
        if (marginMatch?.[1]) {
          indent = parseFloat(marginMatch[1])
        }
      }

      if (alignment !== undefined || indent !== undefined) {
        // Only set the optional keys that were actually found
        return new Paragraph({
          children,
          ...(alignment !== undefined ? { alignment } : {}),
          ...(indent !== undefined ? { indent } : {})
        })
      }
      return new Paragraph({ children })
    }

    // Code block: prefer the text of a nested <code>, else the <pre> itself
    if (tagName === "pre") {
      const codeEl = element.children.find(
        (c): c is HastElement => c.type === "element" && (c as HastElement).tagName === "code"
      )
      const code = codeEl ? getTextContent(codeEl) : getTextContent(element)
      const language = (element.properties?.["dataLanguage"] as string) || undefined
      return new CodeBlock({ code, language })
    }

    // Thematic break
    if (tagName === "hr") {
      return new ThematicBreak({})
    }

    // Image (supports both URL and Confluence attachments from preprocessed data)
    if (tagName === "img") {
      const src = element.properties?.src as string | undefined
      const dataAttachment = element.properties?.["dataAttachment"] as string | undefined
      const dataAlign = element.properties?.["dataAlign"] as string | undefined
      const dataWidth = element.properties?.["dataWidth"] as string | undefined
      const alt = (element.properties?.alt as string) || undefined

      // Confluence attachment (preprocessed)
      if (dataAttachment) {
        const width = toInt(dataWidth)
        return new Image({
          attachment: { filename: dataAttachment },
          alt,
          ...(dataAlign ? { align: dataAlign } : {}),
          ...(width !== undefined ? { width } : {})
        })
      }

      // URL-based image
      if (!src) return null
      return new Image({
        src,
        alt,
        title: (element.properties?.title as string) || undefined
      })
    }

    // Table
    if (tagName === "table") {
      return yield* parseTable(element)
    }

    // Task list (from preprocessed data) - must be checked before plain lists
    if (tagName === "ul" && element.properties?.["dataMacro"] === "task-list") {
      return yield* parseTaskList(element)
    }

    // Lists
    if (tagName === "ul" || tagName === "ol") {
      return yield* parseList(element, tagName === "ol")
    }

    // Block quote
    if (tagName === "blockquote") {
      const children = yield* hastChildrenToSimpleBlocks(element.children)
      return { _tag: "BlockQuote" as const, version: 1, children }
    }

    // Macro divs: only panel macros are handled here; other macro names fall
    // through to the checks below.
    if (tagName === "div" && element.properties?.["dataMacro"]) {
      const macro = element.properties["dataMacro"] as string
      if ((PanelTypes as ReadonlyArray<string>).includes(macro)) {
        const children = yield* hastChildrenToSimpleBlocks(element.children)
        return {
          _tag: "InfoPanel" as const,
          version: 1,
          panelType: macro as (typeof PanelTypes)[number],
          title: (element.properties["dataTitle"] as string) || undefined,
          children
        } satisfies InfoPanel
      }
    }

    // Expand/details: <summary> text becomes the title, the rest the body
    if (tagName === "details") {
      const summary = element.children.find(
        (c): c is HastElement => c.type === "element" && (c as HastElement).tagName === "summary"
      )
      const title = summary ? getTextContent(summary) : undefined
      const contentChildren = element.children.filter(
        (c) => !(c.type === "element" && (c as HastElement).tagName === "summary")
      )
      const children = yield* hastChildrenToSimpleBlocks(contentChildren)
      return {
        _tag: "ExpandMacro" as const,
        version: 1,
        title,
        children
      } satisfies ExpandMacro
    }

    // TOC
    if (tagName === "nav" && element.properties?.["dataMacro"] === "toc") {
      const minStr = element.properties["dataMin"] as string | undefined
      const maxStr = element.properties["dataMax"] as string | undefined
      return {
        _tag: "TocMacro" as const,
        version: 1,
        minLevel: toInt(minStr),
        maxLevel: toInt(maxStr)
      } satisfies TocMacro
    }

    // Unsupported macro: keep the serialized element for roundtrip
    if (element.properties?.["dataUnsupportedMacro"]) {
      return new UnsupportedBlock({
        rawHtml: hastToHtml(element),
        source: "confluence"
      })
    }

    // Generic container - recurse into element children
    if (tagName === "div" || tagName === "section" || tagName === "article") {
      const children: Array<DocumentNode> = []
      for (const child of element.children) {
        if (child.type === "element") {
          const node = yield* hastElementToNode(child as HastElement)
          if (node !== null) children.push(node)
        }
      }
      // NOTE(review): every element child is converted, but only the first
      // converted node is returned because this function yields a single
      // node; later siblings inside the container are dropped.
      if (children.length >= 1) {
        const first = children[0]
        return first !== undefined ? first : null
      }
      return null
    }

    // Ignore common layout elements
    if (["br", "html", "head", "body"].includes(tagName)) {
      return null
    }

    // Inline elements at block level - wrap in paragraph.
    // "s" is listed next to "del" because hastElementToInline maps both to
    // Strikethrough.
    if (["a", "strong", "em", "b", "i", "u", "code", "del", "s", "sub", "sup", "span"].includes(tagName)) {
      const inlineNode = yield* hastElementToInline(element)
      if (inlineNode) {
        return new Paragraph({ children: [inlineNode] })
      }
      return null
    }

    // Unknown block element
    return new UnsupportedBlock({
      rawHtml: hastToHtml(element),
      source: "confluence"
    })
  })
|
|
394
|
+
|
|
395
|
+
/**
 * Convert a list of hast children into inline AST nodes.
 *
 * Text nodes become `Text` nodes with their value unchanged, except that
 * whitespace-only text is dropped while the result is still empty (so leading
 * blank text never starts the list). Elements are converted with
 * `hastElementToInline`, and `null` results are skipped. Other node types are
 * ignored.
 */
const hastChildrenToInline = (
  children: Array<HastNode>
): Effect.Effect<Array<InlineNode>, ParseError> =>
  Effect.gen(function*() {
    const inlines: Array<InlineNode> = []

    for (const item of children) {
      if (item.type === "element") {
        const converted = yield* hastElementToInline(item as HastElement)
        if (converted !== null) inlines.push(converted)
        continue
      }

      if (item.type !== "text") continue

      const { value } = item as HastText
      const isLeadingBlank = inlines.length === 0 && value.trim() === ""
      if (!isLeadingBlank) {
        inlines.push(new Text({ value }))
      }
    }

    return inlines
  })
|
|
416
|
+
|
|
417
|
+
/**
 * Convert hast Element to InlineNode.
 *
 * Dispatches on the (lower-cased) tag name and, for `<a>`, `<span>` and
 * `<nav>`, on preprocessed `data-*` properties. Returns `null` when the
 * element yields no inline node (a link without `href`, or a plain `<span>`
 * with no convertible children). Elements that are not recognised are kept
 * as raw HTML in an `UnsupportedInline`.
 *
 * A plain `<span>` with exactly one inline child is unwrapped to that child.
 * With several children the span cannot be expressed as a single InlineNode,
 * so it is preserved as raw HTML rather than dropping its content.
 */
const hastElementToInline = (element: HastElement): Effect.Effect<InlineNode | null, ParseError> =>
  Effect.gen(function*() {
    const tagName = element.tagName.toLowerCase()

    // Strong/bold
    if (tagName === "strong" || tagName === "b") {
      const children = yield* hastChildrenToBaseInline(element.children)
      return new Strong({ children })
    }

    // Emphasis/italic
    if (tagName === "em" || tagName === "i") {
      const children = yield* hastChildrenToBaseInline(element.children)
      return new Emphasis({ children })
    }

    // Underline
    if (tagName === "u") {
      const children = yield* hastChildrenToBaseInline(element.children)
      return new Underline({ children })
    }

    // Strikethrough
    if (tagName === "del" || tagName === "s") {
      const children = yield* hastChildrenToBaseInline(element.children)
      return new Strikethrough({ children })
    }

    // Subscript
    if (tagName === "sub") {
      const children = yield* hastChildrenToBaseInline(element.children)
      return new Subscript({ children })
    }

    // Superscript
    if (tagName === "sup") {
      const children = yield* hastChildrenToBaseInline(element.children)
      return new Superscript({ children })
    }

    // Inline code
    if (tagName === "code") {
      return new InlineCode({ value: getTextContent(element) })
    }

    // Smart link (Jira, Confluence search, etc.) - preserve datasource for roundtrip.
    // Must be checked before the generic link branch below.
    if (tagName === "a" && element.properties?.["dataDatasource"]) {
      const href = element.properties?.href as string | undefined
      const appearance = (element.properties?.["dataCardAppearance"] as string) || "inline"
      const datasource = element.properties["dataDatasource"] as string
      return new UnsupportedInline({
        raw: `<!--cf:smartlink:${encodeURIComponent(href ?? "")};${encodeURIComponent(appearance)};${
          encodeURIComponent(datasource)
        }-->`,
        source: "confluence"
      })
    }

    // Link
    if (tagName === "a") {
      const href = element.properties?.href as string | undefined
      if (!href) return null
      const children = yield* hastChildrenToBaseInline(element.children)
      return new Link({
        href,
        title: (element.properties?.title as string) || undefined,
        children
      })
    }

    // Line break
    if (tagName === "br") {
      return new LineBreak({})
    }

    // Date/time (rehype converts datetime attr to camelCase dateTime)
    if (tagName === "time") {
      const datetime = (element.properties?.dateTime as string) || ""
      return new DateTime({ datetime })
    }

    // Emoticon (preprocessed from ac:emoticon)
    if (tagName === "span" && element.properties?.["dataEmoji"]) {
      const shortname = (element.properties["dataEmoji"] as string) || ""
      const emojiId = (element.properties["dataEmojiId"] as string) || ""
      const fallback = getTextContent(element)
      return new Emoticon({ shortname, emojiId, fallback })
    }

    // User mention (preprocessed from ac:link > ri:user)
    if (tagName === "span" && element.properties?.["dataUserMention"]) {
      const accountId = (element.properties["dataUserMention"] as string) || ""
      return new UserMention({ accountId })
    }

    // Confluence link with link-body (preprocessed from ac:link > ac:link-body)
    if (tagName === "span" && element.properties?.["dataConfluenceLink"] !== undefined) {
      const linkText = getTextContent(element)
      return new UnsupportedInline({
        raw: `<!--cf:link:${encodeURIComponent(linkText)}-->`,
        source: "confluence"
      })
    }

    // Status macro (inline) - use comment encoding for roundtrip
    if (tagName === "span" && element.properties?.["dataMacro"] === "status") {
      const color = (element.properties["dataColor"] as string) || ""
      const title = getTextContent(element)
      return new UnsupportedInline({
        raw: `<!--cf:status:${encodeURIComponent(title)};${encodeURIComponent(color)}-->`,
        source: "confluence"
      })
    }

    // TOC macro in inline context (e.g., inside table cell) - use comment encoding
    // Use ; as separator (not | which breaks markdown tables)
    if (tagName === "nav" && element.properties?.["dataMacro"] === "toc") {
      const minStr = element.properties["dataMin"] as string | undefined
      const maxStr = element.properties["dataMax"] as string | undefined
      return new UnsupportedInline({
        raw: `<!--cf:toc:${minStr ?? ""};${maxStr ?? ""}-->`,
        source: "confluence"
      })
    }

    // Remaining spans: colored/highlighted text, otherwise a plain wrapper
    if (tagName === "span") {
      const style = element.properties?.style as string | undefined
      if (style) {
        // The (?:^|;) anchor keeps "color:" from matching inside "background-color:".
        const colorMatch = style.match(/(?:^|;)\s*color:\s*([^;]+)/)
        const bgMatch = style.match(/(?:^|;)\s*background-color:\s*([^;]+)/)

        // Text color takes precedence when both are present.
        if (colorMatch?.[1]) {
          const children = yield* hastChildrenToBaseInline(element.children)
          return new ColoredText({ color: colorMatch[1].trim(), children })
        }

        if (bgMatch?.[1]) {
          const children = yield* hastChildrenToBaseInline(element.children)
          return new Highlight({ backgroundColor: bgMatch[1].trim(), children })
        }
      }

      // Nested inline elements - extract content
      const children = yield* hastChildrenToInline(element.children)
      if (children.length === 0) {
        return null
      }
      if (children.length === 1) {
        const first = children[0]
        return first !== undefined ? first : null
      }
      // Several children cannot be returned as one InlineNode; keep the span
      // verbatim instead of silently discarding its content.
      return new UnsupportedInline({
        raw: hastToHtml(element),
        source: "confluence"
      })
    }

    // Images can be inline too
    if (tagName === "img") {
      return new UnsupportedInline({
        raw: hastToHtml(element),
        source: "confluence"
      })
    }

    // Unknown inline element
    return new UnsupportedInline({
      raw: hastToHtml(element),
      source: "confluence"
    })
  })
|
|
586
|
+
|
|
587
|
+
/**
 * Convert hast children to the restricted inline set used as children of
 * wrappers such as Strong, Emphasis and Link.
 *
 * Text is kept verbatim (no whitespace trimming), `<code>` becomes
 * `InlineCode`, `<br>` becomes `LineBreak`, and every other element is
 * preserved as raw HTML in an `UnsupportedInline`. Nodes that are neither
 * text nor element are ignored.
 */
const hastChildrenToBaseInline = (
  children: Array<HastNode>
): Effect.Effect<Array<Text | InlineCode | LineBreak | UnsupportedInline>, ParseError> =>
  Effect.gen(function*() {
    const result: Array<Text | InlineCode | LineBreak | UnsupportedInline> = []
    for (const hastNode of children) {
      if (hastNode.type === "text") {
        result.push(new Text({ value: (hastNode as HastText).value }))
        continue
      }
      if (hastNode.type !== "element") {
        continue
      }
      const inner = hastNode as HastElement
      switch (inner.tagName.toLowerCase()) {
        case "code":
          result.push(new InlineCode({ value: getTextContent(inner) }))
          break
        case "br":
          result.push(new LineBreak({}))
          break
        default:
          result.push(new UnsupportedInline({ raw: hastToHtml(inner), source: "confluence" }))
          break
      }
    }
    return result
  })
|
|
613
|
+
|
|
614
|
+
/**
 * Convert hast children to simple block nodes (non-recursive).
 *
 * Only element children are considered. `h1`-`h6`, `p`, `pre`, `hr`, `img`
 * and `table` map to their block node; an `img` without `src` produces
 * nothing; any other element is preserved as an `UnsupportedBlock` holding
 * its raw HTML.
 */
const hastChildrenToSimpleBlocks = (
  children: Array<HastNode>
): Effect.Effect<
  Array<Heading | Paragraph | CodeBlock | ThematicBreak | Image | Table | UnsupportedBlock>,
  ParseError
> =>
  Effect.gen(function*() {
    const result: Array<Heading | Paragraph | CodeBlock | ThematicBreak | Image | Table | UnsupportedBlock> = []

    for (const hastNode of children) {
      if (hastNode.type !== "element") {
        continue
      }
      const block = hastNode as HastElement
      const tag = block.tagName.toLowerCase()

      // Headings are matched by pattern, so they are handled before the switch.
      if (/^h[1-6]$/.test(tag)) {
        const digit = tag[1]
        if (digit) {
          const level = parseInt(digit) as 1 | 2 | 3 | 4 | 5 | 6
          const headingInline = yield* hastChildrenToInline(block.children)
          result.push(new Heading({ level, children: headingInline }))
        }
        continue
      }

      switch (tag) {
        case "p": {
          const paragraphInline = yield* hastChildrenToInline(block.children)
          result.push(new Paragraph({ children: paragraphInline }))
          break
        }
        case "pre": {
          // Prefer the text of a nested <code>; fall back to the <pre> itself.
          const nestedCode = block.children.find(
            (c): c is HastElement => c.type === "element" && (c as HastElement).tagName === "code"
          )
          const code = getTextContent(nestedCode ? nestedCode : block)
          result.push(new CodeBlock({ code }))
          break
        }
        case "hr":
          result.push(new ThematicBreak({}))
          break
        case "img": {
          const src = block.properties?.src as string | undefined
          if (src) {
            result.push(new Image({ src }))
          }
          break
        }
        case "table":
          result.push(yield* parseTable(block))
          break
        default:
          result.push(new UnsupportedBlock({ rawHtml: hastToHtml(block), source: "confluence" }))
          break
      }
    }

    return result
  })
|
|
660
|
+
|
|
661
|
+
/**
 * Parse table element.
 *
 * The header comes from the first `<tr>` of a `<thead>`. Rows of a `<tbody>`
 * become body rows, except that a row made up solely of `<th>` cells is
 * promoted to header when no header exists yet and no body row has been
 * collected. `<tr>` elements directly under the table become body rows.
 * Other children are ignored.
 */
const parseTable = (element: HastElement): Effect.Effect<Table, ParseError> =>
  Effect.gen(function*() {
    let header: TableRow | undefined
    const rows: Array<TableRow> = []

    for (const child of element.children) {
      if (child.type !== "element") continue
      const section = child as HastElement

      switch (section.tagName) {
        case "thead": {
          const headRow = section.children.find(
            (c): c is HastElement => c.type === "element" && (c as HastElement).tagName === "tr"
          )
          if (headRow) {
            header = yield* parseTableRow(headRow, true)
          }
          break
        }
        case "tbody": {
          const bodyRows = section.children.filter(
            (c): c is HastElement => c.type === "element" && (c as HastElement).tagName === "tr"
          )
          for (const bodyRow of bodyRows) {
            const cellElements = bodyRow.children.filter((c) => c.type === "element")
            const onlyHeaderCells = cellElements.every((c) => (c as HastElement).tagName === "th")
            // An all-<th> row counts as the header only if it is the very first row seen.
            const promoteToHeader = onlyHeaderCells && !header && rows.length === 0
            if (promoteToHeader) {
              header = yield* parseTableRow(bodyRow, true)
            } else {
              rows.push(yield* parseTableRow(bodyRow, false))
            }
          }
          break
        }
        case "tr":
          rows.push(yield* parseTableRow(section, false))
          break
      }
    }

    return new Table({ header, rows })
  })
|
|
702
|
+
|
|
703
|
+
/**
 * Parse table row.
 *
 * Collects the `<td>`/`<th>` element children of `element` as table cells.
 * A cell is flagged as header when `isHeader` is true or the cell itself is
 * a `<th>`. Cell content goes through `parseCellContent`.
 */
const parseTableRow = (element: HastElement, isHeader: boolean): Effect.Effect<TableRow, ParseError> =>
  Effect.gen(function*() {
    const cells: Array<TableCell> = []
    for (const child of element.children) {
      if (child.type !== "element") continue
      const cell = child as HastElement
      const isTh = cell.tagName === "th"
      if (!isTh && cell.tagName !== "td") continue
      // Unwrap single <p> elements inside cells
      const content = yield* parseCellContent(cell.children)
      cells.push(new TableCell({ isHeader: isHeader || isTh, children: content }))
    }
    return new TableRow({ cells })
  })
|
|
722
|
+
|
|
723
|
+
/**
 * Parse cell content, unwrapping single <p> elements.
 *
 * When the cell's only significant child (ignoring whitespace-only text) is
 * a `<p>`, that paragraph's children are converted; otherwise the cell's own
 * children are converted as-is.
 */
const parseCellContent = (children: Array<HastNode>): Effect.Effect<Array<InlineNode>, ParseError> =>
  Effect.gen(function*() {
    // Elements plus text that is not just whitespace.
    const significant = children.filter(
      (node) => node.type === "element" || (node.type === "text" && (node as HastText).value.trim() !== "")
    )

    const sole = significant.length === 1 ? significant[0] : undefined
    const isLoneParagraph = sole !== undefined &&
      sole.type === "element" &&
      (sole as HastElement).tagName === "p"

    if (isLoneParagraph) {
      return yield* hastChildrenToInline((sole as HastElement).children)
    }

    return yield* hastChildrenToInline(children)
  })
|
|
746
|
+
|
|
747
|
+
/** Block node kinds that may appear as the content of a list item. */
type SimpleBlock =
  | Heading
  | Paragraph
  | CodeBlock
  | ThematicBreak
  | Image
  | Table
  | UnsupportedBlock
|
|
749
|
+
|
|
750
|
+
/**
 * Parse task list element (preprocessed from ac:task-list).
 *
 * Each `<li>` child becomes a `TaskItem`. `id`, `uuid` and `status` are read
 * from the `dataTaskId`, `dataTaskUuid` and `dataTaskStatus` properties;
 * missing ids default to the empty string and any status other than
 * `"complete"` is treated as `"incomplete"`. The item body is the inline
 * conversion of the `<li>` children.
 */
const parseTaskList = (
  element: HastElement
): Effect.Effect<TaskList, ParseError> =>
  Effect.gen(function*() {
    const tasks: Array<TaskItem> = []

    for (const child of element.children) {
      if (child.type !== "element") continue
      const li = child as HastElement
      if (li.tagName !== "li") continue

      const props = li.properties
      const isComplete = (props?.["dataTaskStatus"] as string) === "complete"
      const body = yield* hastChildrenToInline(li.children)

      tasks.push({
        _tag: "TaskItem",
        id: (props?.["dataTaskId"] as string) || "",
        uuid: (props?.["dataTaskUuid"] as string) || "",
        status: isComplete ? "complete" as const : "incomplete" as const,
        body
      })
    }

    return {
      _tag: "TaskList" as const,
      version: 1,
      children: tasks
    }
  })
|
|
785
|
+
|
|
786
|
+
/**
 * Parse list element.
 *
 * Each `<li>` child becomes a `ListItem` whose children come from
 * `parseListItemContent`. For ordered lists the `start` property is parsed
 * as a base-10 integer; it is included only when it parses to a number, so
 * `start="0"` is kept and an unparsable value is omitted instead of becoming
 * `NaN`.
 *
 * A direct `<input type="checkbox">` child marks the item as a task item:
 * its `checked` property sets `ListItem.checked`, and the input itself is
 * removed from the content so it is not also emitted as raw HTML alongside
 * the item's text.
 *
 * @param element - The `<ul>`/`<ol>` element.
 * @param ordered - Whether the list is ordered; `start` is only read when true.
 */
const parseList = (
  element: HastElement,
  ordered: boolean
): Effect.Effect<
  {
    _tag: "List"
    version: number
    ordered: boolean
    start?: number
    children: Array<{ _tag: "ListItem"; checked?: boolean; children: Array<SimpleBlock> }>
  },
  ParseError
> =>
  Effect.gen(function*() {
    const items: Array<{ _tag: "ListItem"; checked?: boolean; children: Array<SimpleBlock> }> = []

    // Compare against null/undefined (not truthiness) so a start of 0 survives.
    const startProp = element.properties?.start
    const parsedStart = ordered && startProp !== undefined && startProp !== null
      ? parseInt(String(startProp), 10)
      : NaN
    const start = Number.isNaN(parsedStart) ? undefined : parsedStart

    for (const child of element.children) {
      if (child.type === "element" && (child as HastElement).tagName === "li") {
        const li = child as HastElement

        // Check for task list items
        const checkbox = li.children.find(
          (c): c is HastElement =>
            c.type === "element" &&
            (c as HastElement).tagName === "input" &&
            (c as HastElement).properties?.type === "checkbox"
        )

        // The checkbox is represented by `checked`; keep it out of the content.
        const contentNodes = checkbox ? li.children.filter((c) => c !== checkbox) : li.children
        const children = yield* parseListItemContent(contentNodes)

        const checked = checkbox ? (checkbox.properties?.checked === true) : undefined
        if (checked !== undefined) {
          items.push({ _tag: "ListItem", checked, children })
        } else {
          items.push({ _tag: "ListItem", children })
        }
      }
    }

    // Optional keys are left out entirely rather than set to undefined.
    if (start !== undefined) {
      return { _tag: "List" as const, version: 1, ordered, start, children: items }
    }
    return { _tag: "List" as const, version: 1, ordered, children: items }
  })
|
|
832
|
+
|
|
833
|
+
/**
 * Parse list item content, handling nested lists and unwrapping single <p>.
 * Also handles direct text/inline content without wrapper elements.
 *
 * Two modes:
 * - If the item has direct inline content (non-blank text or an inline
 *   element), everything except nested `<ul>`/`<ol>` is converted to inline
 *   nodes and wrapped in one `Paragraph`; each nested list then follows as an
 *   `UnsupportedBlock`. Nested lists are excluded from the inline conversion
 *   so they are not emitted twice.
 * - Otherwise each element child is mapped to a block (`p`, nested list,
 *   `pre`, `hr`, `img`, `table`, `h1`-`h6`); other elements are skipped.
 *
 * Nested lists are preserved as raw HTML for now.
 */
const parseListItemContent = (
  children: Array<HastNode>
): Effect.Effect<Array<SimpleBlock>, ParseError> =>
  Effect.gen(function*() {
    const blocks: Array<SimpleBlock> = []

    // Tags converted by hastElementToInline that should trigger paragraph wrapping.
    // "s" and "time" are included so an item holding only those is not dropped.
    const inlineTags = ["a", "strong", "em", "b", "i", "u", "s", "code", "span", "del", "sub", "sup", "time"]

    const isNestedList = (node: HastNode): boolean => {
      if (node.type !== "element") return false
      const tag = (node as HastElement).tagName.toLowerCase()
      return tag === "ul" || tag === "ol"
    }

    // Check if there's any direct text/inline content (not wrapped in <p>)
    const hasDirectInlineContent = children.some((child) => {
      if (child.type === "text") {
        return (child as HastText).value.trim() !== ""
      }
      if (child.type === "element") {
        return inlineTags.includes((child as HastElement).tagName.toLowerCase())
      }
      return false
    })

    // If there's direct inline content, wrap it all in a paragraph
    if (hasDirectInlineContent) {
      // Nested lists are emitted as blocks below; keep them out of the paragraph.
      const inlineSource = children.filter((child) => !isNestedList(child))
      const inlineChildren = yield* hastChildrenToInline(inlineSource)
      if (inlineChildren.length > 0) {
        blocks.push(new Paragraph({ children: inlineChildren }))
      }
      // Nested lists follow the inline content
      for (const child of children) {
        if (isNestedList(child)) {
          blocks.push(new UnsupportedBlock({ rawHtml: hastToHtml(child as HastElement), source: "confluence" }))
        }
      }
      return blocks
    }

    for (const child of children) {
      if (child.type !== "element") continue
      const el = child as HastElement
      const tagName = el.tagName.toLowerCase()

      if (tagName === "p") {
        // <p> inside list item - extract inline content as paragraph
        const inlineChildren = yield* hastChildrenToInline(el.children)
        blocks.push(new Paragraph({ children: inlineChildren }))
      } else if (tagName === "ul" || tagName === "ol") {
        // For nested lists, preserve as unsupported for now
        blocks.push(new UnsupportedBlock({ rawHtml: hastToHtml(el), source: "confluence" }))
      } else if (tagName === "pre") {
        // Prefer the text of a nested <code>; fall back to the <pre> itself.
        const codeEl = el.children.find(
          (c): c is HastElement => c.type === "element" && (c as HastElement).tagName === "code"
        )
        const code = codeEl ? getTextContent(codeEl) : getTextContent(el)
        blocks.push(new CodeBlock({ code }))
      } else if (tagName === "hr") {
        blocks.push(new ThematicBreak({}))
      } else if (tagName === "img") {
        const src = el.properties?.src as string | undefined
        if (src) blocks.push(new Image({ src }))
      } else if (tagName === "table") {
        blocks.push(yield* parseTable(el))
      } else if (/^h[1-6]$/.test(tagName)) {
        const levelStr = tagName[1]
        if (levelStr) {
          const level = parseInt(levelStr, 10) as 1 | 2 | 3 | 4 | 5 | 6
          const inlineChildren = yield* hastChildrenToInline(el.children)
          blocks.push(new Heading({ level, children: inlineChildren }))
        }
      }
    }

    return blocks
  })
|
|
914
|
+
|
|
915
|
+
/**
 * Get text content from hast node.
 *
 * Concatenates, in document order, the value of every text node found under
 * `element`, descending into nested elements. Other node types contribute
 * nothing.
 */
const getTextContent = (element: HastElement): string =>
  element.children.reduce((collected: string, node) => {
    if (node.type === "text") {
      return collected + (node as HastText).value
    }
    if (node.type === "element") {
      return collected + getTextContent(node as HastElement)
    }
    return collected
  }, "")
|
|
929
|
+
|
|
930
|
+
/**
 * hast property names whose HTML attribute name is NOT the kebab-case form of
 * the property (e.g. `className` -> `class`, not `class-name`).
 * Not exhaustive; unlisted properties fall back to kebab-casing.
 */
const HAST_ATTRIBUTE_NAMES: ReadonlyMap<string, string> = new Map([
  ["className", "class"],
  ["htmlFor", "for"],
  ["dateTime", "datetime"],
  ["colSpan", "colspan"],
  ["rowSpan", "rowspan"],
  ["tabIndex", "tabindex"],
  ["srcSet", "srcset"],
  ["readOnly", "readonly"],
  ["maxLength", "maxlength"],
  ["minLength", "minlength"],
  ["contentEditable", "contenteditable"],
  ["spellCheck", "spellcheck"],
  ["crossOrigin", "crossorigin"],
  ["noWrap", "nowrap"],
  ["vAlign", "valign"],
  ["bgColor", "bgcolor"],
  ["cellPadding", "cellpadding"],
  ["cellSpacing", "cellspacing"],
  ["allowFullScreen", "allowfullscreen"],
  ["frameBorder", "frameborder"],
  ["referrerPolicy", "referrerpolicy"],
  ["hrefLang", "hreflang"]
])

/** List-valued properties that serialise comma-separated; other lists are space-separated. */
const HAST_COMMA_SEPARATED: ReadonlySet<string> = new Set(["accept", "coords", "sizes", "srcSet"])

/** Properties whose boolean value is written out as "true"/"false" (besides `aria*`). */
const HAST_BOOLEANISH: ReadonlySet<string> = new Set(["contentEditable", "draggable", "spellCheck"])

/** Elements that have no content and no end tag. */
const HAST_VOID_ELEMENTS: ReadonlySet<string> = new Set([
  "area",
  "base",
  "br",
  "col",
  "embed",
  "hr",
  "img",
  "input",
  "link",
  "meta",
  "source",
  "track",
  "wbr"
])

/**
 * Convert hast element back to HTML string (for unsupported elements).
 *
 * Serialisation rules:
 * - Property names are mapped to attribute names: known exceptions via
 *   `HAST_ATTRIBUTE_NAMES`, `ariaXyz` -> `aria-xyz`, everything else
 *   kebab-cased (`dataFooBar` -> `data-foo-bar`).
 * - `null`/`undefined` properties are omitted. `false` is omitted and `true`
 *   is written as an empty value (`hidden=""`), except for `aria*` and
 *   booleanish properties, which are written as `"true"`/`"false"`.
 * - Array values are joined with spaces (or `", "` for comma-separated
 *   properties).
 * - `&` and `"` are escaped in attribute values; `&` and `<` are escaped in
 *   text, except inside `<script>`/`<style>`.
 * - Childless void elements are written self-closed (`<br />`), which is
 *   accepted by HTML parsers and is also well-formed XML.
 * - Child nodes that are neither text nor element contribute nothing.
 *
 * @param element - The element to serialise, including its descendants.
 * @returns The HTML markup for `element`.
 */
const hastToHtml = (element: HastElement): string => {
  const attributes: Array<string> = []

  for (const [key, value] of Object.entries(element.properties || {})) {
    if (value === null || value === undefined) continue

    const isAria = /^aria[A-Z]/.test(key)
    const writesBooleanText = isAria || HAST_BOOLEANISH.has(key)
    // A plain boolean attribute that is false is simply absent.
    if (value === false && !writesBooleanText) continue

    const attrName = HAST_ATTRIBUTE_NAMES.get(key) ??
      (isAria
        ? `aria-${key.slice(4).toLowerCase()}`
        : key.replace(/([A-Z])/g, "-$1").toLowerCase())

    let attrValue: string
    if (value === true && !writesBooleanText) {
      attrValue = ""
    } else if (Array.isArray(value)) {
      attrValue = value.join(HAST_COMMA_SEPARATED.has(key) ? ", " : " ")
    } else {
      attrValue = String(value)
    }

    const escaped = attrValue.replace(/&/g, "&amp;").replace(/"/g, "&quot;")
    attributes.push(`${attrName}="${escaped}"`)
  }

  const attributeText = attributes.length > 0 ? ` ${attributes.join(" ")}` : ""
  const lowerTag = element.tagName.toLowerCase()

  if (element.children.length === 0 && HAST_VOID_ELEMENTS.has(lowerTag)) {
    return `<${element.tagName}${attributeText} />`
  }

  // Script and style content is raw text and must not be entity-escaped.
  const isRawText = lowerTag === "script" || lowerTag === "style"
  const content = element.children
    .map((c) => {
      if (c.type === "text") {
        const text = (c as HastText).value
        return isRawText ? text : text.replace(/&/g, "&amp;").replace(/</g, "&lt;")
      }
      if (c.type === "element") return hastToHtml(c as HastElement)
      return ""
    })
    .join("")

  return `<${element.tagName}${attributeText}>${content}</${element.tagName}>`
}
|