@portabletext/block-tools 4.1.8 → 4.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/lib/_chunks-es/helpers.js +81 -9
  2. package/lib/_chunks-es/helpers.js.map +1 -1
  3. package/lib/index.js +14 -18
  4. package/lib/index.js.map +1 -1
  5. package/package.json +7 -10
  6. package/src/HtmlDeserializer/flatten-nested-blocks.test.ts +0 -248
  7. package/src/HtmlDeserializer/flatten-nested-blocks.ts +0 -173
  8. package/src/HtmlDeserializer/helpers.ts +0 -108
  9. package/src/HtmlDeserializer/index.ts +0 -315
  10. package/src/HtmlDeserializer/preprocessors/index.ts +0 -15
  11. package/src/HtmlDeserializer/preprocessors/preprocessor.gdocs.ts +0 -66
  12. package/src/HtmlDeserializer/preprocessors/preprocessor.html.ts +0 -57
  13. package/src/HtmlDeserializer/preprocessors/preprocessor.notion.ts +0 -25
  14. package/src/HtmlDeserializer/preprocessors/preprocessor.whitespace.ts +0 -56
  15. package/src/HtmlDeserializer/preprocessors/preprocessor.word.ts +0 -92
  16. package/src/HtmlDeserializer/preprocessors/xpathResult.ts +0 -13
  17. package/src/HtmlDeserializer/rules/index.ts +0 -21
  18. package/src/HtmlDeserializer/rules/rules.gdocs.ts +0 -188
  19. package/src/HtmlDeserializer/rules/rules.html.ts +0 -356
  20. package/src/HtmlDeserializer/rules/rules.notion.ts +0 -57
  21. package/src/HtmlDeserializer/rules/rules.whitespace-text-node.ts +0 -31
  22. package/src/HtmlDeserializer/rules/rules.word.ts +0 -95
  23. package/src/HtmlDeserializer/trim-whitespace.ts +0 -157
  24. package/src/HtmlDeserializer/word-online/asserters.word-online.ts +0 -153
  25. package/src/HtmlDeserializer/word-online/preprocessor.word-online.ts +0 -263
  26. package/src/HtmlDeserializer/word-online/rules.word-online.ts +0 -390
  27. package/src/constants.ts +0 -104
  28. package/src/index.ts +0 -49
  29. package/src/rules/_exports/index.ts +0 -1
  30. package/src/rules/flatten-tables.test.ts +0 -495
  31. package/src/rules/flatten-tables.ts +0 -216
  32. package/src/rules/index.ts +0 -1
  33. package/src/schema-matchers.ts +0 -41
  34. package/src/types.ts +0 -100
  35. package/src/util/findBlockType.ts +0 -13
  36. package/src/util/normalizeBlock.ts +0 -171
  37. package/src/util/randomKey.ts +0 -28
  38. package/src/util/resolveJsType.ts +0 -44
@@ -1,390 +0,0 @@
1
- import type {Schema} from '@portabletext/schema'
2
- import {DEFAULT_SPAN, HTML_BLOCK_TAGS, HTML_HEADER_TAGS} from '../../constants'
3
- import type {SchemaMatchers} from '../../schema-matchers'
4
- import type {DeserializerRule} from '../../types'
5
- import {keyGenerator} from '../../util/randomKey'
6
- import {isElement, tagName} from '../helpers'
7
- import {
8
- hasEmphasisFormatting,
9
- hasStrikethroughFormatting,
10
- hasStrongFormatting,
11
- hasUnderlineFormatting,
12
- isFindHit,
13
- isInBlockquote,
14
- isInHeading,
15
- isNormalTextRun,
16
- isWordOnlineTextRun,
17
- } from './asserters.word-online'
18
-
19
/**
 * Word Online paragraph style names (the values of the `data-parastyle`
 * attribute read by the rules in this file) mapped to block style names.
 *
 * A `Map` is used rather than an object literal so that a lookup can never
 * fall through to `Object.prototype` (for example a paragraph style that
 * happens to be called `constructor` or `toString`).
 */
const PARA_STYLE_TO_BLOCK_STYLE: ReadonlyMap<string, string> = new Map([
  ['heading 1', 'h1'],
  ['heading 2', 'h2'],
  ['heading 3', 'h3'],
  ['heading 4', 'h4'],
  ['heading 5', 'h5'],
  ['heading 6', 'h6'],
  ['Quote', 'blockquote'],
])

/**
 * Resolves a Word Online paragraph style to a block style that exists in the
 * given schema.
 *
 * @param schema - Schema whose `styles` list is searched for the block style
 * @param paraStyle - Paragraph style name as found in `data-parastyle`
 * @returns The name of the matching schema style, or `undefined` when the
 * schema does not declare that style. Paragraph styles that are not in the
 * table are treated as `'normal'`.
 */
function mapParaStyleToBlockStyle(schema: Schema, paraStyle: string) {
  // Unknown paragraph styles fall back to the default block style
  const blockStyle = PARA_STYLE_TO_BLOCK_STYLE.get(paraStyle) ?? 'normal'

  return schema.styles.find((style) => style.name === blockStyle)?.name
}
34
-
35
/**
 * Returns an element's class attribute as a plain string.
 *
 * `className` is normally a string, but it may be an object exposing
 * `baseVal` (SVGAnimatedString). Anything else yields an empty string.
 */
function readClassName(el: Element): string {
  const classNameRaw: unknown = el.className

  if (typeof classNameRaw === 'string') {
    return classNameRaw
  }

  if (classNameRaw && typeof classNameRaw === 'object') {
    // SVGAnimatedString has baseVal property
    return (classNameRaw as {baseVal?: string}).baseVal || ''
  }

  return ''
}

/**
 * Builds the `props` object handed to image matchers: every attribute of the
 * `<img>` element, with non-empty `src` and `alt` applied on top.
 */
function collectImageProps(img: Element): Record<string, string> {
  const src = img.getAttribute('src') ?? undefined
  const alt = img.getAttribute('alt') ?? undefined

  const props: Record<string, string> = Object.fromEntries(
    Array.from(img.attributes).map((attr): [string, string] => [
      attr.name,
      attr.value,
    ]),
  )

  return {
    ...props,
    ...(src ? {src} : {}),
    ...(alt ? {alt} : {}),
  }
}

/**
 * Creates the deserializer rules for HTML copied from Word Online.
 *
 * The returned rules are, in order:
 * 1. bare `<img>` elements whose class contains `WACImage` (block images)
 * 2. elements whose class contains `WACImageContainer` (inline or block images)
 * 3. `<li>` elements carrying `data-aria-level` (list item blocks)
 * 4. elements carrying a `data-parastyle` that maps to a schema style
 * 5. Word Online text runs (spans with decorator marks)
 *
 * Every rule returns `undefined` when it does not apply to the given node.
 *
 * @param schema - Schema used for block/style names and passed to matchers
 * @param options - Optional `keyGenerator` (defaults to the imported
 * `keyGenerator`) and optional `image` / `inlineImage` schema matchers
 * @returns The list of deserializer rules
 */
export function createWordOnlineRules(
  schema: Schema,
  options: {keyGenerator?: () => string; matchers?: SchemaMatchers},
): DeserializerRule[] {
  // A fresh context object is created for every matcher invocation
  const matcherContext = () => ({
    schema: schema,
    keyGenerator: options.keyGenerator ?? keyGenerator,
  })

  return [
    // Image rule - handles bare Word Online <img> tags with WACImage class
    {
      deserialize(el) {
        if (!isElement(el) || tagName(el) !== 'img') {
          return undefined
        }

        if (!readClassName(el).includes('WACImage')) {
          return undefined
        }

        // Bare <img> tags are typically block-level, not inline
        // They should be returned as block images
        const image = options.matchers?.image?.({
          context: matcherContext(),
          props: collectImageProps(el),
        })

        if (image) {
          return {
            _type: '__block',
            block: image,
          }
        }

        return undefined
      },
    },
    // Image rule - handles Word Online images wrapped in WACImageContainer
    {
      deserialize(el) {
        if (!isElement(el)) {
          return undefined
        }

        if (!readClassName(el).includes('WACImageContainer')) {
          return undefined
        }

        // Find the img element inside
        const img = el.querySelector('img')
        if (!img) {
          return undefined
        }

        // Determine if this should be an inline or block-level image
        // Word Online inline images:
        // 1. Siblings of TextRun spans (not wrapped in paragraphs)
        // 2. Inside list items (should be inline relative to the list item)
        const isInsideListItem = el.closest('li') !== null
        const isInsideParagraph = el.closest('p') !== null

        if (!isInsideParagraph || isInsideListItem) {
          // Inline image (either not in a paragraph, or inside a list item)
          const inlineImage = options.matchers?.inlineImage?.({
            context: matcherContext(),
            props: collectImageProps(img),
          })

          if (inlineImage) {
            return inlineImage
          }
        }

        // Block-level image (or fallback if inline image not supported)
        const image = options.matchers?.image?.({
          context: matcherContext(),
          props: collectImageProps(img),
        })

        if (image) {
          return {
            _type: '__block',
            block: image,
          }
        }

        return undefined
      },
    },
    // List item rule - handles <li> elements with aria-level
    {
      deserialize(el, next) {
        if (!isElement(el) || tagName(el) !== 'li') {
          return undefined
        }

        const ariaLevel = el.getAttribute('data-aria-level')

        if (!ariaLevel) {
          return undefined
        }

        // A non-numeric attribute value must not leak NaN into the block
        const parsedLevel = Number.parseInt(ariaLevel, 10)
        const level = Number.isNaN(parsedLevel) ? 1 : parsedLevel

        const listItem = tagName(el.parentNode) === 'ol' ? 'number' : 'bullet'

        let childNodesToProcess = el.childNodes
        let blockStyle = 'normal'

        const onlyChild = el.childNodes.length === 1 ? el.firstChild : null

        if (onlyChild && isElement(onlyChild)) {
          const childTag = tagName(onlyChild)

          if (
            childTag &&
            (HTML_BLOCK_TAGS[childTag as keyof typeof HTML_BLOCK_TAGS] ||
              HTML_HEADER_TAGS[childTag as keyof typeof HTML_HEADER_TAGS] ||
              childTag === 'word-online-block')
          ) {
            // If it's a word-online-block, extract the style before skipping it
            if (childTag === 'word-online-block') {
              const paraStyle = onlyChild.getAttribute('data-parastyle')
              const foundBlockStyle = paraStyle
                ? mapParaStyleToBlockStyle(schema, paraStyle)
                : undefined

              if (foundBlockStyle) {
                blockStyle = foundBlockStyle
              }
            }

            // Skip the block wrapper and process its children directly
            childNodesToProcess = onlyChild.childNodes
          }
        }

        const children = next(childNodesToProcess)
        let childArray = Array.isArray(children)
          ? children
          : [children].filter(Boolean)

        // Clean up trailing empty or whitespace-only spans
        // Word Online often adds trailing tabs/breaks and extra spaces in list items
        while (childArray.length > 0) {
          const lastChild = childArray[childArray.length - 1]

          if (
            lastChild &&
            typeof lastChild === 'object' &&
            'text' in lastChild &&
            // Only string text can be trimmed; anything else ends the cleanup
            typeof lastChild.text === 'string'
          ) {
            const trimmed = lastChild.text.trimEnd()

            if (trimmed === '') {
              // Remove empty span and look at the new last child
              childArray = childArray.slice(0, -1)
              continue
            }

            if (trimmed !== lastChild.text) {
              // Update with trimmed text
              lastChild.text = trimmed
            }
          }

          break
        }

        return {
          _type: schema.block.name,
          children: childArray,
          markDefs: [],
          style: blockStyle,
          listItem,
          level,
        }
      },
    },
    // Block style rule - handles paragraph styles like Quote
    // The preprocessor wraps grouped NormalTextRun spans in a word-online-block element
    {
      deserialize(el, next) {
        if (!isElement(el)) {
          return undefined
        }

        const paraStyle = el.getAttribute('data-parastyle')
        const blockStyle = paraStyle
          ? mapParaStyleToBlockStyle(schema, paraStyle)
          : undefined

        if (!blockStyle) {
          return undefined
        }

        const children = next(el.childNodes)

        return {
          _type: schema.block.name,
          style: blockStyle,
          markDefs: [],
          children: Array.isArray(children)
            ? children
            : children
              ? [children]
              : [],
        }
      },
    },
    // TextRun rule
    {
      deserialize(el) {
        if (!isWordOnlineTextRun(el)) {
          return undefined
        }

        if (!isElement(el)) {
          return undefined
        }

        if (!el.textContent) {
          return undefined
        }

        // Find ALL NormalTextRun and FindHit children and extract text from them
        // (Word Online sometimes splits text across multiple spans)
        // FindHit is used for search result highlighting
        const textSpans = Array.from(el.childNodes).filter(
          (node) => isNormalTextRun(node) || isFindHit(node),
        )
        const text = textSpans
          .map((span) => (isElement(span) ? (span.textContent ?? '') : ''))
          .join('')

        if (!text) {
          return undefined
        }

        // DEFAULT_SPAN is frozen and shares its marks array, so use a fresh one
        const span = {
          ...DEFAULT_SPAN,
          marks: [] as Array<string>,
          text,
        }

        if (hasStrongFormatting(el)) {
          span.marks.push('strong')
        }

        // Don't add italic mark if we're in a heading or blockquote (it's part of their default style)
        if (
          hasEmphasisFormatting(el) &&
          !isInHeading(el) &&
          !isInBlockquote(el)
        ) {
          span.marks.push('em')
        }

        // Add underline mark if the element has explicit underline formatting
        // Word Online always adds underline to links, so we need to distinguish between:
        // 1. Default link underline (skip)
        // 2. Explicit user underline that includes the link (add)
        // We check: if the link is surrounded by underlined content, it's explicit user underline
        if (hasUnderlineFormatting(el)) {
          const parent = el.parentElement

          if (parent && tagName(parent) === 'a') {
            // Check if there are underlined siblings of the link
            const prevSibling = parent.previousSibling
            const nextSibling = parent.nextSibling

            // If either sibling is an underlined TextRun, the link is part of explicit underline
            const hasPrevUnderline =
              prevSibling &&
              isElement(prevSibling) &&
              hasUnderlineFormatting(prevSibling)
            const hasNextUnderline =
              nextSibling &&
              isElement(nextSibling) &&
              hasUnderlineFormatting(nextSibling)

            if (hasPrevUnderline || hasNextUnderline) {
              span.marks.push('underline')
            }
            // Otherwise, it's just default link styling, don't add underline mark
          } else {
            // Not in a link, always add underline
            span.marks.push('underline')
          }
        }

        // Add strikethrough mark if the element has strikethrough formatting
        if (hasStrikethroughFormatting(el)) {
          span.marks.push('strike-through')
        }

        return span
      },
    },
  ]
}
package/src/constants.ts DELETED
@@ -1,104 +0,0 @@
1
- import {uniq} from 'lodash'
2
-
3
/**
 * Minimal block shape used as the value type of the element maps in this
 * file (`DEFAULT_BLOCK`, `HTML_BLOCK_TAGS`, `HTML_HEADER_TAGS`, ...).
 */
export interface PartialBlock {
  /** Type name of the block; `DEFAULT_BLOCK` uses `'block'`. */
  _type: string
  /** Mark definitions of the block; empty in every template in this file. */
  markDefs: Array<string>
  /** Style name, e.g. `'normal'`, `'blockquote'` or `'h1'`. */
  style: string
  /** List kind; only set by the list item template (`'bullet'`). */
  listItem?: string
  /** List nesting level; only set by the list item template (`1`). */
  level?: number
}
10
-
11
/** Tag names exported for whitespace preservation (consumers live outside this file). */
export const PRESERVE_WHITESPACE_TAGS = ['pre', 'textarea', 'code']

/** Style name given to blocks that have no more specific style. */
export const BLOCK_DEFAULT_STYLE = 'normal'

/**
 * Template for a default block. Frozen shallowly: the object itself cannot be
 * changed, but its `markDefs` array is shared by every template spread from it.
 */
export const DEFAULT_BLOCK: PartialBlock = Object.freeze({
  _type: 'block',
  markDefs: [],
  style: BLOCK_DEFAULT_STYLE,
})

/** Template for a span without marks (shallowly frozen, like `DEFAULT_BLOCK`). */
export const DEFAULT_SPAN = Object.freeze({
  _type: 'span',
  marks: [] as string[],
})

/** Copies `DEFAULT_BLOCK` and sets the given style on the copy. */
function blockWithStyle(style: string): PartialBlock {
  return {...DEFAULT_BLOCK, style}
}

/** Block-level tags and the block template each one produces. */
export const HTML_BLOCK_TAGS = {
  p: DEFAULT_BLOCK,
  blockquote: blockWithStyle('blockquote'),
}

/** Inline text container tags. */
export const HTML_SPAN_TAGS = {
  span: {object: 'text'},
}

/** List container tags; they carry no block template of their own. */
export const HTML_LIST_CONTAINER_TAGS: Record<
  string,
  {object: null} | undefined
> = {
  ol: {object: null},
  ul: {object: null},
}

/** Heading tags `h1`..`h6`, each mapped to a block styled with the tag name. */
export const HTML_HEADER_TAGS: Record<string, PartialBlock | undefined> =
  Object.fromEntries(
    ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].map(
      (tag): [string, PartialBlock] => [tag, blockWithStyle(tag)],
    ),
  )

/** Remaining tags that map to a default-styled block. */
export const HTML_MISC_TAGS = {
  br: blockWithStyle(BLOCK_DEFAULT_STYLE),
}

/** Decorator tags and the mark name each one maps to. */
export const HTML_DECORATOR_TAGS: Record<string, string | undefined> = {
  b: 'strong',
  strong: 'strong',

  i: 'em',
  em: 'em',

  u: 'underline',
  s: 'strike-through',
  strike: 'strike-through',
  del: 'strike-through',

  code: 'code',
  sup: 'sup',
  sub: 'sub',
  ins: 'ins',
  mark: 'mark',
  small: 'small',
}

/** List item tag: a default-styled block at level 1 in a bullet list. */
export const HTML_LIST_ITEM_TAGS: Record<string, PartialBlock | undefined> = {
  li: {
    ...blockWithStyle(BLOCK_DEFAULT_STYLE),
    level: 1,
    listItem: 'bullet',
  },
}

/** All element tables merged into one lookup; later spreads win on key clashes. */
export const ELEMENT_MAP = {
  ...HTML_BLOCK_TAGS,
  ...HTML_SPAN_TAGS,
  ...HTML_LIST_CONTAINER_TAGS,
  ...HTML_LIST_ITEM_TAGS,
  ...HTML_HEADER_TAGS,
  ...HTML_MISC_TAGS,
}
93
-
94
// Entries of ELEMENT_MAP that carry a `style` property
const styledElements = Object.values(ELEMENT_MAP).filter(
  (entry): entry is PartialBlock => 'style' in entry,
)

/** Distinct style names found in `ELEMENT_MAP`, in first-seen order. */
export const DEFAULT_SUPPORTED_STYLES = uniq(
  styledElements.map((entry) => entry.style),
)

// Mark names of all decorator tags; several tags map to the same mark
const decoratorNames = Object.values(HTML_DECORATOR_TAGS)

/** Distinct decorator names found in `HTML_DECORATOR_TAGS`, in first-seen order. */
export const DEFAULT_SUPPORTED_DECORATORS = uniq(decoratorNames)

/** Annotation type names supported by default. */
export const DEFAULT_SUPPORTED_ANNOTATIONS = ['link']
package/src/index.ts DELETED
@@ -1,49 +0,0 @@
1
- import {sanitySchemaToPortableTextSchema} from '@portabletext/sanity-bridge'
2
- import type {Schema} from '@portabletext/schema'
3
- import type {ArraySchemaType} from '@sanity/types'
4
- import HtmlDeserializer from './HtmlDeserializer'
5
- import type {HtmlDeserializerOptions, TypedObject} from './types'
6
- import {normalizeBlock} from './util/normalizeBlock'
7
-
8
/**
 * Deserialize an HTML string into blocks that follow the schema of the given
 * block content type.
 *
 * A Sanity array schema type is first converted to a Portable Text schema;
 * a Portable Text schema is used as-is. Every deserialized block is passed
 * through `normalizeBlock` before it is returned.
 *
 * @param html - The HTML to convert to blocks
 * @param schemaType - A compiled version of the schema type for the block content
 * @param options - Options for deserializing HTML to blocks
 * @returns Array of blocks
 * @public
 */
export function htmlToBlocks(
  html: string,
  schemaType: ArraySchemaType | Schema,
  options: HtmlDeserializerOptions = {},
) {
  let schema: Schema

  if (isSanitySchema(schemaType)) {
    schema = sanitySchemaToPortableTextSchema(schemaType)
  } else {
    schema = schemaType
  }

  const blocks = new HtmlDeserializer(schema, options).deserialize(html)

  return blocks.map((block) =>
    normalizeBlock(block, {keyGenerator: options.keyGenerator}),
  )
}
31
-
32
- export type {ImageSchemaMatcher, SchemaMatchers} from './schema-matchers'
33
- export type {ArbitraryTypedObject, DeserializerRule, HtmlParser} from './types'
34
- export type {
35
- PortableTextBlock,
36
- PortableTextObject,
37
- PortableTextSpan,
38
- PortableTextTextBlock,
39
- } from '@portabletext/schema'
40
- export type {BlockNormalizationOptions} from './util/normalizeBlock'
41
- export {randomKey} from './util/randomKey'
42
- export {normalizeBlock}
43
- export type {HtmlDeserializerOptions, TypedObject}
44
-
45
/**
 * Tells a Sanity array schema type apart from a Portable Text schema by
 * checking for an own `jsonType` property.
 *
 * The check goes through `Object.prototype.hasOwnProperty.call` so that it
 * also works for objects without a prototype and for objects that define
 * their own `hasOwnProperty` member.
 *
 * @param schema - The schema value passed to `htmlToBlocks`
 * @returns `true` when `schema` has an own `jsonType` property
 */
function isSanitySchema(
  schema: ArraySchemaType | Schema,
): schema is ArraySchemaType {
  return Object.prototype.hasOwnProperty.call(schema, 'jsonType')
}
@@ -1 +0,0 @@
1
- export * from '../index'