@portabletext/block-tools 3.4.1 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/_chunks-cjs/helpers.cjs +479 -0
- package/lib/_chunks-cjs/helpers.cjs.map +1 -0
- package/lib/_chunks-dts/types.d.cts +85 -0
- package/lib/_chunks-dts/types.d.ts +85 -0
- package/lib/_chunks-es/helpers.js +478 -0
- package/lib/_chunks-es/helpers.js.map +1 -0
- package/lib/index.cjs +84 -534
- package/lib/index.cjs.map +1 -1
- package/lib/index.d.cts +1 -83
- package/lib/index.d.ts +1 -83
- package/lib/index.js +3 -453
- package/lib/index.js.map +1 -1
- package/lib/rules/index.cjs +66 -0
- package/lib/rules/index.cjs.map +1 -0
- package/lib/rules/index.d.cts +72 -0
- package/lib/rules/index.d.ts +72 -0
- package/lib/rules/index.js +67 -0
- package/lib/rules/index.js.map +1 -0
- package/package.json +8 -2
- package/src/HtmlDeserializer/flatten-nested-blocks.test.ts +5 -8
- package/src/HtmlDeserializer/flatten-nested-blocks.ts +0 -1
- package/src/HtmlDeserializer/index.ts +1 -1
- package/src/rules/_exports/index.ts +1 -0
- package/src/rules/flatten-tables.test.ts +224 -0
- package/src/rules/flatten-tables.ts +202 -0
- package/src/rules/index.ts +1 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { isTextBlock } from "@portabletext/schema";
|
|
2
|
+
import { isElement, tagName, flattenNestedBlocks } from "../_chunks-es/helpers.js";
|
|
3
|
+
/**
 * Creates a deserializer rule that flattens an HTML `<table>` into a flat list
 * of blocks, pairing every body cell with the header cell of the same column.
 *
 * The rule only handles `<table>` elements that have both a `<thead>` (with at
 * least one `<tr>`) and a `<tbody>`; for anything else `deserialize` returns
 * `undefined`.
 *
 * Only elements that belong to the table being deserialized are considered.
 * Rows and cells of a table nested inside a cell are left to `next(cell)`;
 * otherwise they would be emitted a second time and shift the column index
 * used for header lookup.
 *
 * @param {object} options
 * @param {object} options.schema - Schema passed on to `flattenNestedBlocks`
 *   and `isTextBlock`.
 * @param {Function} [options.separator] - Optional factory called once per
 *   merged header/cell pair; a truthy return value is inserted between the
 *   header children and the cell children.
 * @returns {{deserialize: Function}} The deserializer rule.
 */
function createFlattenTableRule({
  schema,
  separator
}) {
  // `next` may return a single object or an array of objects.
  const asArray = (value) => Array.isArray(value) ? value : [value];
  return {
    deserialize: (node, next) => {
      if (!isElement(node) || tagName(node) !== "table") {
        return;
      }
      // `querySelectorAll` matches every descendant, including elements of
      // tables nested inside cells. Keep only the elements whose nearest
      // `<table>` ancestor is the table this rule was invoked for.
      const ownDescendants = (root, selector) => [...root.querySelectorAll(selector)].filter(
        (element) => element.closest("table") === node
      );
      const thead = ownDescendants(node, "thead")[0];
      const tbody = ownDescendants(node, "tbody")[0];
      if (!thead || !tbody) {
        return;
      }
      const headerRow = ownDescendants(thead, "tr")[0];
      if (!headerRow) {
        return;
      }
      const headerResults = ownDescendants(headerRow, "th").map(
        (headerCell) => next(headerCell)
      );
      const rows = [];
      for (const row of ownDescendants(tbody, "tr")) {
        let cellIndex = 0;
        for (const cell of ownDescendants(row, "td")) {
          const result = next(cell);
          // Every cell consumes one column, whether or not it produced output.
          const headerResult = headerResults[cellIndex];
          cellIndex++;
          if (!result) {
            continue;
          }
          if (!headerResult) {
            // No header for this column: emit the cell as is.
            rows.push(...asArray(result));
            continue;
          }
          const headerBlocks = flattenNestedBlocks({ schema }, asArray(headerResult));
          const cellBlocks = flattenNestedBlocks({ schema }, asArray(result));
          const headerBlock = headerBlocks[0];
          const cellBlock = cellBlocks[0];
          const canMerge = headerBlocks.length === 1 && isTextBlock({ schema }, headerBlock) && cellBlocks.length === 1 && isTextBlock({ schema }, cellBlock);
          if (!canMerge) {
            // Header and cell are not both a single text block: emit the
            // unflattened header result followed by the unflattened cell result.
            rows.push(...asArray(headerResult), ...asArray(result));
            continue;
          }
          const separatorChild = separator?.();
          // Merge into one text block based on the header block: header
          // children, optional separator, then cell children.
          rows.push({
            ...headerBlock,
            children: [
              ...headerBlock.children,
              ...separatorChild ? [separatorChild] : [],
              ...cellBlock.children
            ],
            markDefs: [
              ...headerBlock.markDefs ?? [],
              ...cellBlock.markDefs ?? []
            ]
          });
        }
      }
      return rows;
    }
  };
}
|
|
64
|
+
export {
|
|
65
|
+
createFlattenTableRule
|
|
66
|
+
};
|
|
67
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sources":["../../src/rules/flatten-tables.ts"],"sourcesContent":["import {\n isTextBlock,\n type PortableTextObject,\n type PortableTextSpan,\n type Schema,\n} from '@portabletext/schema'\nimport {flattenNestedBlocks} from '../HtmlDeserializer/flatten-nested-blocks'\nimport {isElement, tagName} from '../HtmlDeserializer/helpers'\nimport type {\n ArbitraryTypedObject,\n DeserializerRule,\n TypedObject,\n} from '../types'\n\n/**\n * An opinionated `DeserializerRule` that flattens tables in a way that repeats\n * the header row for each cell in the row.\n *\n * @example\n * ```html\n * <table>\n * <thead>\n * <tr>\n * <th>Header 1</th>\n * <th>Header 2</th>\n * </tr>\n * </thead>\n * <tbody>\n * <tr>\n * <td>Cell 1</td>\n * <td>Cell 2</td>\n * </tr>\n * </tbody>\n * </table>\n * ```\n * Turns into\n * ```json\n * [\n * {\n * _type: 'block',\n * children: [\n * {\n * _type: 'text',\n * text: 'Header 1'\n * },\n * {\n * _type: 'text',\n * text: 'Cell 1'\n * }\n * ]\n * },\n * {\n * _type: 'block',\n * children: [\n * {\n * _type: 'text',\n * text: 'Header 2'\n * },\n * {\n * _type: 'text',\n * text: 'Cell 2'\n * }\n * ]\n * }\n * ]\n * ```\n *\n * Use the `separator` option to control if a child element should separate\n * headers and cells.\n *\n * @beta\n */\nexport function createFlattenTableRule({\n schema,\n separator,\n}: {\n schema: Schema\n separator?: () =>\n | (Omit<PortableTextSpan, '_key'> & {_key?: string})\n | (Omit<PortableTextObject, '_key'> & {_key?: string})\n | undefined\n}): DeserializerRule {\n return {\n deserialize: (node, next) => {\n if (!isElement(node) || tagName(node) !== 'table') {\n return undefined\n }\n\n const thead = node.querySelector('thead')\n const tbody = node.querySelector('tbody')\n\n // Only process tables with thead and tbody\n if (!thead || !tbody) {\n return undefined\n }\n\n // Extract header labels from thead\n const headerRow = thead.querySelector('tr')\n\n if (!headerRow) {\n return 
undefined\n }\n\n const headerCells = headerRow.querySelectorAll('th')\n const headerResults = [...headerCells].map((headerCell) =>\n next(headerCell),\n )\n\n // Process tbody rows and combine with headers\n const rows: TypedObject[] = []\n const rowElements = tbody.querySelectorAll('tr')\n\n for (const row of rowElements) {\n const cells = row.querySelectorAll('td')\n\n let cellIndex = 0\n for (const cell of cells) {\n const result = next(cell)\n\n if (!result) {\n cellIndex++\n continue\n }\n\n const headerResult = headerResults[cellIndex]\n\n if (!headerResult) {\n // If we can't find a corresponding header, then we just push\n // the deserialized cell as is.\n if (Array.isArray(result)) {\n rows.push(...result)\n } else {\n rows.push(result)\n }\n cellIndex++\n continue\n }\n\n const flattenedHeaderResult = flattenNestedBlocks(\n {schema},\n (Array.isArray(headerResult)\n ? headerResult\n : [headerResult]) as Array<ArbitraryTypedObject>,\n )\n const firstFlattenedHeaderResult = flattenedHeaderResult[0]\n const flattenedResult = flattenNestedBlocks(\n {schema},\n (Array.isArray(result)\n ? result\n : [result]) as Array<ArbitraryTypedObject>,\n )\n const firstFlattenedResult = flattenedResult[0]\n\n if (\n flattenedHeaderResult.length === 1 &&\n isTextBlock({schema}, firstFlattenedHeaderResult) &&\n flattenedResult.length === 1 &&\n isTextBlock({schema}, firstFlattenedResult)\n ) {\n const separatorChild = separator?.()\n // If the header result and the cell result are text blocks then\n // we merge them together.\n const mergedTextBlock = {\n ...firstFlattenedHeaderResult,\n children: [\n ...firstFlattenedHeaderResult.children,\n ...(separatorChild ? [separatorChild] : []),\n ...firstFlattenedResult.children,\n ],\n markDefs: [\n ...(firstFlattenedHeaderResult.markDefs ?? []),\n ...(firstFlattenedResult.markDefs ?? 
[]),\n ],\n }\n\n rows.push(mergedTextBlock)\n cellIndex++\n continue\n }\n\n // Otherwise, we push the header result and the cell result as is.\n if (Array.isArray(headerResult)) {\n rows.push(...headerResult)\n } else {\n rows.push(headerResult)\n }\n\n if (Array.isArray(result)) {\n rows.push(...result)\n } else {\n rows.push(result)\n }\n\n cellIndex++\n }\n }\n\n // Return the processed rows as individual text blocks\n return rows\n },\n }\n}\n"],"names":[],"mappings":";;AAwEO,SAAS,uBAAuB;AAAA,EACrC;AAAA,EACA;AACF,GAMqB;AACnB,SAAO;AAAA,IACL,aAAa,CAAC,MAAM,SAAS;AAC3B,UAAI,CAAC,UAAU,IAAI,KAAK,QAAQ,IAAI,MAAM;AACxC;AAGF,YAAM,QAAQ,KAAK,cAAc,OAAO,GAClC,QAAQ,KAAK,cAAc,OAAO;AAGxC,UAAI,CAAC,SAAS,CAAC;AACb;AAIF,YAAM,YAAY,MAAM,cAAc,IAAI;AAE1C,UAAI,CAAC;AACH;AAIF,YAAM,gBAAgB,CAAC,GADH,UAAU,iBAAiB,IAAI,CACd,EAAE;AAAA,QAAI,CAAC,eAC1C,KAAK,UAAU;AAAA,MAAA,GAIX,OAAsB,CAAA,GACtB,cAAc,MAAM,iBAAiB,IAAI;AAE/C,iBAAW,OAAO,aAAa;AAC7B,cAAM,QAAQ,IAAI,iBAAiB,IAAI;AAEvC,YAAI,YAAY;AAChB,mBAAW,QAAQ,OAAO;AACxB,gBAAM,SAAS,KAAK,IAAI;AAExB,cAAI,CAAC,QAAQ;AACX;AACA;AAAA,UACF;AAEA,gBAAM,eAAe,cAAc,SAAS;AAE5C,cAAI,CAAC,cAAc;AAGb,kBAAM,QAAQ,MAAM,IACtB,KAAK,KAAK,GAAG,MAAM,IAEnB,KAAK,KAAK,MAAM,GAElB;AACA;AAAA,UACF;AAEA,gBAAM,wBAAwB;AAAA,YAC5B,EAAC,OAAA;AAAA,YACA,MAAM,QAAQ,YAAY,IACvB,eACA,CAAC,YAAY;AAAA,UAAA,GAEb,6BAA6B,sBAAsB,CAAC,GACpD,kBAAkB;AAAA,YACtB,EAAC,OAAA;AAAA,YACA,MAAM,QAAQ,MAAM,IACjB,SACA,CAAC,MAAM;AAAA,UAAA,GAEP,uBAAuB,gBAAgB,CAAC;AAE9C,cACE,sBAAsB,WAAW,KACjC,YAAY,EAAC,UAAS,0BAA0B,KAChD,gBAAgB,WAAW,KAC3B,YAAY,EAAC,OAAA,GAAS,oBAAoB,GAC1C;AACA,kBAAM,iBAAiB,YAAA,GAGjB,kBAAkB;AAAA,cACtB,GAAG;AAAA,cACH,UAAU;AAAA,gBACR,GAAG,2BAA2B;AAAA,gBAC9B,GAAI,iBAAiB,CAAC,cAAc,IAAI,CAAA;AAAA,gBACxC,GAAG,qBAAqB;AAAA,cAAA;AAAA,cAE1B,UAAU;AAAA,gBACR,GAAI,2BAA2B,YAAY,CAAA;AAAA,gBAC3C,GAAI,qBAAqB,YAAY,CAAA;AAAA,cAAC;AAAA,YACxC;AAGF,iBAAK,KAAK,eAAe,GACzB;AACA;AAAA,UACF;AAGI,gBAAM,QAAQ,YAAY,IAC5B,KAAK,KAAK,GAAG,YAAY,IAEzB,KAAK,KAAK,YAAY,GAGpB,MAAM,QAAQ,MAAM,IACtB,KAAK,KAAK,GAAG,MAAM,IAEnB,KAAK,KAAK,MAAM,GAGlB;AAAA,
QACF;AAAA,MACF;AAGA,aAAO;AAAA,IACT;AAAA,EAAA;AAEJ;"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@portabletext/block-tools",
|
|
3
|
-
"version": "3.4.1",
|
|
3
|
+
"version": "3.5.0",
|
|
4
4
|
"description": "Can format HTML, Slate JSON or Sanity block array into any other format.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"portable-text",
|
|
@@ -26,6 +26,12 @@
|
|
|
26
26
|
"require": "./lib/index.cjs",
|
|
27
27
|
"default": "./lib/index.js"
|
|
28
28
|
},
|
|
29
|
+
"./rules": {
|
|
30
|
+
"source": "./src/rules/_exports/index.ts",
|
|
31
|
+
"import": "./lib/rules/index.js",
|
|
32
|
+
"require": "./lib/rules/index.cjs",
|
|
33
|
+
"default": "./lib/rules/index.js"
|
|
34
|
+
},
|
|
29
35
|
"./package.json": "./package.json"
|
|
30
36
|
},
|
|
31
37
|
"main": "./lib/index.cjs",
|
|
@@ -42,7 +48,7 @@
|
|
|
42
48
|
"@portabletext/schema": "^1.2.0"
|
|
43
49
|
},
|
|
44
50
|
"devDependencies": {
|
|
45
|
-
"@sanity/pkg-utils": "^8.
|
|
51
|
+
"@sanity/pkg-utils": "^8.1.1",
|
|
46
52
|
"@sanity/schema": "^4.6.0",
|
|
47
53
|
"@sanity/types": "^4.6.0",
|
|
48
54
|
"@types/jsdom": "^20.0.0",
|
|
@@ -5,10 +5,9 @@ import {flattenNestedBlocks} from './flatten-nested-blocks'
|
|
|
5
5
|
|
|
6
6
|
describe(flattenNestedBlocks.name, () => {
|
|
7
7
|
test('flattening text blocks', () => {
|
|
8
|
-
const keyGenerator = createTestKeyGenerator('k')
|
|
9
8
|
const schema = compileSchema(defineSchema({}))
|
|
10
9
|
expect(
|
|
11
|
-
flattenNestedBlocks({schema
|
|
10
|
+
flattenNestedBlocks({schema}, [
|
|
12
11
|
{
|
|
13
12
|
_type: 'block',
|
|
14
13
|
children: [
|
|
@@ -40,12 +39,11 @@ describe(flattenNestedBlocks.name, () => {
|
|
|
40
39
|
})
|
|
41
40
|
|
|
42
41
|
test('flattening text blocks with block objects in schema', () => {
|
|
43
|
-
const keyGenerator = createTestKeyGenerator('k')
|
|
44
42
|
const schema = compileSchema(
|
|
45
43
|
defineSchema({blockObjects: [{name: 'image'}]}),
|
|
46
44
|
)
|
|
47
45
|
expect(
|
|
48
|
-
flattenNestedBlocks({schema
|
|
46
|
+
flattenNestedBlocks({schema}, [
|
|
49
47
|
{
|
|
50
48
|
_type: 'block',
|
|
51
49
|
children: [
|
|
@@ -77,14 +75,13 @@ describe(flattenNestedBlocks.name, () => {
|
|
|
77
75
|
})
|
|
78
76
|
|
|
79
77
|
test('flattening text blocks with styles in schema', () => {
|
|
80
|
-
const keyGenerator = createTestKeyGenerator('k')
|
|
81
78
|
const schema = compileSchema(
|
|
82
79
|
defineSchema({
|
|
83
80
|
styles: [{name: 'h1'}],
|
|
84
81
|
}),
|
|
85
82
|
)
|
|
86
83
|
expect(
|
|
87
|
-
flattenNestedBlocks({schema
|
|
84
|
+
flattenNestedBlocks({schema}, [
|
|
88
85
|
{
|
|
89
86
|
_type: 'block',
|
|
90
87
|
children: [
|
|
@@ -131,7 +128,7 @@ describe(flattenNestedBlocks.name, () => {
|
|
|
131
128
|
const linkKey = keyGenerator()
|
|
132
129
|
|
|
133
130
|
expect(
|
|
134
|
-
flattenNestedBlocks({schema
|
|
131
|
+
flattenNestedBlocks({schema}, [
|
|
135
132
|
{
|
|
136
133
|
_key: blockKey,
|
|
137
134
|
_type: 'block',
|
|
@@ -198,7 +195,7 @@ describe(flattenNestedBlocks.name, () => {
|
|
|
198
195
|
const barKey = keyGenerator()
|
|
199
196
|
|
|
200
197
|
expect(
|
|
201
|
-
flattenNestedBlocks({schema
|
|
198
|
+
flattenNestedBlocks({schema}, [
|
|
202
199
|
{
|
|
203
200
|
_key: blockKey,
|
|
204
201
|
_type: 'block',
|
|
@@ -78,7 +78,7 @@ export default class HtmlDeserializer {
|
|
|
78
78
|
const blocks = trimWhitespace(
|
|
79
79
|
this.schema,
|
|
80
80
|
flattenNestedBlocks(
|
|
81
|
-
{schema: this.schema
|
|
81
|
+
{schema: this.schema},
|
|
82
82
|
ensureRootIsBlocks(
|
|
83
83
|
this.schema,
|
|
84
84
|
this.deserializeElements(children) as Array<ArbitraryTypedObject>,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../index'
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
import {compileSchema, defineSchema} from '@portabletext/schema'
|
|
2
|
+
import {getTersePt} from '@portabletext/test'
|
|
3
|
+
import {JSDOM} from 'jsdom'
|
|
4
|
+
import {describe, expect, test} from 'vitest'
|
|
5
|
+
import {createTestKeyGenerator} from '../../test/test-key-generator'
|
|
6
|
+
import {htmlToBlocks} from '../index'
|
|
7
|
+
import type {ImageSchemaMatcher} from '../schema-matchers'
|
|
8
|
+
import type {HtmlDeserializerOptions} from '../types'
|
|
9
|
+
import {createFlattenTableRule} from './flatten-tables'
|
|
10
|
+
|
|
11
|
+
// Test suite for `createFlattenTableRule`: HTML tables are run through
// `htmlToBlocks` with the rule installed and compared in terse form.
describe(createFlattenTableRule.name, () => {
  const schema = compileSchema(
    defineSchema({
      blockObjects: [{name: 'image', fields: [{name: 'src', type: 'string'}]}],
      inlineObjects: [{name: 'image', fields: [{name: 'src', type: 'string'}]}],
    }),
  )

  // Matches `<img>` as a block object, but only if the schema declares a
  // block object named `image`.
  const imageMatcher: ImageSchemaMatcher = ({context, props}) => {
    const isSupported = context.schema.blockObjects.some(
      ({name}) => name === 'image',
    )

    return isSupported
      ? {
          _type: 'image',
          ...(props.src ? {src: props.src} : {}),
          ...(props.alt ? {alt: props.alt} : {}),
        }
      : undefined
  }

  // Matches `<img>` as an inline object, but only if the schema declares an
  // inline object named `image`.
  const inlineImageMatcher: ImageSchemaMatcher = ({context, props}) => {
    const isSupported = context.schema.inlineObjects.some(
      ({name}) => name === 'image',
    )

    return isSupported
      ? {
          _type: 'image',
          ...(props.src ? {src: props.src} : {}),
          ...(props.alt ? {alt: props.alt} : {}),
        }
      : undefined
  }

  const flattenTableRule = createFlattenTableRule({
    schema,
    separator: () => ({_type: 'span', text: ' '}),
  })

  function transform(html: string, options?: HtmlDeserializerOptions) {
    return htmlToBlocks(html, schema, {
      parseHtml: (html) => new JSDOM(html).window.document,
      keyGenerator: createTestKeyGenerator('k'),
      ...options,
    })
  }

  // Deserializes `html` and returns the terse representation of the result.
  function terse(html: string, options: HtmlDeserializerOptions) {
    return getTersePt({schema, value: transform(html, options)})
  }

  // Builds `<table><thead>…</thead><tbody>…</tbody></table>` without any
  // whitespace between the tags.
  const cellsHtml = (tag: 'th' | 'td', cells: Array<string>) =>
    cells.map((cell) => `<${tag}>${cell}</${tag}>`).join('')
  const tableHtml = (headers: Array<string>, bodyRows: Array<Array<string>>) =>
    [
      '<table><thead><tr>',
      cellsHtml('th', headers),
      '</tr></thead><tbody>',
      ...bodyRows.map((cells) => `<tr>${cellsHtml('td', cells)}</tr>`),
      '</tbody></table>',
    ].join('')

  test('ordinary table', () => {
    /**
     * | Year | Sales    | Expenses | Profit  |
     * | ---- | -------- | -------- | ------- |
     * | 2022 | \$8,000  | \$5,000  | \$3,000 |
     * | 2023 | \$10,000 | \$6,500  | \$3,500 |
     * | 2024 | \$15,000 | \$9,000  | \$6,000 |
     */
    const html = tableHtml(
      ['Year', 'Sales', 'Expenses', 'Profit'],
      [
        ['2022', '$8,000', '$5,000', '$3,000'],
        ['2023', '$10,000', '$6,500', '$3,500'],
        ['2024', '$15,000', '$9,000', '$6,000'],
      ],
    )

    expect(terse(html, {rules: [flattenTableRule]})).toEqual([
      'Year, ,2022',
      'Sales, ,$8,000',
      'Expenses, ,$5,000',
      'Profit, ,$3,000',
      'Year, ,2023',
      'Sales, ,$10,000',
      'Expenses, ,$6,500',
      'Profit, ,$3,500',
      'Year, ,2024',
      'Sales, ,$15,000',
      'Expenses, ,$9,000',
      'Profit, ,$6,000',
    ])
  })

  describe('table with images', () => {
    /**
     * | Name | Photo |
     * | --- | --- |
     * | John Doe | <img src="https://via.placeholder.com/150" alt="John Doe" /> |
     * | Jane Smith | <img src="https://via.placeholder.com/150" alt="Jane Smith" /> |
     */
    const html = tableHtml(
      ['Name', 'Photo'],
      [
        [
          'John Doe',
          '<img src="https://via.placeholder.com/150" alt="John Doe" />',
        ],
        [
          'Jane Smith',
          '<img src="https://via.placeholder.com/150" alt="Jane Smith" />',
        ],
      ],
    )

    test('no image matcher', () => {
      expect(
        terse(html, {matchers: undefined, rules: [flattenTableRule]}),
      ).toEqual(['Name, ,John Doe', 'Photo', 'Name, ,Jane Smith', 'Photo'])
    })

    test('block image matcher', () => {
      expect(
        terse(html, {
          matchers: {image: imageMatcher},
          rules: [flattenTableRule],
        }),
      ).toEqual([
        'Name, ,John Doe',
        'Photo',
        '{image}',
        'Name, ,Jane Smith',
        'Photo',
        '{image}',
      ])
    })

    test('block and inline image matcher', () => {
      expect(
        terse(html, {
          matchers: {image: imageMatcher, inlineImage: inlineImageMatcher},
          rules: [flattenTableRule],
        }),
      ).toEqual([
        'Name, ,John Doe',
        'Photo, ,{image}',
        'Name, ,Jane Smith',
        'Photo, ,{image}',
      ])
    })

    describe('Google Docs', () => {
      test('simple table with thead', () => {
        const html = `<meta charset='utf-8'><meta charset="utf-8"><b style="font-weight:normal;" id="docs-internal-guid-e0aa048e-7fff-f3cb-0d6d-5e68751be0e3"><div dir="ltr" style="margin-left:0pt;" align="left"><table style="border:none;border-collapse:collapse;table-layout:fixed;width:468pt"><colgroup><col /><col /></colgroup><thead><tr style="height:0pt"><th style="border-left:solid #000000 1pt;border-right:solid #000000 1pt;border-bottom:solid #000000 1pt;border-top:solid #000000 1pt;vertical-align:top;padding:5pt 5pt 5pt 5pt;overflow:hidden;overflow-wrap:break-word;" scope="col"><p dir="ltr" style="line-height:1.2;margin-top:0pt;margin-bottom:0pt;"><span style="font-size:12pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">Header 1</span></p></th><th style="border-left:solid #000000 1pt;border-right:solid #000000 1pt;border-bottom:solid #000000 1pt;border-top:solid #000000 1pt;vertical-align:top;padding:5pt 5pt 5pt 5pt;overflow:hidden;overflow-wrap:break-word;" scope="col"><p dir="ltr" style="line-height:1.2;margin-top:0pt;margin-bottom:0pt;"><span style="font-size:12pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">Header 2</span></p></th></tr></thead><tbody><tr style="height:0pt"><td style="border-left:solid #000000 1pt;border-right:solid #000000 1pt;border-bottom:solid #000000 1pt;border-top:solid #000000 1pt;vertical-align:top;padding:5pt 5pt 5pt 5pt;overflow:hidden;overflow-wrap:break-word;"><p dir="ltr" style="line-height:1.2;margin-top:0pt;margin-bottom:0pt;"><span style="font-size:12pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">Cell 1</span></p></td><td style="border-left:solid #000000 1pt;border-right:solid #000000 1pt;border-bottom:solid #000000 1pt;border-top:solid #000000 1pt;vertical-align:top;padding:5pt 5pt 5pt 5pt;overflow:hidden;overflow-wrap:break-word;"><p dir="ltr" style="line-height:1.2;margin-top:0pt;margin-bottom:0pt;"><span style="font-size:12pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">Cell 2</span></p></td></tr></tbody></table></div></b>`

        expect(terse(html, {rules: [flattenTableRule]})).toEqual([
          'Header 1, ,Cell 1',
          'Header 2, ,Cell 2',
        ])
      })
    })
  })
})
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
import {
|
|
2
|
+
isTextBlock,
|
|
3
|
+
type PortableTextObject,
|
|
4
|
+
type PortableTextSpan,
|
|
5
|
+
type Schema,
|
|
6
|
+
} from '@portabletext/schema'
|
|
7
|
+
import {flattenNestedBlocks} from '../HtmlDeserializer/flatten-nested-blocks'
|
|
8
|
+
import {isElement, tagName} from '../HtmlDeserializer/helpers'
|
|
9
|
+
import type {
|
|
10
|
+
ArbitraryTypedObject,
|
|
11
|
+
DeserializerRule,
|
|
12
|
+
TypedObject,
|
|
13
|
+
} from '../types'
|
|
14
|
+
|
|
15
|
+
/**
 * An opinionated `DeserializerRule` that flattens a table by pairing every
 * body cell with the header cell of the same column.
 *
 * The rule only handles `<table>` elements that have both a `<thead>` (with at
 * least one `<tr>`) and a `<tbody>`; for anything else `deserialize` returns
 * `undefined`.
 *
 * Only elements that belong to the table being deserialized are considered.
 * Rows and cells of a table nested inside a cell are left to `next(cell)`;
 * otherwise they would be emitted a second time and shift the column index
 * used for header lookup.
 *
 * @example
 * ```html
 * <table>
 *   <thead>
 *     <tr>
 *       <th>Header 1</th>
 *       <th>Header 2</th>
 *     </tr>
 *   </thead>
 *   <tbody>
 *     <tr>
 *       <td>Cell 1</td>
 *       <td>Cell 2</td>
 *     </tr>
 *   </tbody>
 * </table>
 * ```
 * Turns into (simplified; assumes the span type is named `span`)
 * ```js
 * [
 *   {
 *     _type: 'block',
 *     children: [
 *       {_type: 'span', text: 'Header 1'},
 *       {_type: 'span', text: 'Cell 1'},
 *     ],
 *   },
 *   {
 *     _type: 'block',
 *     children: [
 *       {_type: 'span', text: 'Header 2'},
 *       {_type: 'span', text: 'Cell 2'},
 *     ],
 *   },
 * ]
 * ```
 *
 * A header and a cell are merged only when each of them flattens to exactly
 * one text block. Otherwise the header result and the cell result are emitted
 * one after the other, unmerged.
 *
 * Use the `separator` option to insert a child between the header children
 * and the cell children of each merged block. It is called once per merged
 * pair; returning `undefined` inserts nothing.
 *
 * @beta
 */
export function createFlattenTableRule({
  schema,
  separator,
}: {
  schema: Schema
  separator?: () =>
    | (Omit<PortableTextSpan, '_key'> & {_key?: string})
    | (Omit<PortableTextObject, '_key'> & {_key?: string})
    | undefined
}): DeserializerRule {
  // `next` may return a single object or an array of objects.
  const asArray = (value: TypedObject | TypedObject[]): TypedObject[] =>
    Array.isArray(value) ? value : [value]

  return {
    deserialize: (node, next) => {
      if (!isElement(node) || tagName(node) !== 'table') {
        return undefined
      }

      const table = node

      // `querySelectorAll` matches every descendant, including elements of
      // tables nested inside cells. Keep only the elements whose nearest
      // `<table>` ancestor is the table this rule was invoked for.
      const ownDescendants = (root: Element, selector: string): Element[] =>
        [...root.querySelectorAll(selector)].filter(
          (element) => element.closest('table') === table,
        )

      const thead = ownDescendants(table, 'thead')[0]
      const tbody = ownDescendants(table, 'tbody')[0]

      // Only process tables with thead and tbody
      if (!thead || !tbody) {
        return undefined
      }

      // The first row of thead provides the header cells
      const headerRow = ownDescendants(thead, 'tr')[0]

      if (!headerRow) {
        return undefined
      }

      const headerResults = ownDescendants(headerRow, 'th').map((headerCell) =>
        next(headerCell),
      )

      // Process tbody rows and combine with headers
      const rows: TypedObject[] = []

      for (const row of ownDescendants(tbody, 'tr')) {
        let cellIndex = 0

        for (const cell of ownDescendants(row, 'td')) {
          const result = next(cell)
          // Every cell consumes one column, whether or not it produced output.
          const headerResult = headerResults[cellIndex]
          cellIndex++

          if (!result) {
            continue
          }

          if (!headerResult) {
            // If we can't find a corresponding header, then we just push
            // the deserialized cell as is.
            rows.push(...asArray(result))
            continue
          }

          const flattenedHeaderResult = flattenNestedBlocks(
            {schema},
            asArray(headerResult) as Array<ArbitraryTypedObject>,
          )
          const firstFlattenedHeaderResult = flattenedHeaderResult[0]
          const flattenedResult = flattenNestedBlocks(
            {schema},
            asArray(result) as Array<ArbitraryTypedObject>,
          )
          const firstFlattenedResult = flattenedResult[0]

          if (
            flattenedHeaderResult.length === 1 &&
            isTextBlock({schema}, firstFlattenedHeaderResult) &&
            flattenedResult.length === 1 &&
            isTextBlock({schema}, firstFlattenedResult)
          ) {
            const separatorChild = separator?.()

            // Header and cell are both a single text block: merge them into
            // one block based on the header block, with the optional
            // separator between header children and cell children.
            rows.push({
              ...firstFlattenedHeaderResult,
              children: [
                ...firstFlattenedHeaderResult.children,
                ...(separatorChild ? [separatorChild] : []),
                ...firstFlattenedResult.children,
              ],
              markDefs: [
                ...(firstFlattenedHeaderResult.markDefs ?? []),
                ...(firstFlattenedResult.markDefs ?? []),
              ],
            })
            continue
          }

          // Otherwise, we push the (unflattened) header result and cell
          // result as is.
          rows.push(...asArray(headerResult), ...asArray(result))
        }
      }

      return rows
    },
  }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './flatten-tables'
|