@portabletext/block-tools 3.4.1 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,73 @@
1
+ import { isTextBlock } from "@portabletext/schema";
2
+ import { isElement, tagName, flattenNestedBlocks } from "../_chunks-es/helpers.js";
3
/**
 * Creates an opinionated deserializer rule that flattens an HTML `<table>`
 * into a flat list of blocks by pairing every body cell with the header cell
 * of the same column.
 *
 * Header row detection:
 * - The first `<tr>` inside `<thead>` is the header row and the `<tr>`s inside
 *   `<tbody>` are the body rows.
 * - If either of those is missing, the header is inferred instead: when every
 *   `<tr>` in the table has the same number of `<td>` cells and that number is
 *   at least 3, the first row is treated as the header and the remaining rows
 *   as the body. Otherwise the rule declines the node by returning
 *   `undefined`.
 *
 * Cell handling, per body cell:
 * - If `next(cell)` yields nothing, the cell is skipped.
 * - If the column has no header result, the cell result is emitted as is.
 * - If both the header result and the cell result flatten to exactly one text
 *   block, the two are merged into a single block (header children, optional
 *   separator child, cell children; `markDefs` concatenated).
 * - Otherwise the header result and the cell result are emitted one after the
 *   other, unmerged.
 *
 * @param {object} options
 * @param {object} options.schema - Schema forwarded to `flattenNestedBlocks`
 *   and `isTextBlock`.
 * @param {Function} [options.separator] - Optional factory invoked once per
 *   merged block; a truthy return value is inserted as a child between the
 *   header children and the cell children.
 * @returns {{deserialize: Function}} A rule object whose
 *   `deserialize(node, next)` returns an array of blocks, or `undefined` when
 *   the node is not a table this rule handles.
 */
function createFlattenTableRule({
  schema,
  separator
}) {
  return {
    deserialize: (node, next) => {
      if (!isElement(node) || tagName(node) !== "table") {
        return;
      }

      let headerRow = node.querySelector("thead")?.querySelector("tr");
      const tbody = node.querySelector("tbody");
      let bodyRows = tbody ? [...tbody.querySelectorAll("tr")] : [];

      // `bodyRows` is always an array and an empty array is truthy, so the
      // "no body rows" case has to be detected through its length. Without
      // this, a table with a <thead> but no <tbody> rows never reaches the
      // inference below and is emitted as an empty result.
      if (!headerRow || bodyRows.length === 0) {
        const allRows = [...node.querySelectorAll("tr")];
        const columnCounts = allRows.map(
          (row) => row.querySelectorAll("td").length
        );
        const firstColumnCount = columnCounts[0];
        const isUniform = columnCounts.every(
          (count) => count === firstColumnCount
        );

        // Only infer a header row for uniform tables with 3+ columns.
        if (!firstColumnCount || !isUniform || firstColumnCount < 3) {
          return;
        }

        headerRow = allRows[0];
        bodyRows = allRows.slice(1);
      }

      if (!headerRow) {
        return;
      }

      // Header cells are deserialized once and reused for every body row.
      const headerResults = [...headerRow.querySelectorAll("th, td")].map(
        (headerCell) => next(headerCell)
      );
      const rows = [];

      for (const row of bodyRows) {
        // The index advances for every cell, including skipped ones, so that
        // cells stay aligned with their column's header.
        const cells = [...row.querySelectorAll("td")];

        for (const [cellIndex, cell] of cells.entries()) {
          const result = next(cell);
          if (!result) {
            continue;
          }

          const resultBlocks = Array.isArray(result) ? result : [result];
          const headerResult = headerResults[cellIndex];

          if (!headerResult) {
            // No header for this column: emit the cell on its own.
            rows.push(...resultBlocks);
            continue;
          }

          const headerBlocks = Array.isArray(headerResult)
            ? headerResult
            : [headerResult];
          const flattenedHeader = flattenNestedBlocks({ schema }, headerBlocks);
          const flattenedCell = flattenNestedBlocks({ schema }, resultBlocks);
          const headerBlock = flattenedHeader[0];
          const cellBlock = flattenedCell[0];

          const canMerge =
            flattenedHeader.length === 1 &&
            isTextBlock({ schema }, headerBlock) &&
            flattenedCell.length === 1 &&
            isTextBlock({ schema }, cellBlock);

          if (canMerge) {
            const separatorChild = separator?.();
            rows.push({
              ...headerBlock,
              children: [
                ...headerBlock.children,
                ...(separatorChild ? [separatorChild] : []),
                ...cellBlock.children
              ],
              markDefs: [
                ...(headerBlock.markDefs ?? []),
                ...(cellBlock.markDefs ?? [])
              ]
            });
            continue;
          }

          // Not mergeable: emit the header followed by the cell, untouched.
          rows.push(...headerBlocks, ...resultBlocks);
        }
      }

      return rows;
    }
  };
}
70
+ export {
71
+ createFlattenTableRule
72
+ };
73
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sources":["../../src/rules/flatten-tables.ts"],"sourcesContent":["import {\n isTextBlock,\n type PortableTextObject,\n type PortableTextSpan,\n type Schema,\n} from '@portabletext/schema'\nimport {flattenNestedBlocks} from '../HtmlDeserializer/flatten-nested-blocks'\nimport {isElement, tagName} from '../HtmlDeserializer/helpers'\nimport type {\n ArbitraryTypedObject,\n DeserializerRule,\n TypedObject,\n} from '../types'\n\n/**\n * An opinionated `DeserializerRule` that flattens tables in a way that repeats\n * the header row for each cell in the row.\n *\n * @example\n * ```html\n * <table>\n * <thead>\n * <tr>\n * <th>Header 1</th>\n * <th>Header 2</th>\n * </tr>\n * </thead>\n * <tbody>\n * <tr>\n * <td>Cell 1</td>\n * <td>Cell 2</td>\n * </tr>\n * </tbody>\n * </table>\n * ```\n * Turns into\n * ```json\n * [\n * {\n * _type: 'block',\n * children: [\n * {\n * _type: 'text',\n * text: 'Header 1'\n * },\n * {\n * _type: 'text',\n * text: 'Cell 1'\n * }\n * ]\n * },\n * {\n * _type: 'block',\n * children: [\n * {\n * _type: 'text',\n * text: 'Header 2'\n * },\n * {\n * _type: 'text',\n * text: 'Cell 2'\n * }\n * ]\n * }\n * ]\n * ```\n *\n * Use the `separator` option to control if a child element should separate\n * headers and cells.\n *\n * @beta\n */\nexport function createFlattenTableRule({\n schema,\n separator,\n}: {\n schema: Schema\n separator?: () =>\n | (Omit<PortableTextSpan, '_key'> & {_key?: string})\n | (Omit<PortableTextObject, '_key'> & {_key?: string})\n | undefined\n}): DeserializerRule {\n return {\n deserialize: (node, next) => {\n if (!isElement(node) || tagName(node) !== 'table') {\n return undefined\n }\n\n const thead = node.querySelector('thead')\n let headerRow = thead?.querySelector('tr')\n const tbody = node.querySelector('tbody')\n let bodyRows = tbody ? [...tbody.querySelectorAll('tr')] : []\n\n if (!headerRow || !bodyRows) {\n // If there is not thead or tbody, we look at the column count. 
If the\n // column count is greater than 2 then we infer that the first row is\n // the header row and the rest are the body rows.\n\n const columnCounts = [...node.querySelectorAll('tr')].map((row) => {\n const cells = row.querySelectorAll('td')\n return cells.length\n })\n\n const firstColumnCount = columnCounts[0]\n\n if (\n !firstColumnCount ||\n !columnCounts.every((count) => count === firstColumnCount)\n ) {\n return undefined\n }\n\n if (firstColumnCount < 3) {\n return undefined\n }\n\n // Now we know that all rows have the same column count and that\n // count is >2\n const rows = [...node.querySelectorAll('tr')]\n headerRow = rows.slice(0, 1)[0]\n bodyRows = rows.slice(1)\n }\n\n if (!headerRow) {\n return undefined\n }\n\n const headerCells = headerRow.querySelectorAll('th, td')\n const headerResults = [...headerCells].map((headerCell) =>\n next(headerCell),\n )\n\n // Process tbody rows and combine with headers\n const rows: TypedObject[] = []\n\n for (const row of bodyRows) {\n const cells = row.querySelectorAll('td')\n\n let cellIndex = 0\n for (const cell of cells) {\n const result = next(cell)\n\n if (!result) {\n cellIndex++\n continue\n }\n\n const headerResult = headerResults[cellIndex]\n\n if (!headerResult) {\n // If we can't find a corresponding header, then we just push\n // the deserialized cell as is.\n if (Array.isArray(result)) {\n rows.push(...result)\n } else {\n rows.push(result)\n }\n cellIndex++\n continue\n }\n\n const flattenedHeaderResult = flattenNestedBlocks(\n {schema},\n (Array.isArray(headerResult)\n ? headerResult\n : [headerResult]) as Array<ArbitraryTypedObject>,\n )\n const firstFlattenedHeaderResult = flattenedHeaderResult[0]\n const flattenedResult = flattenNestedBlocks(\n {schema},\n (Array.isArray(result)\n ? 
result\n : [result]) as Array<ArbitraryTypedObject>,\n )\n const firstFlattenedResult = flattenedResult[0]\n\n if (\n flattenedHeaderResult.length === 1 &&\n isTextBlock({schema}, firstFlattenedHeaderResult) &&\n flattenedResult.length === 1 &&\n isTextBlock({schema}, firstFlattenedResult)\n ) {\n const separatorChild = separator?.()\n // If the header result and the cell result are text blocks then\n // we merge them together.\n const mergedTextBlock = {\n ...firstFlattenedHeaderResult,\n children: [\n ...firstFlattenedHeaderResult.children,\n ...(separatorChild ? [separatorChild] : []),\n ...firstFlattenedResult.children,\n ],\n markDefs: [\n ...(firstFlattenedHeaderResult.markDefs ?? []),\n ...(firstFlattenedResult.markDefs ?? []),\n ],\n }\n\n rows.push(mergedTextBlock)\n cellIndex++\n continue\n }\n\n // Otherwise, we push the header result and the cell result as is.\n if (Array.isArray(headerResult)) {\n rows.push(...headerResult)\n } else {\n rows.push(headerResult)\n }\n\n if (Array.isArray(result)) {\n rows.push(...result)\n } else {\n rows.push(result)\n }\n\n cellIndex++\n }\n }\n\n // Return the processed rows as individual text blocks\n return rows\n },\n 
}\n}\n"],"names":["rows"],"mappings":";;AAwEO,SAAS,uBAAuB;AAAA,EACrC;AAAA,EACA;AACF,GAMqB;AACnB,SAAO;AAAA,IACL,aAAa,CAAC,MAAM,SAAS;AAC3B,UAAI,CAAC,UAAU,IAAI,KAAK,QAAQ,IAAI,MAAM;AACxC;AAIF,UAAI,YADU,KAAK,cAAc,OAAO,GACjB,cAAc,IAAI;AACzC,YAAM,QAAQ,KAAK,cAAc,OAAO;AACxC,UAAI,WAAW,QAAQ,CAAC,GAAG,MAAM,iBAAiB,IAAI,CAAC,IAAI,CAAA;AAE3D,UAAI,CAAC,aAAa,CAAC,UAAU;AAK3B,cAAM,eAAe,CAAC,GAAG,KAAK,iBAAiB,IAAI,CAAC,EAAE,IAAI,CAAC,QAC3C,IAAI,iBAAiB,IAAI,EAC1B,MACd,GAEK,mBAAmB,aAAa,CAAC;AASvC,YANE,CAAC,oBACD,CAAC,aAAa,MAAM,CAAC,UAAU,UAAU,gBAAgB,KAKvD,mBAAmB;AACrB;AAKF,cAAMA,QAAO,CAAC,GAAG,KAAK,iBAAiB,IAAI,CAAC;AAC5C,oBAAYA,MAAK,MAAM,GAAG,CAAC,EAAE,CAAC,GAC9B,WAAWA,MAAK,MAAM,CAAC;AAAA,MACzB;AAEA,UAAI,CAAC;AACH;AAIF,YAAM,gBAAgB,CAAC,GADH,UAAU,iBAAiB,QAAQ,CAClB,EAAE;AAAA,QAAI,CAAC,eAC1C,KAAK,UAAU;AAAA,MAAA,GAIX,OAAsB,CAAA;AAE5B,iBAAW,OAAO,UAAU;AAC1B,cAAM,QAAQ,IAAI,iBAAiB,IAAI;AAEvC,YAAI,YAAY;AAChB,mBAAW,QAAQ,OAAO;AACxB,gBAAM,SAAS,KAAK,IAAI;AAExB,cAAI,CAAC,QAAQ;AACX;AACA;AAAA,UACF;AAEA,gBAAM,eAAe,cAAc,SAAS;AAE5C,cAAI,CAAC,cAAc;AAGb,kBAAM,QAAQ,MAAM,IACtB,KAAK,KAAK,GAAG,MAAM,IAEnB,KAAK,KAAK,MAAM,GAElB;AACA;AAAA,UACF;AAEA,gBAAM,wBAAwB;AAAA,YAC5B,EAAC,OAAA;AAAA,YACA,MAAM,QAAQ,YAAY,IACvB,eACA,CAAC,YAAY;AAAA,UAAA,GAEb,6BAA6B,sBAAsB,CAAC,GACpD,kBAAkB;AAAA,YACtB,EAAC,OAAA;AAAA,YACA,MAAM,QAAQ,MAAM,IACjB,SACA,CAAC,MAAM;AAAA,UAAA,GAEP,uBAAuB,gBAAgB,CAAC;AAE9C,cACE,sBAAsB,WAAW,KACjC,YAAY,EAAC,UAAS,0BAA0B,KAChD,gBAAgB,WAAW,KAC3B,YAAY,EAAC,OAAA,GAAS,oBAAoB,GAC1C;AACA,kBAAM,iBAAiB,YAAA,GAGjB,kBAAkB;AAAA,cACtB,GAAG;AAAA,cACH,UAAU;AAAA,gBACR,GAAG,2BAA2B;AAAA,gBAC9B,GAAI,iBAAiB,CAAC,cAAc,IAAI,CAAA;AAAA,gBACxC,GAAG,qBAAqB;AAAA,cAAA;AAAA,cAE1B,UAAU;AAAA,gBACR,GAAI,2BAA2B,YAAY,CAAA;AAAA,gBAC3C,GAAI,qBAAqB,YAAY,CAAA;AAAA,cAAC;AAAA,YACxC;AAGF,iBAAK,KAAK,eAAe,GACzB;AACA;AAAA,UACF;AAGI,gBAAM,QAAQ,YAAY,IAC5B,KAAK,KAAK,GAAG,YAAY,IAEzB,KAAK,KAAK,YAAY,GAGpB,MAAM,QAAQ,MAAM,IACtB,KAAK,KAAK,GAAG,MAAM,IAEnB,KAAK,KAAK,MAAM,GAGlB;AAAA,QACF;AAAA,MACF;AAGA,aAAO;AAAA,IACT;AAAA,EAAA;AAEJ;"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@portabletext/block-tools",
3
- "version": "3.4.1",
3
+ "version": "3.5.1",
4
4
  "description": "Can format HTML, Slate JSON or Sanity block array into any other format.",
5
5
  "keywords": [
6
6
  "portable-text",
@@ -26,6 +26,12 @@
26
26
  "require": "./lib/index.cjs",
27
27
  "default": "./lib/index.js"
28
28
  },
29
+ "./rules": {
30
+ "source": "./src/rules/_exports/index.ts",
31
+ "import": "./lib/rules/index.js",
32
+ "require": "./lib/rules/index.cjs",
33
+ "default": "./lib/rules/index.js"
34
+ },
29
35
  "./package.json": "./package.json"
30
36
  },
31
37
  "main": "./lib/index.cjs",
@@ -42,7 +48,7 @@
42
48
  "@portabletext/schema": "^1.2.0"
43
49
  },
44
50
  "devDependencies": {
45
- "@sanity/pkg-utils": "^8.0.5",
51
+ "@sanity/pkg-utils": "^8.1.1",
46
52
  "@sanity/schema": "^4.6.0",
47
53
  "@sanity/types": "^4.6.0",
48
54
  "@types/jsdom": "^20.0.0",
@@ -5,10 +5,9 @@ import {flattenNestedBlocks} from './flatten-nested-blocks'
5
5
 
6
6
  describe(flattenNestedBlocks.name, () => {
7
7
  test('flattening text blocks', () => {
8
- const keyGenerator = createTestKeyGenerator('k')
9
8
  const schema = compileSchema(defineSchema({}))
10
9
  expect(
11
- flattenNestedBlocks({schema, keyGenerator}, [
10
+ flattenNestedBlocks({schema}, [
12
11
  {
13
12
  _type: 'block',
14
13
  children: [
@@ -40,12 +39,11 @@ describe(flattenNestedBlocks.name, () => {
40
39
  })
41
40
 
42
41
  test('flattening text blocks with block objects in schema', () => {
43
- const keyGenerator = createTestKeyGenerator('k')
44
42
  const schema = compileSchema(
45
43
  defineSchema({blockObjects: [{name: 'image'}]}),
46
44
  )
47
45
  expect(
48
- flattenNestedBlocks({schema, keyGenerator}, [
46
+ flattenNestedBlocks({schema}, [
49
47
  {
50
48
  _type: 'block',
51
49
  children: [
@@ -77,14 +75,13 @@ describe(flattenNestedBlocks.name, () => {
77
75
  })
78
76
 
79
77
  test('flattening text blocks with styles in schema', () => {
80
- const keyGenerator = createTestKeyGenerator('k')
81
78
  const schema = compileSchema(
82
79
  defineSchema({
83
80
  styles: [{name: 'h1'}],
84
81
  }),
85
82
  )
86
83
  expect(
87
- flattenNestedBlocks({schema, keyGenerator}, [
84
+ flattenNestedBlocks({schema}, [
88
85
  {
89
86
  _type: 'block',
90
87
  children: [
@@ -131,7 +128,7 @@ describe(flattenNestedBlocks.name, () => {
131
128
  const linkKey = keyGenerator()
132
129
 
133
130
  expect(
134
- flattenNestedBlocks({schema, keyGenerator}, [
131
+ flattenNestedBlocks({schema}, [
135
132
  {
136
133
  _key: blockKey,
137
134
  _type: 'block',
@@ -198,7 +195,7 @@ describe(flattenNestedBlocks.name, () => {
198
195
  const barKey = keyGenerator()
199
196
 
200
197
  expect(
201
- flattenNestedBlocks({schema, keyGenerator}, [
198
+ flattenNestedBlocks({schema}, [
202
199
  {
203
200
  _key: blockKey,
204
201
  _type: 'block',
@@ -17,7 +17,6 @@ import {
17
17
  export function flattenNestedBlocks(
18
18
  context: {
19
19
  schema: Schema
20
- keyGenerator: () => string
21
20
  },
22
21
  blocks: Array<ArbitraryTypedObject>,
23
22
  ): TypedObject[] {
@@ -78,7 +78,7 @@ export default class HtmlDeserializer {
78
78
  const blocks = trimWhitespace(
79
79
  this.schema,
80
80
  flattenNestedBlocks(
81
- {schema: this.schema, keyGenerator: this.keyGenerator},
81
+ {schema: this.schema},
82
82
  ensureRootIsBlocks(
83
83
  this.schema,
84
84
  this.deserializeElements(children) as Array<ArbitraryTypedObject>,
@@ -0,0 +1 @@
1
+ export * from '../index'
@@ -0,0 +1,405 @@
1
+ import {compileSchema, defineSchema} from '@portabletext/schema'
2
+ import {getTersePt} from '@portabletext/test'
3
+ import {JSDOM} from 'jsdom'
4
+ import {describe, expect, test} from 'vitest'
5
+ import {createTestKeyGenerator} from '../../test/test-key-generator'
6
+ import {htmlToBlocks} from '../index'
7
+ import type {ImageSchemaMatcher} from '../schema-matchers'
8
+ import type {HtmlDeserializerOptions} from '../types'
9
+ import {createFlattenTableRule} from './flatten-tables'
10
+
11
+ describe(createFlattenTableRule.name, () => {
12
// Test matcher for block-level images: produces an `image` object only when
// the schema declares an `image` block object, copying `src`/`alt` when set.
const imageMatcher: ImageSchemaMatcher = ({context, props}) => {
  const schemaHasBlockImage = context.schema.blockObjects.some(
    (blockObject) => blockObject.name === 'image',
  )

  if (!schemaHasBlockImage) {
    return undefined
  }

  const srcField = props.src ? {src: props.src} : {}
  const altField = props.alt ? {alt: props.alt} : {}

  return {_type: 'image', ...srcField, ...altField}
}
27
+
28
// Test matcher for inline images: produces an `image` object only when the
// schema declares an `image` inline object, copying `src`/`alt` when set.
const inlineImageMatcher: ImageSchemaMatcher = ({context, props}) => {
  const schemaHasInlineImage = context.schema.inlineObjects.some(
    (inlineObject) => inlineObject.name === 'image',
  )

  if (!schemaHasInlineImage) {
    return undefined
  }

  const srcField = props.src ? {src: props.src} : {}
  const altField = props.alt ? {alt: props.alt} : {}

  return {_type: 'image', ...srcField, ...altField}
}
43
+
44
+ const schema = compileSchema(
45
+ defineSchema({
46
+ blockObjects: [{name: 'image', fields: [{name: 'src', type: 'string'}]}],
47
+ inlineObjects: [{name: 'image', fields: [{name: 'src', type: 'string'}]}],
48
+ }),
49
+ )
50
+
51
// Runs `htmlToBlocks` against the suite's schema with a JSDOM-backed parser
// and a fresh deterministic key generator; caller options override both.
function transform(html: string, options?: HtmlDeserializerOptions) {
  const defaults: HtmlDeserializerOptions = {
    parseHtml: (markup) => new JSDOM(markup).window.document,
    keyGenerator: createTestKeyGenerator('k'),
  }

  return htmlToBlocks(html, schema, {...defaults, ...options})
}
58
+
59
+ const flattenTableRule = createFlattenTableRule({
60
+ schema,
61
+ separator: () => ({_type: 'span', text: ' '}),
62
+ })
63
+
64
+ test('ordinary table', () => {
65
+ /**
66
+ * | Year | Sales | Expenses | Profit |
67
+ * | ---- | -------- | -------- | ------- |
68
+ * | 2022 | \$8,000 | \$5,000 | \$3,000 |
69
+ * | 2023 | \$10,000 | \$6,500 | \$3,500 |
70
+ * | 2024 | \$15,000 | \$9,000 | \$6,000 |
71
+ */
72
+ const html = [
73
+ '<table>',
74
+ '<thead>',
75
+ '<tr>',
76
+ '<th>Year</th>',
77
+ '<th>Sales</th>',
78
+ '<th>Expenses</th>',
79
+ '<th>Profit</th>',
80
+ '</tr>',
81
+ '</thead>',
82
+ '<tbody>',
83
+ '<tr>',
84
+ '<td>2022</td>',
85
+ '<td>$8,000</td>',
86
+ '<td>$5,000</td>',
87
+ '<td>$3,000</td>',
88
+ '</tr>',
89
+ '<tr>',
90
+ '<td>2023</td>',
91
+ '<td>$10,000</td>',
92
+ '<td>$6,500</td>',
93
+ '<td>$3,500</td>',
94
+ '</tr>',
95
+ '<tr>',
96
+ '<td>2024</td>',
97
+ '<td>$15,000</td>',
98
+ '<td>$9,000</td>',
99
+ '<td>$6,000</td>',
100
+ '</tr>',
101
+ '</tbody>',
102
+ '</table>',
103
+ ].join('')
104
+
105
+ expect(
106
+ getTersePt({
107
+ schema,
108
+ value: transform(html, {
109
+ rules: [flattenTableRule],
110
+ }),
111
+ }),
112
+ ).toEqual([
113
+ 'Year, ,2022',
114
+ 'Sales, ,$8,000',
115
+ 'Expenses, ,$5,000',
116
+ 'Profit, ,$3,000',
117
+ 'Year, ,2023',
118
+ 'Sales, ,$10,000',
119
+ 'Expenses, ,$6,500',
120
+ 'Profit, ,$3,500',
121
+ 'Year, ,2024',
122
+ 'Sales, ,$15,000',
123
+ 'Expenses, ,$9,000',
124
+ 'Profit, ,$6,000',
125
+ ])
126
+ })
127
+
128
+ test('ordinary table without thead and tbody', () => {
129
+ /**
130
+ * | Year | Sales | Expenses | Profit |
131
+ * | 2022 | \$8,000 | \$5,000 | \$3,000 |
132
+ * | 2023 | \$10,000 | \$6,500 | \$3,500 |
133
+ * | 2024 | \$15,000 | \$9,000 | \$6,000 |
134
+ */
135
+ const html = [
136
+ '<table>',
137
+ '<tr>',
138
+ '<td>Year</td>',
139
+ '<td>Sales</td>',
140
+ '<td>Expenses</td>',
141
+ '<td>Profit</td>',
142
+ '</tr>',
143
+ '<tr>',
144
+ '<td>2022</td>',
145
+ '<td>$8,000</td>',
146
+ '<td>$5,000</td>',
147
+ '<td>$3,000</td>',
148
+ '</tr>',
149
+ '<tr>',
150
+ '<td>2023</td>',
151
+ '<td>$10,000</td>',
152
+ '<td>$6,500</td>',
153
+ '<td>$3,500</td>',
154
+ '</tr>',
155
+ '<tr>',
156
+ '<td>2024</td>',
157
+ '<td>$15,000</td>',
158
+ '<td>$9,000</td>',
159
+ '<td>$6,000</td>',
160
+ '</tr>',
161
+ '</table>',
162
+ ].join('')
163
+
164
+ expect(
165
+ getTersePt({
166
+ schema,
167
+ value: transform(html, {
168
+ rules: [flattenTableRule],
169
+ }),
170
+ }),
171
+ ).toEqual([
172
+ 'Year, ,2022',
173
+ 'Sales, ,$8,000',
174
+ 'Expenses, ,$5,000',
175
+ 'Profit, ,$3,000',
176
+ 'Year, ,2023',
177
+ 'Sales, ,$10,000',
178
+ 'Expenses, ,$6,500',
179
+ 'Profit, ,$3,500',
180
+ 'Year, ,2024',
181
+ 'Sales, ,$15,000',
182
+ 'Expenses, ,$9,000',
183
+ 'Profit, ,$6,000',
184
+ ])
185
+ })
186
+
187
+ test('ordinary table without thead', () => {
188
+ /**
189
+ * | Year | Sales | Expenses | Profit |
190
+ * | 2022 | \$8,000 | \$5,000 | \$3,000 |
191
+ * | 2023 | \$10,000 | \$6,500 | \$3,500 |
192
+ * | 2024 | \$15,000 | \$9,000 | \$6,000 |
193
+ */
194
+ const html = [
195
+ '<table>',
196
+ '<tbody>',
197
+ '<tr>',
198
+ '<td>Year</td>',
199
+ '<td>Sales</td>',
200
+ '<td>Expenses</td>',
201
+ '<td>Profit</td>',
202
+ '</tr>',
203
+ '<tr>',
204
+ '<td>2022</td>',
205
+ '<td>$8,000</td>',
206
+ '<td>$5,000</td>',
207
+ '<td>$3,000</td>',
208
+ '</tr>',
209
+ '<tr>',
210
+ '<td>2023</td>',
211
+ '<td>$10,000</td>',
212
+ '<td>$6,500</td>',
213
+ '<td>$3,500</td>',
214
+ '</tr>',
215
+ '<tr>',
216
+ '<td>2024</td>',
217
+ '<td>$15,000</td>',
218
+ '<td>$9,000</td>',
219
+ '<td>$6,000</td>',
220
+ '</tr>',
221
+ '</tbody>',
222
+ '</table>',
223
+ ].join('')
224
+
225
+ expect(
226
+ getTersePt({
227
+ schema,
228
+ value: transform(html, {
229
+ rules: [flattenTableRule],
230
+ }),
231
+ }),
232
+ ).toEqual([
233
+ 'Year, ,2022',
234
+ 'Sales, ,$8,000',
235
+ 'Expenses, ,$5,000',
236
+ 'Profit, ,$3,000',
237
+ 'Year, ,2023',
238
+ 'Sales, ,$10,000',
239
+ 'Expenses, ,$6,500',
240
+ 'Profit, ,$3,500',
241
+ 'Year, ,2024',
242
+ 'Sales, ,$15,000',
243
+ 'Expenses, ,$9,000',
244
+ 'Profit, ,$6,000',
245
+ ])
246
+ })
247
+
248
+ test('ordinary table without tbody', () => {
249
+ /**
250
+ * | Year | Sales | Expenses | Profit |
251
+ * | 2022 | \$8,000 | \$5,000 | \$3,000 |
252
+ * | 2023 | \$10,000 | \$6,500 | \$3,500 |
253
+ * | 2024 | \$15,000 | \$9,000 | \$6,000 |
254
+ */
255
+ const html = [
256
+ '<table>',
257
+ '<thead>',
258
+ '<tr>',
259
+ '<td>Year</td>',
260
+ '<td>Sales</td>',
261
+ '<td>Expenses</td>',
262
+ '<td>Profit</td>',
263
+ '</tr>',
264
+ '</thead>',
265
+ '<tr>',
266
+ '<td>2022</td>',
267
+ '<td>$8,000</td>',
268
+ '<td>$5,000</td>',
269
+ '<td>$3,000</td>',
270
+ '</tr>',
271
+ '<tr>',
272
+ '<td>2023</td>',
273
+ '<td>$10,000</td>',
274
+ '<td>$6,500</td>',
275
+ '<td>$3,500</td>',
276
+ '</tr>',
277
+ '<tr>',
278
+ '<td>2024</td>',
279
+ '<td>$15,000</td>',
280
+ '<td>$9,000</td>',
281
+ '<td>$6,000</td>',
282
+ '</tr>',
283
+ '</table>',
284
+ ].join('')
285
+
286
+ expect(
287
+ getTersePt({
288
+ schema,
289
+ value: transform(html, {
290
+ rules: [flattenTableRule],
291
+ }),
292
+ }),
293
+ ).toEqual([
294
+ 'Year, ,2022',
295
+ 'Sales, ,$8,000',
296
+ 'Expenses, ,$5,000',
297
+ 'Profit, ,$3,000',
298
+ 'Year, ,2023',
299
+ 'Sales, ,$10,000',
300
+ 'Expenses, ,$6,500',
301
+ 'Profit, ,$3,500',
302
+ 'Year, ,2024',
303
+ 'Sales, ,$15,000',
304
+ 'Expenses, ,$9,000',
305
+ 'Profit, ,$6,000',
306
+ ])
307
+ })
308
+
309
+ describe('table with images', () => {
310
+ /**
311
+ * | Name | Photo |
312
+ * | --- | --- |
313
+ * | John Doe | <img src="https://via.placeholder.com/150" alt="John Doe" /> |
314
+ * | Jane Smith | <img src="https://via.placeholder.com/150" alt="Jane Smith" /> |
315
+ */
316
+ const html = [
317
+ '<table>',
318
+ '<thead>',
319
+ '<tr>',
320
+ '<th>Name</th>',
321
+ '<th>Photo</th>',
322
+ '</tr>',
323
+ '</thead>',
324
+ '<tbody>',
325
+ '<tr>',
326
+ '<td>John Doe</td>',
327
+ '<td><img src="https://via.placeholder.com/150" alt="John Doe" /></td>',
328
+ '</tr>',
329
+ '<tr>',
330
+ '<td>Jane Smith</td>',
331
+ '<td><img src="https://via.placeholder.com/150" alt="Jane Smith" /></td>',
332
+ '</tr>',
333
+ '</tbody>',
334
+ '</table>',
335
+ ].join('')
336
+
337
+ test('no image matcher', () => {
338
+ expect(
339
+ getTersePt({
340
+ schema,
341
+ value: transform(html, {
342
+ matchers: undefined,
343
+ rules: [flattenTableRule],
344
+ }),
345
+ }),
346
+ ).toEqual(['Name, ,John Doe', 'Photo', 'Name, ,Jane Smith', 'Photo'])
347
+ })
348
+
349
+ test('block image matcher', () => {
350
+ expect(
351
+ getTersePt({
352
+ schema,
353
+ value: transform(html, {
354
+ matchers: {
355
+ image: imageMatcher,
356
+ },
357
+ rules: [flattenTableRule],
358
+ }),
359
+ }),
360
+ ).toEqual([
361
+ 'Name, ,John Doe',
362
+ 'Photo',
363
+ '{image}',
364
+ 'Name, ,Jane Smith',
365
+ 'Photo',
366
+ '{image}',
367
+ ])
368
+ })
369
+
370
+ test('block and inline image matcher', () => {
371
+ expect(
372
+ getTersePt({
373
+ schema,
374
+ value: transform(html, {
375
+ matchers: {
376
+ image: imageMatcher,
377
+ inlineImage: inlineImageMatcher,
378
+ },
379
+ rules: [flattenTableRule],
380
+ }),
381
+ }),
382
+ ).toEqual([
383
+ 'Name, ,John Doe',
384
+ 'Photo, ,{image}',
385
+ 'Name, ,Jane Smith',
386
+ 'Photo, ,{image}',
387
+ ])
388
+ })
389
+
390
+ describe('Google Docs', () => {
391
+ test('simple table with thead', () => {
392
+ const html = `<meta charset='utf-8'><meta charset="utf-8"><b style="font-weight:normal;" id="docs-internal-guid-e0aa048e-7fff-f3cb-0d6d-5e68751be0e3"><div dir="ltr" style="margin-left:0pt;" align="left"><table style="border:none;border-collapse:collapse;table-layout:fixed;width:468pt"><colgroup><col /><col /></colgroup><thead><tr style="height:0pt"><th style="border-left:solid #000000 1pt;border-right:solid #000000 1pt;border-bottom:solid #000000 1pt;border-top:solid #000000 1pt;vertical-align:top;padding:5pt 5pt 5pt 5pt;overflow:hidden;overflow-wrap:break-word;" scope="col"><p dir="ltr" style="line-height:1.2;margin-top:0pt;margin-bottom:0pt;"><span style="font-size:12pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">Header 1</span></p></th><th style="border-left:solid #000000 1pt;border-right:solid #000000 1pt;border-bottom:solid #000000 1pt;border-top:solid #000000 1pt;vertical-align:top;padding:5pt 5pt 5pt 5pt;overflow:hidden;overflow-wrap:break-word;" scope="col"><p dir="ltr" style="line-height:1.2;margin-top:0pt;margin-bottom:0pt;"><span style="font-size:12pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">Header 2</span></p></th></tr></thead><tbody><tr style="height:0pt"><td style="border-left:solid #000000 1pt;border-right:solid #000000 1pt;border-bottom:solid #000000 1pt;border-top:solid #000000 1pt;vertical-align:top;padding:5pt 5pt 5pt 5pt;overflow:hidden;overflow-wrap:break-word;"><p dir="ltr" style="line-height:1.2;margin-top:0pt;margin-bottom:0pt;"><span 
style="font-size:12pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">Cell 1</span></p></td><td style="border-left:solid #000000 1pt;border-right:solid #000000 1pt;border-bottom:solid #000000 1pt;border-top:solid #000000 1pt;vertical-align:top;padding:5pt 5pt 5pt 5pt;overflow:hidden;overflow-wrap:break-word;"><p dir="ltr" style="line-height:1.2;margin-top:0pt;margin-bottom:0pt;"><span style="font-size:12pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">Cell 2</span></p></td></tr></tbody></table></div></b>`
393
+
394
+ expect(
395
+ getTersePt({
396
+ schema,
397
+ value: transform(html, {
398
+ rules: [flattenTableRule],
399
+ }),
400
+ }),
401
+ ).toEqual(['Header 1, ,Cell 1', 'Header 2, ,Cell 2'])
402
+ })
403
+ })
404
+ })
405
+ })