@portabletext/block-tools 3.5.0 → 3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/rules/index.cjs +13 -7
- package/lib/rules/index.cjs.map +1 -1
- package/lib/rules/index.js +13 -7
- package/lib/rules/index.js.map +1 -1
- package/package.json +1 -1
- package/src/rules/flatten-tables.test.ts +181 -0
- package/src/rules/flatten-tables.ts +32 -9
package/lib/rules/index.cjs
CHANGED
|
@@ -9,16 +9,22 @@ function createFlattenTableRule({
|
|
|
9
9
|
deserialize: (node, next) => {
|
|
10
10
|
if (!helpers.isElement(node) || helpers.tagName(node) !== "table")
|
|
11
11
|
return;
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
let headerRow = node.querySelector("thead")?.querySelector("tr");
|
|
13
|
+
const tbody = node.querySelector("tbody");
|
|
14
|
+
let bodyRows = tbody ? [...tbody.querySelectorAll("tr")] : [];
|
|
15
|
+
if (!headerRow || !bodyRows) {
|
|
16
|
+
const columnCounts = [...node.querySelectorAll("tr")].map((row) => row.querySelectorAll("td").length), firstColumnCount = columnCounts[0];
|
|
17
|
+
if (!firstColumnCount || !columnCounts.every((count) => count === firstColumnCount) || firstColumnCount < 3)
|
|
18
|
+
return;
|
|
19
|
+
const rows2 = [...node.querySelectorAll("tr")];
|
|
20
|
+
headerRow = rows2.slice(0, 1)[0], bodyRows = rows2.slice(1);
|
|
21
|
+
}
|
|
16
22
|
if (!headerRow)
|
|
17
23
|
return;
|
|
18
|
-
const headerResults = [...headerRow.querySelectorAll("th")].map(
|
|
24
|
+
const headerResults = [...headerRow.querySelectorAll("th, td")].map(
|
|
19
25
|
(headerCell) => next(headerCell)
|
|
20
|
-
), rows = []
|
|
21
|
-
for (const row of
|
|
26
|
+
), rows = [];
|
|
27
|
+
for (const row of bodyRows) {
|
|
22
28
|
const cells = row.querySelectorAll("td");
|
|
23
29
|
let cellIndex = 0;
|
|
24
30
|
for (const cell of cells) {
|
package/lib/rules/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.cjs","sources":["../../src/rules/flatten-tables.ts"],"sourcesContent":["import {\n isTextBlock,\n type PortableTextObject,\n type PortableTextSpan,\n type Schema,\n} from '@portabletext/schema'\nimport {flattenNestedBlocks} from '../HtmlDeserializer/flatten-nested-blocks'\nimport {isElement, tagName} from '../HtmlDeserializer/helpers'\nimport type {\n ArbitraryTypedObject,\n DeserializerRule,\n TypedObject,\n} from '../types'\n\n/**\n * An opinionated `DeserializerRule` that flattens tables in a way that repeats\n * the header row for each cell in the row.\n *\n * @example\n * ```html\n * <table>\n * <thead>\n * <tr>\n * <th>Header 1</th>\n * <th>Header 2</th>\n * </tr>\n * </thead>\n * <tbody>\n * <tr>\n * <td>Cell 1</td>\n * <td>Cell 2</td>\n * </tr>\n * </tbody>\n * </table>\n * ```\n * Turns into\n * ```json\n * [\n * {\n * _type: 'block',\n * children: [\n * {\n * _type: 'text',\n * text: 'Header 1'\n * },\n * {\n * _type: 'text',\n * text: 'Cell 1'\n * }\n * ]\n * },\n * {\n * _type: 'block',\n * children: [\n * {\n * _type: 'text',\n * text: 'Header 2'\n * },\n * {\n * _type: 'text',\n * text: 'Cell 2'\n * }\n * ]\n * }\n * ]\n * ```\n *\n * Use the `separator` option to control if a child element should separate\n * headers and cells.\n *\n * @beta\n */\nexport function createFlattenTableRule({\n schema,\n separator,\n}: {\n schema: Schema\n separator?: () =>\n | (Omit<PortableTextSpan, '_key'> & {_key?: string})\n | (Omit<PortableTextObject, '_key'> & {_key?: string})\n | undefined\n}): DeserializerRule {\n return {\n deserialize: (node, next) => {\n if (!isElement(node) || tagName(node) !== 'table') {\n return undefined\n }\n\n const thead = node.querySelector('thead')\n const tbody = node.querySelector('tbody')\n\n //
|
|
1
|
+
{"version":3,"file":"index.cjs","sources":["../../src/rules/flatten-tables.ts"],"sourcesContent":["import {\n isTextBlock,\n type PortableTextObject,\n type PortableTextSpan,\n type Schema,\n} from '@portabletext/schema'\nimport {flattenNestedBlocks} from '../HtmlDeserializer/flatten-nested-blocks'\nimport {isElement, tagName} from '../HtmlDeserializer/helpers'\nimport type {\n ArbitraryTypedObject,\n DeserializerRule,\n TypedObject,\n} from '../types'\n\n/**\n * An opinionated `DeserializerRule` that flattens tables in a way that repeats\n * the header row for each cell in the row.\n *\n * @example\n * ```html\n * <table>\n * <thead>\n * <tr>\n * <th>Header 1</th>\n * <th>Header 2</th>\n * </tr>\n * </thead>\n * <tbody>\n * <tr>\n * <td>Cell 1</td>\n * <td>Cell 2</td>\n * </tr>\n * </tbody>\n * </table>\n * ```\n * Turns into\n * ```json\n * [\n * {\n * _type: 'block',\n * children: [\n * {\n * _type: 'text',\n * text: 'Header 1'\n * },\n * {\n * _type: 'text',\n * text: 'Cell 1'\n * }\n * ]\n * },\n * {\n * _type: 'block',\n * children: [\n * {\n * _type: 'text',\n * text: 'Header 2'\n * },\n * {\n * _type: 'text',\n * text: 'Cell 2'\n * }\n * ]\n * }\n * ]\n * ```\n *\n * Use the `separator` option to control if a child element should separate\n * headers and cells.\n *\n * @beta\n */\nexport function createFlattenTableRule({\n schema,\n separator,\n}: {\n schema: Schema\n separator?: () =>\n | (Omit<PortableTextSpan, '_key'> & {_key?: string})\n | (Omit<PortableTextObject, '_key'> & {_key?: string})\n | undefined\n}): DeserializerRule {\n return {\n deserialize: (node, next) => {\n if (!isElement(node) || tagName(node) !== 'table') {\n return undefined\n }\n\n const thead = node.querySelector('thead')\n let headerRow = thead?.querySelector('tr')\n const tbody = node.querySelector('tbody')\n let bodyRows = tbody ? [...tbody.querySelectorAll('tr')] : []\n\n if (!headerRow || !bodyRows) {\n // If there is not thead or tbody, we look at the column count. 
If the\n // column count is greater than 2 then we infer that the first row is\n // the header row and the rest are the body rows.\n\n const columnCounts = [...node.querySelectorAll('tr')].map((row) => {\n const cells = row.querySelectorAll('td')\n return cells.length\n })\n\n const firstColumnCount = columnCounts[0]\n\n if (\n !firstColumnCount ||\n !columnCounts.every((count) => count === firstColumnCount)\n ) {\n return undefined\n }\n\n if (firstColumnCount < 3) {\n return undefined\n }\n\n // Now we know that all rows have the same column count and that\n // count is >2\n const rows = [...node.querySelectorAll('tr')]\n headerRow = rows.slice(0, 1)[0]\n bodyRows = rows.slice(1)\n }\n\n if (!headerRow) {\n return undefined\n }\n\n const headerCells = headerRow.querySelectorAll('th, td')\n const headerResults = [...headerCells].map((headerCell) =>\n next(headerCell),\n )\n\n // Process tbody rows and combine with headers\n const rows: TypedObject[] = []\n\n for (const row of bodyRows) {\n const cells = row.querySelectorAll('td')\n\n let cellIndex = 0\n for (const cell of cells) {\n const result = next(cell)\n\n if (!result) {\n cellIndex++\n continue\n }\n\n const headerResult = headerResults[cellIndex]\n\n if (!headerResult) {\n // If we can't find a corresponding header, then we just push\n // the deserialized cell as is.\n if (Array.isArray(result)) {\n rows.push(...result)\n } else {\n rows.push(result)\n }\n cellIndex++\n continue\n }\n\n const flattenedHeaderResult = flattenNestedBlocks(\n {schema},\n (Array.isArray(headerResult)\n ? headerResult\n : [headerResult]) as Array<ArbitraryTypedObject>,\n )\n const firstFlattenedHeaderResult = flattenedHeaderResult[0]\n const flattenedResult = flattenNestedBlocks(\n {schema},\n (Array.isArray(result)\n ? 
result\n : [result]) as Array<ArbitraryTypedObject>,\n )\n const firstFlattenedResult = flattenedResult[0]\n\n if (\n flattenedHeaderResult.length === 1 &&\n isTextBlock({schema}, firstFlattenedHeaderResult) &&\n flattenedResult.length === 1 &&\n isTextBlock({schema}, firstFlattenedResult)\n ) {\n const separatorChild = separator?.()\n // If the header result and the cell result are text blocks then\n // we merge them together.\n const mergedTextBlock = {\n ...firstFlattenedHeaderResult,\n children: [\n ...firstFlattenedHeaderResult.children,\n ...(separatorChild ? [separatorChild] : []),\n ...firstFlattenedResult.children,\n ],\n markDefs: [\n ...(firstFlattenedHeaderResult.markDefs ?? []),\n ...(firstFlattenedResult.markDefs ?? []),\n ],\n }\n\n rows.push(mergedTextBlock)\n cellIndex++\n continue\n }\n\n // Otherwise, we push the header result and the cell result as is.\n if (Array.isArray(headerResult)) {\n rows.push(...headerResult)\n } else {\n rows.push(headerResult)\n }\n\n if (Array.isArray(result)) {\n rows.push(...result)\n } else {\n rows.push(result)\n }\n\n cellIndex++\n }\n }\n\n // Return the processed rows as individual text blocks\n return rows\n },\n 
}\n}\n"],"names":["schema","isElement","tagName","rows","flattenNestedBlocks","isTextBlock"],"mappings":";;;AAwEO,SAAS,uBAAuB;AAAA,EAAA,QACrCA;AAAAA,EACA;AACF,GAMqB;AACnB,SAAO;AAAA,IACL,aAAa,CAAC,MAAM,SAAS;AAC3B,UAAI,CAACC,QAAAA,UAAU,IAAI,KAAKC,QAAAA,QAAQ,IAAI,MAAM;AACxC;AAIF,UAAI,YADU,KAAK,cAAc,OAAO,GACjB,cAAc,IAAI;AACzC,YAAM,QAAQ,KAAK,cAAc,OAAO;AACxC,UAAI,WAAW,QAAQ,CAAC,GAAG,MAAM,iBAAiB,IAAI,CAAC,IAAI,CAAA;AAE3D,UAAI,CAAC,aAAa,CAAC,UAAU;AAK3B,cAAM,eAAe,CAAC,GAAG,KAAK,iBAAiB,IAAI,CAAC,EAAE,IAAI,CAAC,QAC3C,IAAI,iBAAiB,IAAI,EAC1B,MACd,GAEK,mBAAmB,aAAa,CAAC;AASvC,YANE,CAAC,oBACD,CAAC,aAAa,MAAM,CAAC,UAAU,UAAU,gBAAgB,KAKvD,mBAAmB;AACrB;AAKF,cAAMC,QAAO,CAAC,GAAG,KAAK,iBAAiB,IAAI,CAAC;AAC5C,oBAAYA,MAAK,MAAM,GAAG,CAAC,EAAE,CAAC,GAC9B,WAAWA,MAAK,MAAM,CAAC;AAAA,MACzB;AAEA,UAAI,CAAC;AACH;AAIF,YAAM,gBAAgB,CAAC,GADH,UAAU,iBAAiB,QAAQ,CAClB,EAAE;AAAA,QAAI,CAAC,eAC1C,KAAK,UAAU;AAAA,MAAA,GAIX,OAAsB,CAAA;AAE5B,iBAAW,OAAO,UAAU;AAC1B,cAAM,QAAQ,IAAI,iBAAiB,IAAI;AAEvC,YAAI,YAAY;AAChB,mBAAW,QAAQ,OAAO;AACxB,gBAAM,SAAS,KAAK,IAAI;AAExB,cAAI,CAAC,QAAQ;AACX;AACA;AAAA,UACF;AAEA,gBAAM,eAAe,cAAc,SAAS;AAE5C,cAAI,CAAC,cAAc;AAGb,kBAAM,QAAQ,MAAM,IACtB,KAAK,KAAK,GAAG,MAAM,IAEnB,KAAK,KAAK,MAAM,GAElB;AACA;AAAA,UACF;AAEA,gBAAM,wBAAwBC,QAAAA;AAAAA,YAC5B,EAAA,QAACJ,SAAA;AAAA,YACA,MAAM,QAAQ,YAAY,IACvB,eACA,CAAC,YAAY;AAAA,UAAA,GAEb,6BAA6B,sBAAsB,CAAC,GACpD,kBAAkBI,QAAAA;AAAAA,YACtB,EAAA,QAACJ,SAAA;AAAA,YACA,MAAM,QAAQ,MAAM,IACjB,SACA,CAAC,MAAM;AAAA,UAAA,GAEP,uBAAuB,gBAAgB,CAAC;AAE9C,cACE,sBAAsB,WAAW,KACjCK,OAAAA,YAAY,EAAA,QAACL,YAAS,0BAA0B,KAChD,gBAAgB,WAAW,KAC3BK,OAAAA,YAAY,EAAA,QAACL,SAAA,GAAS,oBAAoB,GAC1C;AACA,kBAAM,iBAAiB,YAAA,GAGjB,kBAAkB;AAAA,cACtB,GAAG;AAAA,cACH,UAAU;AAAA,gBACR,GAAG,2BAA2B;AAAA,gBAC9B,GAAI,iBAAiB,CAAC,cAAc,IAAI,CAAA;AAAA,gBACxC,GAAG,qBAAqB;AAAA,cAAA;AAAA,cAE1B,UAAU;AAAA,gBACR,GAAI,2BAA2B,YAAY,CAAA;AAAA,gBAC3C,GAAI,qBAAqB,YAAY,CAAA;AAAA,cAAC;AAAA,YACxC;AAGF,iBAAK,KAAK,eAAe,GACzB;AACA;AAAA,UACF;AAGI,gBAAM,QAAQ,YAAY,IAC5B,KAAK,KAAK,GAAG,YAAY,IAEzB,KAAK,KAAK,YAAY,GAGpB,MAAM,QAAQ,MAAM,IACt
B,KAAK,KAAK,GAAG,MAAM,IAEnB,KAAK,KAAK,MAAM,GAGlB;AAAA,QACF;AAAA,MACF;AAGA,aAAO;AAAA,IACT;AAAA,EAAA;AAEJ;;"}
|
package/lib/rules/index.js
CHANGED
|
@@ -8,16 +8,22 @@ function createFlattenTableRule({
|
|
|
8
8
|
deserialize: (node, next) => {
|
|
9
9
|
if (!isElement(node) || tagName(node) !== "table")
|
|
10
10
|
return;
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
let headerRow = node.querySelector("thead")?.querySelector("tr");
|
|
12
|
+
const tbody = node.querySelector("tbody");
|
|
13
|
+
let bodyRows = tbody ? [...tbody.querySelectorAll("tr")] : [];
|
|
14
|
+
if (!headerRow || !bodyRows) {
|
|
15
|
+
const columnCounts = [...node.querySelectorAll("tr")].map((row) => row.querySelectorAll("td").length), firstColumnCount = columnCounts[0];
|
|
16
|
+
if (!firstColumnCount || !columnCounts.every((count) => count === firstColumnCount) || firstColumnCount < 3)
|
|
17
|
+
return;
|
|
18
|
+
const rows2 = [...node.querySelectorAll("tr")];
|
|
19
|
+
headerRow = rows2.slice(0, 1)[0], bodyRows = rows2.slice(1);
|
|
20
|
+
}
|
|
15
21
|
if (!headerRow)
|
|
16
22
|
return;
|
|
17
|
-
const headerResults = [...headerRow.querySelectorAll("th")].map(
|
|
23
|
+
const headerResults = [...headerRow.querySelectorAll("th, td")].map(
|
|
18
24
|
(headerCell) => next(headerCell)
|
|
19
|
-
), rows = []
|
|
20
|
-
for (const row of
|
|
25
|
+
), rows = [];
|
|
26
|
+
for (const row of bodyRows) {
|
|
21
27
|
const cells = row.querySelectorAll("td");
|
|
22
28
|
let cellIndex = 0;
|
|
23
29
|
for (const cell of cells) {
|
package/lib/rules/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sources":["../../src/rules/flatten-tables.ts"],"sourcesContent":["import {\n isTextBlock,\n type PortableTextObject,\n type PortableTextSpan,\n type Schema,\n} from '@portabletext/schema'\nimport {flattenNestedBlocks} from '../HtmlDeserializer/flatten-nested-blocks'\nimport {isElement, tagName} from '../HtmlDeserializer/helpers'\nimport type {\n ArbitraryTypedObject,\n DeserializerRule,\n TypedObject,\n} from '../types'\n\n/**\n * An opinionated `DeserializerRule` that flattens tables in a way that repeats\n * the header row for each cell in the row.\n *\n * @example\n * ```html\n * <table>\n * <thead>\n * <tr>\n * <th>Header 1</th>\n * <th>Header 2</th>\n * </tr>\n * </thead>\n * <tbody>\n * <tr>\n * <td>Cell 1</td>\n * <td>Cell 2</td>\n * </tr>\n * </tbody>\n * </table>\n * ```\n * Turns into\n * ```json\n * [\n * {\n * _type: 'block',\n * children: [\n * {\n * _type: 'text',\n * text: 'Header 1'\n * },\n * {\n * _type: 'text',\n * text: 'Cell 1'\n * }\n * ]\n * },\n * {\n * _type: 'block',\n * children: [\n * {\n * _type: 'text',\n * text: 'Header 2'\n * },\n * {\n * _type: 'text',\n * text: 'Cell 2'\n * }\n * ]\n * }\n * ]\n * ```\n *\n * Use the `separator` option to control if a child element should separate\n * headers and cells.\n *\n * @beta\n */\nexport function createFlattenTableRule({\n schema,\n separator,\n}: {\n schema: Schema\n separator?: () =>\n | (Omit<PortableTextSpan, '_key'> & {_key?: string})\n | (Omit<PortableTextObject, '_key'> & {_key?: string})\n | undefined\n}): DeserializerRule {\n return {\n deserialize: (node, next) => {\n if (!isElement(node) || tagName(node) !== 'table') {\n return undefined\n }\n\n const thead = node.querySelector('thead')\n const tbody = node.querySelector('tbody')\n\n //
|
|
1
|
+
{"version":3,"file":"index.js","sources":["../../src/rules/flatten-tables.ts"],"sourcesContent":["import {\n isTextBlock,\n type PortableTextObject,\n type PortableTextSpan,\n type Schema,\n} from '@portabletext/schema'\nimport {flattenNestedBlocks} from '../HtmlDeserializer/flatten-nested-blocks'\nimport {isElement, tagName} from '../HtmlDeserializer/helpers'\nimport type {\n ArbitraryTypedObject,\n DeserializerRule,\n TypedObject,\n} from '../types'\n\n/**\n * An opinionated `DeserializerRule` that flattens tables in a way that repeats\n * the header row for each cell in the row.\n *\n * @example\n * ```html\n * <table>\n * <thead>\n * <tr>\n * <th>Header 1</th>\n * <th>Header 2</th>\n * </tr>\n * </thead>\n * <tbody>\n * <tr>\n * <td>Cell 1</td>\n * <td>Cell 2</td>\n * </tr>\n * </tbody>\n * </table>\n * ```\n * Turns into\n * ```json\n * [\n * {\n * _type: 'block',\n * children: [\n * {\n * _type: 'text',\n * text: 'Header 1'\n * },\n * {\n * _type: 'text',\n * text: 'Cell 1'\n * }\n * ]\n * },\n * {\n * _type: 'block',\n * children: [\n * {\n * _type: 'text',\n * text: 'Header 2'\n * },\n * {\n * _type: 'text',\n * text: 'Cell 2'\n * }\n * ]\n * }\n * ]\n * ```\n *\n * Use the `separator` option to control if a child element should separate\n * headers and cells.\n *\n * @beta\n */\nexport function createFlattenTableRule({\n schema,\n separator,\n}: {\n schema: Schema\n separator?: () =>\n | (Omit<PortableTextSpan, '_key'> & {_key?: string})\n | (Omit<PortableTextObject, '_key'> & {_key?: string})\n | undefined\n}): DeserializerRule {\n return {\n deserialize: (node, next) => {\n if (!isElement(node) || tagName(node) !== 'table') {\n return undefined\n }\n\n const thead = node.querySelector('thead')\n let headerRow = thead?.querySelector('tr')\n const tbody = node.querySelector('tbody')\n let bodyRows = tbody ? [...tbody.querySelectorAll('tr')] : []\n\n if (!headerRow || !bodyRows) {\n // If there is not thead or tbody, we look at the column count. 
If the\n // column count is greater than 2 then we infer that the first row is\n // the header row and the rest are the body rows.\n\n const columnCounts = [...node.querySelectorAll('tr')].map((row) => {\n const cells = row.querySelectorAll('td')\n return cells.length\n })\n\n const firstColumnCount = columnCounts[0]\n\n if (\n !firstColumnCount ||\n !columnCounts.every((count) => count === firstColumnCount)\n ) {\n return undefined\n }\n\n if (firstColumnCount < 3) {\n return undefined\n }\n\n // Now we know that all rows have the same column count and that\n // count is >2\n const rows = [...node.querySelectorAll('tr')]\n headerRow = rows.slice(0, 1)[0]\n bodyRows = rows.slice(1)\n }\n\n if (!headerRow) {\n return undefined\n }\n\n const headerCells = headerRow.querySelectorAll('th, td')\n const headerResults = [...headerCells].map((headerCell) =>\n next(headerCell),\n )\n\n // Process tbody rows and combine with headers\n const rows: TypedObject[] = []\n\n for (const row of bodyRows) {\n const cells = row.querySelectorAll('td')\n\n let cellIndex = 0\n for (const cell of cells) {\n const result = next(cell)\n\n if (!result) {\n cellIndex++\n continue\n }\n\n const headerResult = headerResults[cellIndex]\n\n if (!headerResult) {\n // If we can't find a corresponding header, then we just push\n // the deserialized cell as is.\n if (Array.isArray(result)) {\n rows.push(...result)\n } else {\n rows.push(result)\n }\n cellIndex++\n continue\n }\n\n const flattenedHeaderResult = flattenNestedBlocks(\n {schema},\n (Array.isArray(headerResult)\n ? headerResult\n : [headerResult]) as Array<ArbitraryTypedObject>,\n )\n const firstFlattenedHeaderResult = flattenedHeaderResult[0]\n const flattenedResult = flattenNestedBlocks(\n {schema},\n (Array.isArray(result)\n ? 
result\n : [result]) as Array<ArbitraryTypedObject>,\n )\n const firstFlattenedResult = flattenedResult[0]\n\n if (\n flattenedHeaderResult.length === 1 &&\n isTextBlock({schema}, firstFlattenedHeaderResult) &&\n flattenedResult.length === 1 &&\n isTextBlock({schema}, firstFlattenedResult)\n ) {\n const separatorChild = separator?.()\n // If the header result and the cell result are text blocks then\n // we merge them together.\n const mergedTextBlock = {\n ...firstFlattenedHeaderResult,\n children: [\n ...firstFlattenedHeaderResult.children,\n ...(separatorChild ? [separatorChild] : []),\n ...firstFlattenedResult.children,\n ],\n markDefs: [\n ...(firstFlattenedHeaderResult.markDefs ?? []),\n ...(firstFlattenedResult.markDefs ?? []),\n ],\n }\n\n rows.push(mergedTextBlock)\n cellIndex++\n continue\n }\n\n // Otherwise, we push the header result and the cell result as is.\n if (Array.isArray(headerResult)) {\n rows.push(...headerResult)\n } else {\n rows.push(headerResult)\n }\n\n if (Array.isArray(result)) {\n rows.push(...result)\n } else {\n rows.push(result)\n }\n\n cellIndex++\n }\n }\n\n // Return the processed rows as individual text blocks\n return rows\n },\n 
}\n}\n"],"names":["rows"],"mappings":";;AAwEO,SAAS,uBAAuB;AAAA,EACrC;AAAA,EACA;AACF,GAMqB;AACnB,SAAO;AAAA,IACL,aAAa,CAAC,MAAM,SAAS;AAC3B,UAAI,CAAC,UAAU,IAAI,KAAK,QAAQ,IAAI,MAAM;AACxC;AAIF,UAAI,YADU,KAAK,cAAc,OAAO,GACjB,cAAc,IAAI;AACzC,YAAM,QAAQ,KAAK,cAAc,OAAO;AACxC,UAAI,WAAW,QAAQ,CAAC,GAAG,MAAM,iBAAiB,IAAI,CAAC,IAAI,CAAA;AAE3D,UAAI,CAAC,aAAa,CAAC,UAAU;AAK3B,cAAM,eAAe,CAAC,GAAG,KAAK,iBAAiB,IAAI,CAAC,EAAE,IAAI,CAAC,QAC3C,IAAI,iBAAiB,IAAI,EAC1B,MACd,GAEK,mBAAmB,aAAa,CAAC;AASvC,YANE,CAAC,oBACD,CAAC,aAAa,MAAM,CAAC,UAAU,UAAU,gBAAgB,KAKvD,mBAAmB;AACrB;AAKF,cAAMA,QAAO,CAAC,GAAG,KAAK,iBAAiB,IAAI,CAAC;AAC5C,oBAAYA,MAAK,MAAM,GAAG,CAAC,EAAE,CAAC,GAC9B,WAAWA,MAAK,MAAM,CAAC;AAAA,MACzB;AAEA,UAAI,CAAC;AACH;AAIF,YAAM,gBAAgB,CAAC,GADH,UAAU,iBAAiB,QAAQ,CAClB,EAAE;AAAA,QAAI,CAAC,eAC1C,KAAK,UAAU;AAAA,MAAA,GAIX,OAAsB,CAAA;AAE5B,iBAAW,OAAO,UAAU;AAC1B,cAAM,QAAQ,IAAI,iBAAiB,IAAI;AAEvC,YAAI,YAAY;AAChB,mBAAW,QAAQ,OAAO;AACxB,gBAAM,SAAS,KAAK,IAAI;AAExB,cAAI,CAAC,QAAQ;AACX;AACA;AAAA,UACF;AAEA,gBAAM,eAAe,cAAc,SAAS;AAE5C,cAAI,CAAC,cAAc;AAGb,kBAAM,QAAQ,MAAM,IACtB,KAAK,KAAK,GAAG,MAAM,IAEnB,KAAK,KAAK,MAAM,GAElB;AACA;AAAA,UACF;AAEA,gBAAM,wBAAwB;AAAA,YAC5B,EAAC,OAAA;AAAA,YACA,MAAM,QAAQ,YAAY,IACvB,eACA,CAAC,YAAY;AAAA,UAAA,GAEb,6BAA6B,sBAAsB,CAAC,GACpD,kBAAkB;AAAA,YACtB,EAAC,OAAA;AAAA,YACA,MAAM,QAAQ,MAAM,IACjB,SACA,CAAC,MAAM;AAAA,UAAA,GAEP,uBAAuB,gBAAgB,CAAC;AAE9C,cACE,sBAAsB,WAAW,KACjC,YAAY,EAAC,UAAS,0BAA0B,KAChD,gBAAgB,WAAW,KAC3B,YAAY,EAAC,OAAA,GAAS,oBAAoB,GAC1C;AACA,kBAAM,iBAAiB,YAAA,GAGjB,kBAAkB;AAAA,cACtB,GAAG;AAAA,cACH,UAAU;AAAA,gBACR,GAAG,2BAA2B;AAAA,gBAC9B,GAAI,iBAAiB,CAAC,cAAc,IAAI,CAAA;AAAA,gBACxC,GAAG,qBAAqB;AAAA,cAAA;AAAA,cAE1B,UAAU;AAAA,gBACR,GAAI,2BAA2B,YAAY,CAAA;AAAA,gBAC3C,GAAI,qBAAqB,YAAY,CAAA;AAAA,cAAC;AAAA,YACxC;AAGF,iBAAK,KAAK,eAAe,GACzB;AACA;AAAA,UACF;AAGI,gBAAM,QAAQ,YAAY,IAC5B,KAAK,KAAK,GAAG,YAAY,IAEzB,KAAK,KAAK,YAAY,GAGpB,MAAM,QAAQ,MAAM,IACtB,KAAK,KAAK,GAAG,MAAM,IAEnB,KAAK,KAAK,MAAM,GAGlB;AAAA,QACF;AAAA,MACF;AAGA,aAAO;AAAA,IACT;AAAA,EAAA;AAEJ;"}
|
package/package.json
CHANGED
|
package/src/rules/flatten-tables.test.ts
CHANGED
|
@@ -125,6 +125,187 @@ describe(createFlattenTableRule.name, () => {
|
|
|
125
125
|
])
|
|
126
126
|
})
|
|
127
127
|
|
|
128
|
+
test('ordinary table without thead and tbody', () => {
|
|
129
|
+
/**
|
|
130
|
+
* | Year | Sales | Expenses | Profit |
|
|
131
|
+
* | 2022 | \$8,000 | \$5,000 | \$3,000 |
|
|
132
|
+
* | 2023 | \$10,000 | \$6,500 | \$3,500 |
|
|
133
|
+
* | 2024 | \$15,000 | \$9,000 | \$6,000 |
|
|
134
|
+
*/
|
|
135
|
+
const html = [
|
|
136
|
+
'<table>',
|
|
137
|
+
'<tr>',
|
|
138
|
+
'<td>Year</td>',
|
|
139
|
+
'<td>Sales</td>',
|
|
140
|
+
'<td>Expenses</td>',
|
|
141
|
+
'<td>Profit</td>',
|
|
142
|
+
'</tr>',
|
|
143
|
+
'<tr>',
|
|
144
|
+
'<td>2022</td>',
|
|
145
|
+
'<td>$8,000</td>',
|
|
146
|
+
'<td>$5,000</td>',
|
|
147
|
+
'<td>$3,000</td>',
|
|
148
|
+
'</tr>',
|
|
149
|
+
'<tr>',
|
|
150
|
+
'<td>2023</td>',
|
|
151
|
+
'<td>$10,000</td>',
|
|
152
|
+
'<td>$6,500</td>',
|
|
153
|
+
'<td>$3,500</td>',
|
|
154
|
+
'</tr>',
|
|
155
|
+
'<tr>',
|
|
156
|
+
'<td>2024</td>',
|
|
157
|
+
'<td>$15,000</td>',
|
|
158
|
+
'<td>$9,000</td>',
|
|
159
|
+
'<td>$6,000</td>',
|
|
160
|
+
'</tr>',
|
|
161
|
+
'</table>',
|
|
162
|
+
].join('')
|
|
163
|
+
|
|
164
|
+
expect(
|
|
165
|
+
getTersePt({
|
|
166
|
+
schema,
|
|
167
|
+
value: transform(html, {
|
|
168
|
+
rules: [flattenTableRule],
|
|
169
|
+
}),
|
|
170
|
+
}),
|
|
171
|
+
).toEqual([
|
|
172
|
+
'Year, ,2022',
|
|
173
|
+
'Sales, ,$8,000',
|
|
174
|
+
'Expenses, ,$5,000',
|
|
175
|
+
'Profit, ,$3,000',
|
|
176
|
+
'Year, ,2023',
|
|
177
|
+
'Sales, ,$10,000',
|
|
178
|
+
'Expenses, ,$6,500',
|
|
179
|
+
'Profit, ,$3,500',
|
|
180
|
+
'Year, ,2024',
|
|
181
|
+
'Sales, ,$15,000',
|
|
182
|
+
'Expenses, ,$9,000',
|
|
183
|
+
'Profit, ,$6,000',
|
|
184
|
+
])
|
|
185
|
+
})
|
|
186
|
+
|
|
187
|
+
test('ordinary table without thead', () => {
|
|
188
|
+
/**
|
|
189
|
+
* | Year | Sales | Expenses | Profit |
|
|
190
|
+
* | 2022 | \$8,000 | \$5,000 | \$3,000 |
|
|
191
|
+
* | 2023 | \$10,000 | \$6,500 | \$3,500 |
|
|
192
|
+
* | 2024 | \$15,000 | \$9,000 | \$6,000 |
|
|
193
|
+
*/
|
|
194
|
+
const html = [
|
|
195
|
+
'<table>',
|
|
196
|
+
'<tbody>',
|
|
197
|
+
'<tr>',
|
|
198
|
+
'<td>Year</td>',
|
|
199
|
+
'<td>Sales</td>',
|
|
200
|
+
'<td>Expenses</td>',
|
|
201
|
+
'<td>Profit</td>',
|
|
202
|
+
'</tr>',
|
|
203
|
+
'<tr>',
|
|
204
|
+
'<td>2022</td>',
|
|
205
|
+
'<td>$8,000</td>',
|
|
206
|
+
'<td>$5,000</td>',
|
|
207
|
+
'<td>$3,000</td>',
|
|
208
|
+
'</tr>',
|
|
209
|
+
'<tr>',
|
|
210
|
+
'<td>2023</td>',
|
|
211
|
+
'<td>$10,000</td>',
|
|
212
|
+
'<td>$6,500</td>',
|
|
213
|
+
'<td>$3,500</td>',
|
|
214
|
+
'</tr>',
|
|
215
|
+
'<tr>',
|
|
216
|
+
'<td>2024</td>',
|
|
217
|
+
'<td>$15,000</td>',
|
|
218
|
+
'<td>$9,000</td>',
|
|
219
|
+
'<td>$6,000</td>',
|
|
220
|
+
'</tr>',
|
|
221
|
+
'</tbody>',
|
|
222
|
+
'</table>',
|
|
223
|
+
].join('')
|
|
224
|
+
|
|
225
|
+
expect(
|
|
226
|
+
getTersePt({
|
|
227
|
+
schema,
|
|
228
|
+
value: transform(html, {
|
|
229
|
+
rules: [flattenTableRule],
|
|
230
|
+
}),
|
|
231
|
+
}),
|
|
232
|
+
).toEqual([
|
|
233
|
+
'Year, ,2022',
|
|
234
|
+
'Sales, ,$8,000',
|
|
235
|
+
'Expenses, ,$5,000',
|
|
236
|
+
'Profit, ,$3,000',
|
|
237
|
+
'Year, ,2023',
|
|
238
|
+
'Sales, ,$10,000',
|
|
239
|
+
'Expenses, ,$6,500',
|
|
240
|
+
'Profit, ,$3,500',
|
|
241
|
+
'Year, ,2024',
|
|
242
|
+
'Sales, ,$15,000',
|
|
243
|
+
'Expenses, ,$9,000',
|
|
244
|
+
'Profit, ,$6,000',
|
|
245
|
+
])
|
|
246
|
+
})
|
|
247
|
+
|
|
248
|
+
test('ordinary table without tbody', () => {
|
|
249
|
+
/**
|
|
250
|
+
* | Year | Sales | Expenses | Profit |
|
|
251
|
+
* | 2022 | \$8,000 | \$5,000 | \$3,000 |
|
|
252
|
+
* | 2023 | \$10,000 | \$6,500 | \$3,500 |
|
|
253
|
+
* | 2024 | \$15,000 | \$9,000 | \$6,000 |
|
|
254
|
+
*/
|
|
255
|
+
const html = [
|
|
256
|
+
'<table>',
|
|
257
|
+
'<thead>',
|
|
258
|
+
'<tr>',
|
|
259
|
+
'<td>Year</td>',
|
|
260
|
+
'<td>Sales</td>',
|
|
261
|
+
'<td>Expenses</td>',
|
|
262
|
+
'<td>Profit</td>',
|
|
263
|
+
'</tr>',
|
|
264
|
+
'</thead>',
|
|
265
|
+
'<tr>',
|
|
266
|
+
'<td>2022</td>',
|
|
267
|
+
'<td>$8,000</td>',
|
|
268
|
+
'<td>$5,000</td>',
|
|
269
|
+
'<td>$3,000</td>',
|
|
270
|
+
'</tr>',
|
|
271
|
+
'<tr>',
|
|
272
|
+
'<td>2023</td>',
|
|
273
|
+
'<td>$10,000</td>',
|
|
274
|
+
'<td>$6,500</td>',
|
|
275
|
+
'<td>$3,500</td>',
|
|
276
|
+
'</tr>',
|
|
277
|
+
'<tr>',
|
|
278
|
+
'<td>2024</td>',
|
|
279
|
+
'<td>$15,000</td>',
|
|
280
|
+
'<td>$9,000</td>',
|
|
281
|
+
'<td>$6,000</td>',
|
|
282
|
+
'</tr>',
|
|
283
|
+
'</table>',
|
|
284
|
+
].join('')
|
|
285
|
+
|
|
286
|
+
expect(
|
|
287
|
+
getTersePt({
|
|
288
|
+
schema,
|
|
289
|
+
value: transform(html, {
|
|
290
|
+
rules: [flattenTableRule],
|
|
291
|
+
}),
|
|
292
|
+
}),
|
|
293
|
+
).toEqual([
|
|
294
|
+
'Year, ,2022',
|
|
295
|
+
'Sales, ,$8,000',
|
|
296
|
+
'Expenses, ,$5,000',
|
|
297
|
+
'Profit, ,$3,000',
|
|
298
|
+
'Year, ,2023',
|
|
299
|
+
'Sales, ,$10,000',
|
|
300
|
+
'Expenses, ,$6,500',
|
|
301
|
+
'Profit, ,$3,500',
|
|
302
|
+
'Year, ,2024',
|
|
303
|
+
'Sales, ,$15,000',
|
|
304
|
+
'Expenses, ,$9,000',
|
|
305
|
+
'Profit, ,$6,000',
|
|
306
|
+
])
|
|
307
|
+
})
|
|
308
|
+
|
|
128
309
|
describe('table with images', () => {
|
|
129
310
|
/**
|
|
130
311
|
* | Name | Photo |
|
package/src/rules/flatten-tables.ts
CHANGED
|
@@ -87,30 +87,53 @@ export function createFlattenTableRule({
|
|
|
87
87
|
}
|
|
88
88
|
|
|
89
89
|
const thead = node.querySelector('thead')
|
|
90
|
+
let headerRow = thead?.querySelector('tr')
|
|
90
91
|
const tbody = node.querySelector('tbody')
|
|
92
|
+
let bodyRows = tbody ? [...tbody.querySelectorAll('tr')] : []
|
|
91
93
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
94
|
+
if (!headerRow || !bodyRows) {
|
|
95
|
+
// If there is not thead or tbody, we look at the column count. If the
|
|
96
|
+
// column count is greater than 2 then we infer that the first row is
|
|
97
|
+
// the header row and the rest are the body rows.
|
|
98
|
+
|
|
99
|
+
const columnCounts = [...node.querySelectorAll('tr')].map((row) => {
|
|
100
|
+
const cells = row.querySelectorAll('td')
|
|
101
|
+
return cells.length
|
|
102
|
+
})
|
|
96
103
|
|
|
97
|
-
|
|
98
|
-
|
|
104
|
+
const firstColumnCount = columnCounts[0]
|
|
105
|
+
|
|
106
|
+
if (
|
|
107
|
+
!firstColumnCount ||
|
|
108
|
+
!columnCounts.every((count) => count === firstColumnCount)
|
|
109
|
+
) {
|
|
110
|
+
return undefined
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
if (firstColumnCount < 3) {
|
|
114
|
+
return undefined
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// Now we know that all rows have the same column count and that
|
|
118
|
+
// count is >2
|
|
119
|
+
const rows = [...node.querySelectorAll('tr')]
|
|
120
|
+
headerRow = rows.slice(0, 1)[0]
|
|
121
|
+
bodyRows = rows.slice(1)
|
|
122
|
+
}
|
|
99
123
|
|
|
100
124
|
if (!headerRow) {
|
|
101
125
|
return undefined
|
|
102
126
|
}
|
|
103
127
|
|
|
104
|
-
const headerCells = headerRow.querySelectorAll('th')
|
|
128
|
+
const headerCells = headerRow.querySelectorAll('th, td')
|
|
105
129
|
const headerResults = [...headerCells].map((headerCell) =>
|
|
106
130
|
next(headerCell),
|
|
107
131
|
)
|
|
108
132
|
|
|
109
133
|
// Process tbody rows and combine with headers
|
|
110
134
|
const rows: TypedObject[] = []
|
|
111
|
-
const rowElements = tbody.querySelectorAll('tr')
|
|
112
135
|
|
|
113
|
-
for (const row of
|
|
136
|
+
for (const row of bodyRows) {
|
|
114
137
|
const cells = row.querySelectorAll('td')
|
|
115
138
|
|
|
116
139
|
let cellIndex = 0
|