@tfw.in/structura-lib 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/PRODUCTION_ARCHITECTURE.md +511 -0
- package/README.md +379 -0
- package/SAVE_FUNCTIONALITY_COMPLETE.md +448 -0
- package/dist/cjs/EditableContent.js +150 -0
- package/dist/cjs/HtmlViewer.js +587 -0
- package/dist/cjs/PdfComponents.js +16 -0
- package/dist/cjs/PdfDocumentViewer.js +281 -0
- package/dist/cjs/Structura.js +806 -0
- package/dist/cjs/Table.js +164 -0
- package/dist/cjs/TableCell.js +115 -0
- package/dist/cjs/accuracyMetrics.js +39 -0
- package/dist/cjs/helpers/preprocessData.js +143 -0
- package/dist/cjs/index.js +7 -0
- package/dist/cjs/lib/polyfills.js +15 -0
- package/dist/cjs/lib/utils.js +10 -0
- package/dist/cjs/node_modules/react-icons/fa/index.esm.js +14 -0
- package/dist/cjs/node_modules/react-icons/lib/esm/iconBase.js +69 -0
- package/dist/cjs/node_modules/react-icons/lib/esm/iconContext.js +15 -0
- package/dist/cjs/polyfills.js +19 -0
- package/dist/cjs/route.js +102 -0
- package/dist/cjs/styles.css +7 -0
- package/dist/cjs/styles.css.map +1 -0
- package/dist/cjs/ui/badge.js +34 -0
- package/dist/cjs/ui/button.js +71 -0
- package/dist/cjs/ui/card.js +86 -0
- package/dist/cjs/ui/progress.js +45 -0
- package/dist/cjs/ui/scroll-area.js +62 -0
- package/dist/cjs/ui/tabs.js +60 -0
- package/dist/cjs/worker.js +36 -0
- package/dist/esm/EditableContent.js +161 -0
- package/dist/esm/HtmlViewer.js +640 -0
- package/dist/esm/PdfComponents.js +21 -0
- package/dist/esm/PdfDocumentViewer.js +294 -0
- package/dist/esm/Structura.js +951 -0
- package/dist/esm/Table.js +182 -0
- package/dist/esm/TableCell.js +122 -0
- package/dist/esm/_virtual/_rollupPluginBabelHelpers.js +305 -0
- package/dist/esm/accuracyMetrics.js +41 -0
- package/dist/esm/helpers/preprocessData.js +152 -0
- package/dist/esm/index.js +1 -0
- package/dist/esm/lib/polyfills.js +13 -0
- package/dist/esm/lib/utils.js +8 -0
- package/dist/esm/node_modules/react-icons/fa/index.esm.js +11 -0
- package/dist/esm/node_modules/react-icons/lib/esm/iconBase.js +66 -0
- package/dist/esm/node_modules/react-icons/lib/esm/iconContext.js +12 -0
- package/dist/esm/polyfills.js +17 -0
- package/dist/esm/route.js +154 -0
- package/dist/esm/styles.css +7 -0
- package/dist/esm/styles.css.map +1 -0
- package/dist/esm/types/EditableContent.d.ts +9 -0
- package/dist/esm/types/HtmlViewer.d.ts +10 -0
- package/dist/esm/types/PdfComponents.d.ts +35 -0
- package/dist/esm/types/PdfDocumentViewer.d.ts +22 -0
- package/dist/esm/types/Structura.d.ts +11 -0
- package/dist/esm/types/Table.d.ts +12 -0
- package/dist/esm/types/TableCell.d.ts +13 -0
- package/dist/esm/types/accuracy.d.ts +23 -0
- package/dist/esm/types/accuracyMetrics.d.ts +5 -0
- package/dist/esm/types/helpers/flattenJSON.d.ts +1 -0
- package/dist/esm/types/helpers/hardMerging.d.ts +2 -0
- package/dist/esm/types/helpers/index.d.ts +6 -0
- package/dist/esm/types/helpers/jsonToHtml.d.ts +40 -0
- package/dist/esm/types/helpers/preprocessData.d.ts +3 -0
- package/dist/esm/types/helpers/removeMetadata.d.ts +1 -0
- package/dist/esm/types/helpers/tableProcessor.d.ts +1 -0
- package/dist/esm/types/index.d.ts +3 -0
- package/dist/esm/types/lib/polyfills.d.ts +1 -0
- package/dist/esm/types/lib/utils.d.ts +2 -0
- package/dist/esm/types/polyfills.d.ts +1 -0
- package/dist/esm/types/route.d.ts +45 -0
- package/dist/esm/types/test-app/src/App.d.ts +4 -0
- package/dist/esm/types/test-app/src/main.d.ts +1 -0
- package/dist/esm/types/test-app/vite.config.d.ts +2 -0
- package/dist/esm/types/types.d.ts +23 -0
- package/dist/esm/types/ui/alert.d.ts +8 -0
- package/dist/esm/types/ui/badge.d.ts +9 -0
- package/dist/esm/types/ui/button.d.ts +11 -0
- package/dist/esm/types/ui/card.d.ts +8 -0
- package/dist/esm/types/ui/progress.d.ts +6 -0
- package/dist/esm/types/ui/scroll-area.d.ts +5 -0
- package/dist/esm/types/ui/skeleton.d.ts +2 -0
- package/dist/esm/types/ui/tabs.d.ts +7 -0
- package/dist/esm/types/worker.d.ts +1 -0
- package/dist/esm/ui/badge.js +31 -0
- package/dist/esm/ui/button.js +50 -0
- package/dist/esm/ui/card.js +67 -0
- package/dist/esm/ui/progress.js +26 -0
- package/dist/esm/ui/scroll-area.js +45 -0
- package/dist/esm/ui/tabs.js +39 -0
- package/dist/esm/worker.js +50 -0
- package/dist/index.d.ts +38 -0
- package/package.json +85 -0
- package/server/README.md +203 -0
- package/server/db.js +142 -0
- package/server/server.js +165 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, '__esModule', { value: true });
|
|
4
|
+
|
|
5
|
+
var jsxRuntime = require('react/jsx-runtime');
|
|
6
|
+
var React = require('react');
|
|
7
|
+
var TableCell = require('./TableCell.js');
|
|
8
|
+
|
|
9
|
+
/**
 * Normalizes an HTML fragment before it is shown in a table.
 *
 * - `<br>` tags (any case, with or without a slash) become a space.
 * - The non-standard `<strikethrough>` element is rewritten to `<s>`.
 * - Every run of whitespace (including newlines) collapses to one space.
 * - Leading and trailing whitespace is removed.
 *
 * @param {string|null|undefined} html - Raw HTML fragment.
 * @returns {string} The normalized fragment, or "" for empty / non-string input.
 */
const cleanHtml = html => {
  // Non-strings used to throw on `.includes`; treat them like "no content".
  if (typeof html !== "string" || html === "") return "";
  return html
    .replace(/<br\s*\/?>/gi, " ")
    .replace(/<strikethrough>/gi, "<s>")
    .replace(/<\/strikethrough>/gi, "</s>")
    // Newlines are whitespace too, so this single pass also removes them.
    .replace(/\s+/g, " ")
    .trim();
};
|
|
30
|
+
/**
 * Renders a table node as an HTML <table> made of TableCell components.
 *
 * The node's HTML (or its `llm_table_html` when the "Agentic" toggle is on)
 * is parsed in a detached element, its rows are read, rows of every entry in
 * `mergedTables` are appended, and each <th>/<td> is paired with the entry of
 * `node.children` at the same running position.
 *
 * @param {object} props
 * @param {object} props.node - Table node; reads `id`, `html`, `llm_table_html`, `children`.
 * @param {string} [props.selectedBboxId] - Cell id to render as selected.
 * @param {Function} [props.onJsonClick] - Called with a cell's node data when its JSON icon is clicked.
 * @param {Function} [props.onContentChange] - Called as (cellId, newContent) after a cell edit.
 * @param {object[]} [props.mergedTables] - Further table nodes whose rows are appended.
 * @param {boolean} [props.hasLlmHtml] - Show the toggle that switches to `llm_table_html`.
 * @param {boolean} [props.showJsonIcons] - Forwarded to every TableCell.
 * @param {Function} [props.onNodeClick] - Called with the table id or the clicked cell id.
 * @returns {object|null} The rendered element, or null when the HTML contains no <table>.
 */
function Table({
  node,
  selectedBboxId,
  onJsonClick,
  onContentChange,
  mergedTables = [],
  hasLlmHtml = false,
  showJsonIcons = true,
  onNodeClick
}) {
  const [useLlmHtml, setUseLlmHtml] = React.useState(false);

  // Prefer the LLM HTML only when the toggle is on and the node carries it.
  const getHtmlContent = tableNode => {
    if (useLlmHtml && tableNode.llm_table_html) {
      return cleanHtml(tableNode.llm_table_html);
    }
    return cleanHtml(tableNode.html || "");
  };

  // Parse one table node and collect its <tr> elements.
  // NOTE(review): innerHTML parses whatever markup the node carries; confirm the
  // HTML is trusted or sanitized upstream.
  const processTable = tableNode => {
    const tempDiv = document.createElement("div");
    tempDiv.innerHTML = getHtmlContent(tableNode);
    const tableElement = tempDiv.querySelector("table");
    if (!tableElement) return null;
    const rows = Array.from(tableElement.querySelectorAll("tr"));
    return {
      rows,
      tableElement,
      tempDiv
    };
  };

  const mainTable = processTable(node);
  if (!mainTable) return null;

  // Rows of merged tables follow the main table's rows.
  const mergedRows = [];
  mergedTables.forEach(table => {
    const processedTable = processTable(table);
    if (processedTable) {
      mergedRows.push(...processedTable.rows);
    }
  });
  const allRows = [...mainTable.rows, ...mergedRows];

  const children = node.children;
  // Computed once per render instead of once per cell.
  // TODO: to be fixed later
  const useTableCellContents = children ? children.some(child => child.block_type === "TableCell" && child.llm_table_html !== undefined) : false;

  // Running index over all cells in document order. Multiplying the row index
  // by the current row's width is only right for perfectly rectangular tables;
  // with colspans or merged tables it skips or repeats children and can
  // generate duplicate fallback ids.
  let nextCellIndex = 0;
  const processedRows = allRows.map(row => {
    const cells = Array.from(row.querySelectorAll("th, td"));
    return cells.map(cell => {
      const globalCellIndex = nextCellIndex++;
      const childNode = children == null ? undefined : children[globalCellIndex];
      const isHeaderCell = cell.tagName.toLowerCase() === "th";
      let cellContent = cleanHtml(cell.innerHTML);
      // Use the child's own HTML when per-cell LLM content is available.
      if (useTableCellContents && childNode && childNode.llm_table_html !== undefined) {
        cellContent = useLlmHtml && childNode.llm_table_html ? cleanHtml(childNode.llm_table_html) : cleanHtml(childNode.html || "");
      }
      // TODO: to be fixed later - only apply dubious highlighting if the cell has llm_table_html
      const isDubious = Boolean(childNode && childNode.dubious && childNode.llm_table_html !== undefined);
      const fallbackId = `${node.id}-cell-${globalCellIndex}`;
      return {
        id: (childNode && childNode.id) || fallbackId,
        content: cellContent,
        isHeader: isHeaderCell,
        isDubious: isDubious,
        nodeData: childNode || {
          id: fallbackId,
          html: cellContent,
          block_type: "TableCell"
        }
      };
    });
  });

  const handleCellContentChange = (cellId, newContent) => {
    if (onContentChange) {
      onContentChange(cellId, newContent);
    }
  };

  const handleTableClick = e => {
    // Only process clicks directly on the table container, not bubbled events from cells
    if (e.target === e.currentTarget && node.id && onNodeClick) {
      onNodeClick(node.id);
    }
  };

  return jsxRuntime.jsxs("div", {
    onClick: handleTableClick,
    className: onNodeClick ? "cursor-pointer" : "",
    children: [hasLlmHtml && jsxRuntime.jsx("div", {
      className: "mb-2 flex justify-end",
      onClick: e => e.stopPropagation(),
      children: jsxRuntime.jsxs("label", {
        className: "inline-flex items-center cursor-pointer",
        children: [jsxRuntime.jsx("input", {
          type: "checkbox",
          checked: useLlmHtml,
          onChange: () => setUseLlmHtml(!useLlmHtml),
          className: "sr-only peer"
        }), jsxRuntime.jsx("div", {
          className: "relative w-11 h-6 bg-gray-200 peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-blue-300 rounded-full peer peer-checked:after:translate-x-full rtl:peer-checked:after:-translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:start-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-blue-600"
        }), jsxRuntime.jsx("span", {
          className: "ms-3 text-sm font-medium text-gray-900",
          children: "Agentic"
        })]
      })
    }), jsxRuntime.jsx("div", {
      className: "w-full overflow-x-auto",
      children: jsxRuntime.jsx("table", {
        className: "min-w-full divide-y divide-gray-200",
        children: jsxRuntime.jsx("tbody", {
          children: processedRows.map((row, rowIndex) => jsxRuntime.jsx("tr", {
            children: row.map(cell => jsxRuntime.jsx(TableCell.default, {
              id: cell.id,
              content: cell.content,
              onJsonClick: () => {
                if (onJsonClick) onJsonClick(cell.nodeData);
              },
              isSelected: cell.id === selectedBboxId,
              isHeader: cell.isHeader,
              isDubious: cell.isDubious,
              // TableCell invokes this as (id, newContent); a one-argument wrapper
              // here would forward the id as if it were the content.
              onContentChange: onContentChange ? handleCellContentChange : undefined,
              showJsonIcons: showJsonIcons,
              onNodeClick: onNodeClick ? () => onNodeClick(cell.id) : undefined
            }, cell.id))
          }, rowIndex))
        })
      })
    })]
  });
}
|
|
163
|
+
|
|
164
|
+
exports.default = Table;
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, '__esModule', { value: true });
|
|
4
|
+
|
|
5
|
+
var jsxRuntime = require('react/jsx-runtime');
|
|
6
|
+
var React = require('react');
|
|
7
|
+
var vsc = require('react-icons/vsc');
|
|
8
|
+
|
|
9
|
+
/**
 * Cleans an HTML fragment for display or editing inside a table cell.
 *
 * `<br>` tags turn into spaces, `<strikethrough>` is replaced by the standard
 * `<s>` element, whitespace runs (newlines included) collapse to a single
 * space, and the result is trimmed.
 *
 * @param {string|null|undefined} html - Raw cell HTML.
 * @returns {string} Cleaned HTML; "" when the input is empty or not a string.
 */
const cleanHtml = html => {
  // Guard against non-string values, which previously threw on `.includes`.
  if (typeof html !== "string" || html === "") return "";
  return html
    .replace(/<br\s*\/?>/gi, " ")
    .replace(/<strikethrough>/gi, "<s>")
    .replace(/<\/strikethrough>/gi, "</s>")
    // Collapsing whitespace also covers the newline removal.
    .replace(/\s+/g, " ")
    .trim();
};
|
|
30
|
+
/**
 * A single table cell (<th> or <td>) that shows HTML content and, when
 * `onContentChange` is supplied, can be edited in a textarea on double click.
 *
 * @param {object} props
 * @param {string} props.id - Cell id; used as the DOM id and passed to `onContentChange`.
 * @param {string} [props.content] - Cell HTML; cleaned with `cleanHtml` before display.
 * @param {Function} props.onJsonClick - Called (no arguments) when the JSON icon is clicked.
 * @param {boolean} [props.isSelected] - Adds the selected styling.
 * @param {boolean} [props.isHeader] - Render a <th> instead of a <td>.
 * @param {Function} [props.onContentChange] - Called as (id, editedContent) when an edit is committed on blur.
 * @param {boolean} [props.showJsonIcons] - Show the JSON icon button.
 * @param {Function} [props.onNodeClick] - Called (no arguments) on click while not editing.
 * @param {boolean} [props.isDubious] - Adds the dubious (red) styling.
 * @returns {object} The rendered cell element.
 */
function TableCell({
  id,
  content,
  onJsonClick,
  isSelected,
  isHeader = false,
  onContentChange,
  showJsonIcons = true,
  onNodeClick,
  isDubious = false
}) {
  const [isEditing, setIsEditing] = React.useState(false);
  const [editedContent, setEditedContent] = React.useState(() => cleanHtml(content));
  const [isEdited, setIsEdited] = React.useState(false);
  const CellComponent = isHeader ? "th" : "td";

  // Follow prop changes until the user has edited the cell; after that the
  // local edit wins.
  React.useEffect(() => {
    if (!isEdited) {
      setEditedContent(cleanHtml(content));
    }
  }, [content, isEdited]);

  const handleDoubleClick = () => {
    if (onContentChange) {
      setIsEditing(true);
    }
  };

  const handleBlur = () => {
    setIsEditing(false);
    // Compare against the cleaned prop: the editor starts from cleaned text, so
    // comparing with the raw prop reported an edit for untouched cells whose
    // raw HTML contained newlines or repeated spaces.
    if (onContentChange && editedContent !== cleanHtml(content)) {
      setIsEdited(true);
      onContentChange(id, editedContent);
    }
  };

  const handleClick = () => {
    if (!isEditing && onNodeClick) {
      onNodeClick();
    }
  };

  return jsxRuntime.jsx(CellComponent, {
    id: id,
    className: `structura-px-4 structura-py-2 structura-border structura-border-gray-200 structura-transition-colors structura-relative structura-group
        ${isSelected ? "structura-bg-blue-100 structura-border-2 structura-border-blue-500" : ""}
        ${isHeader ? "structura-bg-gray-100 structura-font-bold structura-text-gray-800" : ""}
        ${isEdited ? "structura-bg-yellow-100" : ""}
        ${isDubious ? "structura-bg-red-500/20" : ""}
        ${onNodeClick && !isEditing ? "structura-cursor-pointer hover:structura-bg-gray-50" : ""}`,
    onDoubleClick: handleDoubleClick,
    onClick: handleClick,
    children: jsxRuntime.jsxs("div", {
      className: "structura-w-full structura-relative",
      children: [showJsonIcons && jsxRuntime.jsx("button", {
        onClick: e => {
          // Keep the click from also triggering the cell's onNodeClick.
          e.stopPropagation();
          onJsonClick();
        },
        className: "structura-absolute -structura-right-1 structura-top-0 structura-opacity-0 group-hover:structura-opacity-100 structura-transition-opacity structura-text-gray-500 hover:structura-text-gray-700",
        title: "View JSON",
        children: jsxRuntime.jsx(vsc.VscJson, {
          size: 16
        })
      }), isEditing ? jsxRuntime.jsx("textarea", {
        value: editedContent,
        onChange: e => setEditedContent(e.target.value),
        onBlur: handleBlur,
        autoFocus: true,
        className: "structura-w-full structura-min-h-[24px] structura-p-1 structura-border structura-border-blue-400 structura-rounded focus:structura-outline-none focus:structura-ring-2 focus:structura-ring-blue-500",
        onClick: e => e.stopPropagation()
      }) : jsxRuntime.jsx("div", {
        // NOTE(review): the content (including text typed into the textarea) is
        // injected as raw HTML; confirm it is trusted or sanitized upstream.
        dangerouslySetInnerHTML: {
          __html: editedContent
        },
        className: "structura-min-w-0 structura-break-words"
      })]
    })
  });
}
|
|
114
|
+
|
|
115
|
+
exports.default = TableCell;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Compares two HTML strings by their visible text.
 *
 * Tags are stripped and the text trimmed before comparison.
 *
 * @param {string|null|undefined} original - HTML before the change.
 * @param {string|null|undefined} updated - HTML after the change.
 * @returns {{charactersDifferent: number, wordsDifferent: number, editDistance: number}}
 *   Absolute difference in text length, absolute difference in word count, and
 *   the Levenshtein distance between the two stripped texts.
 */
const calculateDifferences = (original, updated) => {
  // Missing values count as empty text instead of throwing on `.replace`.
  const cleanText = text => String(text == null ? "" : text).replace(/<[^>]*>/g, "").trim();
  // "".split(/\s+/) is [""], which would count an empty text as one word.
  const countWords = text => text === "" ? 0 : text.split(/\s+/).length;
  const originalClean = cleanText(original);
  const updatedClean = cleanText(updated);
  return {
    charactersDifferent: Math.abs(updatedClean.length - originalClean.length),
    wordsDifferent: Math.abs(countWords(updatedClean) - countWords(originalClean)),
    // Levenshtein distance for more accurate character difference
    editDistance: levenshteinDistance(originalClean, updatedClean)
  };
};
|
|
16
|
+
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed to
 * turn one into the other. Characters are compared by string index.
 *
 * Only two rows of the dynamic-programming table are kept, so memory is
 * proportional to the shorter string rather than to the product of both lengths.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} The edit distance.
 */
function levenshteinDistance(str1, str2) {
  if (str1 === str2) return 0;
  // The distance is symmetric, so put the shorter string on the column axis.
  const [longer, shorter] = str1.length >= str2.length ? [str1, str2] : [str2, str1];
  const cols = shorter.length;
  if (cols === 0) return longer.length;

  // previous[j] = distance between the processed prefix of `longer` and shorter[0..j).
  let previous = Array.from({ length: cols + 1 }, (_, j) => j);
  let current = new Array(cols + 1).fill(0);
  for (let i = 1; i <= longer.length; i++) {
    current[0] = i;
    for (let j = 1; j <= cols; j++) {
      if (longer[i - 1] === shorter[j - 1]) {
        current[j] = previous[j - 1];
      } else {
        current[j] = 1 + Math.min(
          previous[j], // deletion
          current[j - 1], // insertion
          previous[j - 1] // substitution
        );
      }
    }
    [previous, current] = [current, previous];
  }
  return previous[cols];
}
|
|
38
|
+
|
|
39
|
+
exports.calculateDifferences = calculateDifferences;
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Sorts table cells in place by the page number and block number encoded in
 * their ids (e.g. '/page/2/TableCell/10'), page first, then block.
 *
 * Cells whose id is missing or does not match that pattern sort as (0, 0).
 *
 * @param {Array<{id?: string}>} tableCells - Cells to sort; the array is mutated.
 * @returns {Array<{id?: string}>} The same array, now sorted.
 */
function sortTableCells(tableCells) {
  // Extract [page, block] from an id; anything unparseable becomes [0, 0].
  const getPageAndBlockNumber = id => {
    const match = typeof id === "string" ? id.match(/^\/page\/(\d+)\/TableCell\/(\d+)$/) : null;
    if (!match) return [0, 0];
    return [parseInt(match[1], 10), parseInt(match[2], 10)];
  };
  tableCells.sort((a, b) => {
    const [pageA, blockA] = getPageAndBlockNumber(a.id);
    const [pageB, blockB] = getPageAndBlockNumber(b.id);
    return pageA !== pageB ? pageA - pageB : blockA - blockB;
  });
  return tableCells;
}
|
|
23
|
+
/**
 * Returns a deep copy of a JSON tree in which every `<strikethrough>` tag in
 * `html` and `llm_table_html` fields is replaced by `<del>`.
 *
 * The tree is walked through each node's `children` array. The input is not
 * mutated. Values that are not objects (null, undefined, primitives) are
 * returned unchanged instead of throwing.
 *
 * @param {object|null|undefined} jsonData - Root node of the tree.
 * @returns {object|null|undefined} The corrected copy.
 */
function strikethroughCorrection(jsonData) {
  if (jsonData === null || typeof jsonData !== "object") return jsonData;
  // Deep clone to avoid mutating the original
  const modifiedJson = JSON.parse(JSON.stringify(jsonData));

  const processHtml = html => {
    if (typeof html !== "string") return html;
    return html.replace(/<strikethrough>/g, "<del>").replace(/<\/strikethrough>/g, "</del>");
  };

  const processNode = node => {
    // Arrays may contain null entries after the JSON round trip.
    if (node === null || typeof node !== "object") return;
    if (node.html) {
      node.html = processHtml(node.html);
    }
    if (node.llm_table_html) {
      node.llm_table_html = processHtml(node.llm_table_html);
    }
    if (Array.isArray(node.children)) {
      node.children.forEach(child => processNode(child));
    }
  };

  processNode(modifiedJson);
  return modifiedJson;
}
|
|
51
|
+
/**
 * Returns a deep copy of a JSON tree with every node whose `block_type` is
 * "Picture" removed from the `children` arrays, at any depth.
 *
 * The input is not mutated. Values that are not objects (null, undefined,
 * primitives) are returned unchanged instead of throwing.
 *
 * @param {object|null|undefined} jsonData - Root node of the tree.
 * @returns {object|null|undefined} The filtered copy.
 */
function removePictureNodes(jsonData) {
  if (jsonData === null || typeof jsonData !== "object") return jsonData;
  // Deep clone to avoid mutating the original
  const modifiedJson = JSON.parse(JSON.stringify(jsonData));

  const processPictureNodes = node => {
    if (node && Array.isArray(node.children)) {
      node.children = node.children
        // Null entries are kept as they are; only real Picture nodes are dropped.
        .filter(child => !child || child.block_type !== "Picture")
        .map(child => processPictureNodes(child));
    }
    return node;
  };

  return processPictureNodes(modifiedJson);
}
|
|
67
|
+
/**
 * Copies the per-cell HTML of each "Table" / "TableOfContents" block onto its
 * "TableCell" children.
 *
 * For every such block (looked up two levels down: root -> page -> block) the
 * block's `html` and, when present, `llm_table_html` are parsed, their <td>/<th>
 * elements are collected in document order, and the i-th element's outerHTML is
 * written to the i-th TableCell child (children ordered by `sortTableCells`).
 * When the counts differ a warning is logged and only the common prefix is mapped.
 *
 * Works on a deep copy; the input is not mutated. When `DOMParser` is not
 * available (e.g. outside a browser) table blocks are left untouched and a
 * single warning is logged instead of throwing.
 *
 * @param {object|null|undefined} jsonData - Root document node with a `children` array of pages.
 * @returns {object|null|undefined} The mapped copy, or the input itself when it has no children.
 */
function mapTableChildren(jsonData) {
  // Skip if there's no JSON data or no pages
  if (!jsonData || !jsonData.children) {
    return jsonData;
  }
  // Create a deep copy to avoid modifying the original
  const modifiedJsonData = JSON.parse(JSON.stringify(jsonData));

  const parser = typeof DOMParser === "function" ? new DOMParser() : null;
  let warnedAboutMissingParser = false;

  // Parse an HTML string and return its td/th elements in document order.
  const extractCells = html => Array.from(parser.parseFromString(html, "text/html").querySelectorAll("td, th"));

  const mapBlock = block => {
    // Only process Table and TableOfContents blocks
    if (block.block_type !== "Table" && block.block_type !== "TableOfContents") return;
    // Skip if block has no children or no HTML
    if (!block.children || !block.html) return;
    const cellNodes = block.children.filter(cell => cell.block_type === "TableCell");
    if (cellNodes.length === 0) return;
    if (!parser) {
      if (!warnedAboutMissingParser) {
        console.warn("DOMParser is not available; table HTML was not mapped to table cells.");
        warnedAboutMissingParser = true;
      }
      return;
    }
    const tableCells = sortTableCells(cellNodes);
    const htmlCells = extractCells(block.html);
    const llmHtmlCells = block.llm_table_html ? extractCells(block.llm_table_html) : [];
    // Safety check: log warning if HTML cells count doesn't match TableCell nodes
    if (htmlCells.length !== tableCells.length) {
      console.warn(`Warning: Cell count mismatch in ${block.block_type} ${block.id}. ` + `JSON has ${tableCells.length} cells, HTML has ${htmlCells.length} cells.`);
    }
    const maxCells = Math.min(tableCells.length, htmlCells.length);
    for (let i = 0; i < maxCells; i++) {
      tableCells[i].html = htmlCells[i].outerHTML;
      // llmHtmlCells is empty when the block has no LLM HTML.
      if (i < llmHtmlCells.length) {
        tableCells[i].llm_table_html = llmHtmlCells[i].outerHTML;
      }
    }
  };

  modifiedJsonData.children.forEach(page => {
    // Skip pages without children
    if (page && page.children) {
      page.children.forEach(mapBlock);
    }
  });
  return modifiedJsonData;
}
|
|
129
|
+
/**
 * Runs the preprocessing pipeline on a document JSON tree and returns the
 * result of the last step:
 *   1. strikethroughCorrection - replace 'strikethrough' tags with 'del'
 *   2. removePictureNodes      - drop Picture nodes
 *   3. mapTableChildren        - map table HTML onto the table cells
 *
 * @param {object} jsonData - Document JSON tree.
 * @returns {object} The preprocessed tree.
 */
function preprocessData(jsonData) {
  const withoutStrikethrough = strikethroughCorrection(jsonData);
  const withoutPictures = removePictureNodes(withoutStrikethrough);
  // TODO: Uncomment this after correcting the the table enrichment and mapping.
  // Enrich tables with missing cells:
  // processTablesForEnrichment(withoutPictures);
  return mapTableChildren(withoutPictures);
}
|
|
141
|
+
|
|
142
|
+
exports.preprocessData = preprocessData;
|
|
143
|
+
exports.sortTableCells = sortTableCells;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Polyfill for Promise.withResolvers
// This is needed for Next.js build compatibility
//
// Installed only when the runtime lacks a native implementation. The property
// is defined non-enumerable (like the native method), and the promise is built
// with the receiver's constructor so that subclasses calling
// `SubPromise.withResolvers()` get a SubPromise. A detached call (no receiver)
// falls back to the built-in Promise.
if (typeof Promise.withResolvers !== 'function') {
  Object.defineProperty(Promise, 'withResolvers', {
    configurable: true,
    enumerable: false,
    writable: true,
    value: function withResolvers() {
      const PromiseConstructor = typeof this === 'function' ? this : Promise;
      let resolve;
      let reject;
      const promise = new PromiseConstructor((res, rej) => {
        resolve = res;
        reject = rej;
      });
      return { promise, resolve, reject };
    }
  });
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var iconBase = require('../lib/esm/iconBase.js');
|
|
4
|
+
require('../lib/esm/iconContext.js');
|
|
5
|
+
|
|
6
|
+
// THIS FILE IS AUTO GENERATED
|
|
7
|
+
// Icon component: builds the icon renderer from its SVG description
// (512x512 view box, one path) and renders it with the given props.
function FaChartBar (props) {
  var renderIcon = iconBase.GenIcon({"attr":{"viewBox":"0 0 512 512"},"child":[{"tag":"path","attr":{"d":"M332.8 320h38.4c6.4 0 12.8-6.4 12.8-12.8V172.8c0-6.4-6.4-12.8-12.8-12.8h-38.4c-6.4 0-12.8 6.4-12.8 12.8v134.4c0 6.4 6.4 12.8 12.8 12.8zm96 0h38.4c6.4 0 12.8-6.4 12.8-12.8V76.8c0-6.4-6.4-12.8-12.8-12.8h-38.4c-6.4 0-12.8 6.4-12.8 12.8v230.4c0 6.4 6.4 12.8 12.8 12.8zm-288 0h38.4c6.4 0 12.8-6.4 12.8-12.8v-70.4c0-6.4-6.4-12.8-12.8-12.8h-38.4c-6.4 0-12.8 6.4-12.8 12.8v70.4c0 6.4 6.4 12.8 12.8 12.8zm96 0h38.4c6.4 0 12.8-6.4 12.8-12.8V108.8c0-6.4-6.4-12.8-12.8-12.8h-38.4c-6.4 0-12.8 6.4-12.8 12.8v198.4c0 6.4 6.4 12.8 12.8 12.8zM496 384H64V80c0-8.84-7.16-16-16-16H16C7.16 64 0 71.16 0 80v336c0 17.67 14.33 32 32 32h464c8.84 0 16-7.16 16-16v-32c0-8.84-7.16-16-16-16z"}}]});
  return renderIcon(props);
}

// Icon component: builds the icon renderer from its SVG description
// (384x512 view box, one path) and renders it with the given props.
function FaFileDownload (props) {
  var renderIcon = iconBase.GenIcon({"attr":{"viewBox":"0 0 384 512"},"child":[{"tag":"path","attr":{"d":"M224 136V0H24C10.7 0 0 10.7 0 24v464c0 13.3 10.7 24 24 24h336c13.3 0 24-10.7 24-24V160H248c-13.2 0-24-10.8-24-24zm76.45 211.36l-96.42 95.7c-6.65 6.61-17.39 6.61-24.04 0l-96.42-95.7C73.42 337.29 80.54 320 94.82 320H160v-80c0-8.84 7.16-16 16-16h32c8.84 0 16 7.16 16 16v80h65.18c14.28 0 21.4 17.29 11.27 27.36zM377 105L279.1 7c-4.5-4.5-10.6-7-17-7H256v128h128v-6.1c0-6.3-2.5-12.4-7-16.9z"}}]});
  return renderIcon(props);
}
|
|
12
|
+
|
|
13
|
+
exports.FaChartBar = FaChartBar;
|
|
14
|
+
exports.FaFileDownload = FaFileDownload;
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var React = require('react');
|
|
4
|
+
var iconContext = require('./iconContext.js');
|
|
5
|
+
|
|
6
|
+
// Shallow-merge helper: copies the own enumerable properties of every source
// argument onto the first argument and returns it. On first use it rebinds
// itself to Object.assign when that exists, otherwise to a manual copy loop.
var __assign = function () {
  var copyOwnProperties = function (target) {
    for (var index = 1; index < arguments.length; index += 1) {
      var source = arguments[index];
      for (var key in source) {
        if (Object.prototype.hasOwnProperty.call(source, key)) {
          target[key] = source[key];
        }
      }
    }
    return target;
  };
  __assign = Object.assign || copyOwnProperties;
  return __assign.apply(this, arguments);
};
|
|
16
|
+
// Object-rest helper: returns a new object holding the own properties of `s`
// whose keys are not listed in `e`. String keys come first; own enumerable
// symbol keys are added when Object.getOwnPropertySymbols is available.
var __rest = function (s, e) {
  var remainder = {};
  for (var name in s) {
    if (!Object.prototype.hasOwnProperty.call(s, name)) continue;
    if (e.indexOf(name) >= 0) continue;
    remainder[name] = s[name];
  }
  if (s == null || typeof Object.getOwnPropertySymbols !== "function") {
    return remainder;
  }
  var symbols = Object.getOwnPropertySymbols(s);
  for (var index = 0; index < symbols.length; index += 1) {
    var symbol = symbols[index];
    if (e.indexOf(symbol) < 0 && Object.prototype.propertyIsEnumerable.call(s, symbol)) {
      remainder[symbol] = s[symbol];
    }
  }
  return remainder;
};
|
|
24
|
+
// Converts an array of { tag, attr, child } descriptions into React elements,
// recursing into `child`. Each element is keyed by its position in the array.
// A falsy `tree` is returned as is.
function Tree2Element(tree) {
  if (!tree) {
    return tree;
  }
  return tree.map(function (node, index) {
    var elementProps = __assign({
      key: index
    }, node.attr);
    return React.createElement(node.tag, elementProps, Tree2Element(node.child));
  });
}
|
|
31
|
+
// Creates an icon component from an icon description: the returned function
// renders IconBase with a copy of `data.attr` as its `attr` prop (caller props
// are merged on top) and the elements built from `data.child` as children.
function GenIcon(data) {
  // eslint-disable-next-line react/display-name
  return function (props) {
    var iconProps = __assign({
      attr: __assign({}, data.attr)
    }, props);
    return React.createElement(IconBase, iconProps, Tree2Element(data.child));
  };
}
|
|
39
|
+
// Renders the <svg> wrapper of an icon. Size, color, class name, style and
// extra attributes come from the icon context configuration and are overridden
// or extended by the component's own props. When IconContext is unavailable the
// default context object is used instead.
function IconBase(props) {
  function renderSvg(conf) {
    // Everything except attr/size/title is passed straight through to <svg>.
    var svgProps = __rest(props, ["attr", "size", "title"]);
    var computedSize = props.size || conf.size || "1em";
    // Context class first, then the caller's class; undefined when neither is set.
    var className = conf.className ? conf.className : undefined;
    if (props.className) {
      className = (className ? className + " " : "") + props.className;
    }
    var mergedStyle = __assign(__assign({
      color: props.color || conf.color
    }, conf.style), props.style);
    var baseAttributes = {
      stroke: "currentColor",
      fill: "currentColor",
      strokeWidth: "0"
    };
    var finalAttributes = __assign(baseAttributes, conf.attr, props.attr, svgProps, {
      className: className,
      style: mergedStyle,
      height: computedSize,
      width: computedSize,
      xmlns: "http://www.w3.org/2000/svg"
    });
    var titleElement = props.title && React.createElement("title", null, props.title);
    return React.createElement("svg", finalAttributes, titleElement, props.children);
  }
  if (iconContext.IconContext === undefined) {
    return renderSvg(iconContext.DefaultContext);
  }
  return React.createElement(iconContext.IconContext.Consumer, null, function (conf) {
    return renderSvg(conf);
  });
}
|
|
67
|
+
|
|
68
|
+
exports.GenIcon = GenIcon;
|
|
69
|
+
exports.IconBase = IconBase;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var React = require('react');
|
|
4
|
+
|
|
5
|
+
// Default icon configuration: every option is present but unset.
var DefaultContext = {
  color: undefined,
  size: undefined,
  className: undefined,
  style: undefined,
  attr: undefined
};
// React context carrying the icon configuration; when React.createContext does
// not exist, its (falsy) value is stored instead.
var IconContext = React.createContext ? React.createContext(DefaultContext) : React.createContext;
|
|
13
|
+
|
|
14
|
+
exports.DefaultContext = DefaultContext;
|
|
15
|
+
exports.IconContext = IconContext;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Polyfill for Promise.withResolvers
// This is needed for Next.js build compatibility
//
// Only installed when no native implementation exists. Defined as a
// non-enumerable, writable, configurable property to match the native method,
// and constructs the promise with the receiver so Promise subclasses get an
// instance of their own class. Without a receiver it uses the built-in Promise.
if (typeof Promise.withResolvers !== 'function') {
  Object.defineProperty(Promise, 'withResolvers', {
    configurable: true,
    enumerable: false,
    writable: true,
    value: function withResolvers() {
      const PromiseConstructor = typeof this === 'function' ? this : Promise;
      let resolve;
      let reject;
      const promise = new PromiseConstructor((res, rej) => {
        resolve = res;
        reject = rej;
      });
      return {
        promise,
        resolve,
        reject
      };
    }
  });
}
|