@tfw.in/structura-lib 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/PRODUCTION_ARCHITECTURE.md +511 -0
- package/README.md +379 -0
- package/SAVE_FUNCTIONALITY_COMPLETE.md +448 -0
- package/dist/cjs/EditableContent.js +150 -0
- package/dist/cjs/HtmlViewer.js +587 -0
- package/dist/cjs/PdfComponents.js +16 -0
- package/dist/cjs/PdfDocumentViewer.js +281 -0
- package/dist/cjs/Structura.js +806 -0
- package/dist/cjs/Table.js +164 -0
- package/dist/cjs/TableCell.js +115 -0
- package/dist/cjs/accuracyMetrics.js +39 -0
- package/dist/cjs/helpers/preprocessData.js +143 -0
- package/dist/cjs/index.js +7 -0
- package/dist/cjs/lib/polyfills.js +15 -0
- package/dist/cjs/lib/utils.js +10 -0
- package/dist/cjs/node_modules/react-icons/fa/index.esm.js +14 -0
- package/dist/cjs/node_modules/react-icons/lib/esm/iconBase.js +69 -0
- package/dist/cjs/node_modules/react-icons/lib/esm/iconContext.js +15 -0
- package/dist/cjs/polyfills.js +19 -0
- package/dist/cjs/route.js +102 -0
- package/dist/cjs/styles.css +7 -0
- package/dist/cjs/styles.css.map +1 -0
- package/dist/cjs/ui/badge.js +34 -0
- package/dist/cjs/ui/button.js +71 -0
- package/dist/cjs/ui/card.js +86 -0
- package/dist/cjs/ui/progress.js +45 -0
- package/dist/cjs/ui/scroll-area.js +62 -0
- package/dist/cjs/ui/tabs.js +60 -0
- package/dist/cjs/worker.js +36 -0
- package/dist/esm/EditableContent.js +161 -0
- package/dist/esm/HtmlViewer.js +640 -0
- package/dist/esm/PdfComponents.js +21 -0
- package/dist/esm/PdfDocumentViewer.js +294 -0
- package/dist/esm/Structura.js +951 -0
- package/dist/esm/Table.js +182 -0
- package/dist/esm/TableCell.js +122 -0
- package/dist/esm/_virtual/_rollupPluginBabelHelpers.js +305 -0
- package/dist/esm/accuracyMetrics.js +41 -0
- package/dist/esm/helpers/preprocessData.js +152 -0
- package/dist/esm/index.js +1 -0
- package/dist/esm/lib/polyfills.js +13 -0
- package/dist/esm/lib/utils.js +8 -0
- package/dist/esm/node_modules/react-icons/fa/index.esm.js +11 -0
- package/dist/esm/node_modules/react-icons/lib/esm/iconBase.js +66 -0
- package/dist/esm/node_modules/react-icons/lib/esm/iconContext.js +12 -0
- package/dist/esm/polyfills.js +17 -0
- package/dist/esm/route.js +154 -0
- package/dist/esm/styles.css +7 -0
- package/dist/esm/styles.css.map +1 -0
- package/dist/esm/types/EditableContent.d.ts +9 -0
- package/dist/esm/types/HtmlViewer.d.ts +10 -0
- package/dist/esm/types/PdfComponents.d.ts +35 -0
- package/dist/esm/types/PdfDocumentViewer.d.ts +22 -0
- package/dist/esm/types/Structura.d.ts +11 -0
- package/dist/esm/types/Table.d.ts +12 -0
- package/dist/esm/types/TableCell.d.ts +13 -0
- package/dist/esm/types/accuracy.d.ts +23 -0
- package/dist/esm/types/accuracyMetrics.d.ts +5 -0
- package/dist/esm/types/helpers/flattenJSON.d.ts +1 -0
- package/dist/esm/types/helpers/hardMerging.d.ts +2 -0
- package/dist/esm/types/helpers/index.d.ts +6 -0
- package/dist/esm/types/helpers/jsonToHtml.d.ts +40 -0
- package/dist/esm/types/helpers/preprocessData.d.ts +3 -0
- package/dist/esm/types/helpers/removeMetadata.d.ts +1 -0
- package/dist/esm/types/helpers/tableProcessor.d.ts +1 -0
- package/dist/esm/types/index.d.ts +3 -0
- package/dist/esm/types/lib/polyfills.d.ts +1 -0
- package/dist/esm/types/lib/utils.d.ts +2 -0
- package/dist/esm/types/polyfills.d.ts +1 -0
- package/dist/esm/types/route.d.ts +45 -0
- package/dist/esm/types/test-app/src/App.d.ts +4 -0
- package/dist/esm/types/test-app/src/main.d.ts +1 -0
- package/dist/esm/types/test-app/vite.config.d.ts +2 -0
- package/dist/esm/types/types.d.ts +23 -0
- package/dist/esm/types/ui/alert.d.ts +8 -0
- package/dist/esm/types/ui/badge.d.ts +9 -0
- package/dist/esm/types/ui/button.d.ts +11 -0
- package/dist/esm/types/ui/card.d.ts +8 -0
- package/dist/esm/types/ui/progress.d.ts +6 -0
- package/dist/esm/types/ui/scroll-area.d.ts +5 -0
- package/dist/esm/types/ui/skeleton.d.ts +2 -0
- package/dist/esm/types/ui/tabs.d.ts +7 -0
- package/dist/esm/types/worker.d.ts +1 -0
- package/dist/esm/ui/badge.js +31 -0
- package/dist/esm/ui/button.js +50 -0
- package/dist/esm/ui/card.js +67 -0
- package/dist/esm/ui/progress.js +26 -0
- package/dist/esm/ui/scroll-area.js +45 -0
- package/dist/esm/ui/tabs.js +39 -0
- package/dist/esm/worker.js +50 -0
- package/dist/index.d.ts +38 -0
- package/package.json +85 -0
- package/server/README.md +203 -0
- package/server/db.js +142 -0
- package/server/server.js +165 -0
|
@@ -0,0 +1,587 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, '__esModule', { value: true });
|
|
4
|
+
|
|
5
|
+
var jsxRuntime = require('react/jsx-runtime');
|
|
6
|
+
var React = require('react');
|
|
7
|
+
var vsc = require('react-icons/vsc');
|
|
8
|
+
var index_esm = require('./node_modules/react-icons/fa/index.esm.js');
|
|
9
|
+
var Table = require('./Table.js');
|
|
10
|
+
var EditableContent = require('./EditableContent.js');
|
|
11
|
+
var accuracyMetrics = require('./accuracyMetrics.js');
|
|
12
|
+
|
|
13
|
+
/**
 * Normalise an HTML fragment so it can be rendered on a single line.
 *
 * - `<br>` tags (any letter case, with or without a self-closing slash) become spaces;
 * - every run of whitespace (spaces, tabs, newlines) collapses to one space;
 * - leading and trailing whitespace is removed.
 *
 * @param {string|null|undefined} html - Raw HTML fragment.
 * @returns {string} The cleaned fragment, or "" when `html` is falsy.
 */
const cleanHtml = html => {
  if (!html) return "";
  return html
    // HTML tag names are case-insensitive, so match <BR> / <Br/> as well as <br>.
    .replace(/<br\s*\/?>/gi, " ")
    // `\s` already covers "\n", so no separate newline pass is needed.
    .replace(/\s+/g, " ")
    .trim();
};
|
|
23
|
+
/**
 * Full-screen modal that pretty-prints a JSON value.
 *
 * @param {object} props
 * @param {boolean} props.isOpen - When falsy the component renders nothing.
 * @param {Function} props.onClose - Click handler for the close button.
 * @param {*} props.data - Value shown via `JSON.stringify(data, null, 2)`.
 * @returns {object|null} The modal element, or null while closed.
 */
function JsonModal({
  isOpen,
  onClose,
  data
}) {
  if (!isOpen) return null;
  return jsxRuntime.jsx("div", {
    className: "fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50",
    // Expose the overlay to assistive technology as a modal dialog.
    role: "dialog",
    "aria-modal": true,
    "aria-label": "JSON Data",
    children: jsxRuntime.jsxs("div", {
      className: "bg-white rounded-lg p-6 max-w-2xl w-full max-h-[80vh] overflow-auto",
      children: [jsxRuntime.jsxs("div", {
        className: "flex justify-between items-center mb-4",
        children: [jsxRuntime.jsx("h3", {
          className: "text-lg font-semibold",
          children: "JSON Data"
        }), jsxRuntime.jsx("button", {
          // Explicit type: never act as a submit button if nested inside a form.
          type: "button",
          // The visible label is only a "✕" glyph, so give it a readable name.
          "aria-label": "Close",
          onClick: onClose,
          className: "text-gray-500 hover:text-gray-700",
          children: "\u2715"
        })]
      }), jsxRuntime.jsx("pre", {
        className: "bg-gray-100 p-4 rounded-lg overflow-auto",
        children: JSON.stringify(data, null, 2)
      })]
    })
  });
}
|
|
50
|
+
/**
 * Full-screen modal summarising edit/accuracy metrics.
 *
 * Shows the overall accuracy score (green above 90, yellow above 70, red
 * otherwise), three total counters, and, when any exist, a per-block-type
 * breakdown.
 *
 * @param {object} props
 * @param {boolean} props.isOpen - When falsy the component renders nothing.
 * @param {Function} props.onClose - Click handler for the close button.
 * @param {object} props.metrics - Reads `accuracyScore`, `totalChanges`,
 *   `totalCharactersEdited`, `totalWordsEdited` and `blockTypeStats`
 *   (a map of block type to `{ changes, charactersEdited }`).
 * @returns {object|null} The modal element, or null while closed.
 */
function AnalyticsModal({
  isOpen,
  onClose,
  metrics
}) {
  if (!isOpen) return null;
  const score = metrics.accuracyScore;
  const scoreColorClass = score > 90 ? "text-green-600" : score > 70 ? "text-yellow-600" : "text-red-600";
  // Computed once; tolerate a metrics object that has no per-type stats yet.
  const blockTypeEntries = Object.entries(metrics.blockTypeStats || {});
  return jsxRuntime.jsx("div", {
    className: "fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50",
    // Expose the overlay to assistive technology as a modal dialog.
    role: "dialog",
    "aria-modal": true,
    "aria-label": "Analytics & Metrics",
    children: jsxRuntime.jsxs("div", {
      className: "bg-white rounded-lg p-6 max-w-2xl w-full max-h-[80vh] overflow-auto",
      children: [jsxRuntime.jsxs("div", {
        className: "flex justify-between items-center mb-4",
        children: [jsxRuntime.jsx("h3", {
          className: "text-lg font-semibold",
          children: "Analytics & Metrics"
        }), jsxRuntime.jsx("button", {
          // Explicit type: never act as a submit button if nested inside a form.
          type: "button",
          // The visible label is only a "✕" glyph, so give it a readable name.
          "aria-label": "Close",
          onClick: onClose,
          className: "text-gray-500 hover:text-gray-700",
          children: "\u2715"
        })]
      }), jsxRuntime.jsxs("div", {
        className: "space-y-4",
        children: [jsxRuntime.jsxs("div", {
          className: "bg-gray-50 p-4 rounded-lg",
          children: [jsxRuntime.jsx("h4", {
            className: "font-medium mb-2",
            children: "Overall Accuracy"
          }), jsxRuntime.jsx("div", {
            className: "text-3xl font-bold mb-2",
            children: jsxRuntime.jsxs("span", {
              className: scoreColorClass,
              children: [score.toFixed(2), "%"]
            })
          })]
        }), jsxRuntime.jsxs("div", {
          className: "grid grid-cols-3 gap-4",
          children: [jsxRuntime.jsxs("div", {
            className: "bg-blue-50 p-4 rounded-lg",
            children: [jsxRuntime.jsx("h4", {
              className: "text-sm text-blue-700",
              children: "Total Changes"
            }), jsxRuntime.jsx("p", {
              className: "text-2xl font-bold",
              children: metrics.totalChanges
            })]
          }), jsxRuntime.jsxs("div", {
            className: "bg-green-50 p-4 rounded-lg",
            children: [jsxRuntime.jsx("h4", {
              className: "text-sm text-green-700",
              children: "Characters Edited"
            }), jsxRuntime.jsx("p", {
              className: "text-2xl font-bold",
              children: metrics.totalCharactersEdited
            })]
          }), jsxRuntime.jsxs("div", {
            className: "bg-purple-50 p-4 rounded-lg",
            children: [jsxRuntime.jsx("h4", {
              className: "text-sm text-purple-700",
              children: "Words Edited"
            }), jsxRuntime.jsx("p", {
              className: "text-2xl font-bold",
              children: metrics.totalWordsEdited
            })]
          })]
        }), blockTypeEntries.length > 0 && jsxRuntime.jsxs("div", {
          className: "mt-6",
          children: [jsxRuntime.jsx("h4", {
            className: "font-medium mb-3",
            children: "Changes by Block Type"
          }), jsxRuntime.jsx("div", {
            className: "space-y-2",
            children: blockTypeEntries.map(([type, stats]) => jsxRuntime.jsxs("div", {
              className: "bg-gray-50 p-3 rounded-lg",
              children: [jsxRuntime.jsxs("div", {
                className: "flex justify-between items-center",
                children: [jsxRuntime.jsx("span", {
                  className: "font-medium",
                  children: type
                }), jsxRuntime.jsxs("span", {
                  className: "text-gray-600",
                  children: [stats.changes, " changes"]
                })]
              }), jsxRuntime.jsxs("div", {
                className: "text-sm text-gray-500",
                children: [stats.charactersEdited, " characters edited"]
              })]
            }, type))
          })]
        })]
      })]
    })
  });
}
|
|
143
|
+
// Local aliases for the icon components referenced by the viewer's JSX.
const { VscJson: VscJsonIcon } = vsc;
const {
  FaFileDownload: FaFileDownloadIcon,
  FaChartBar: FaChartBarIcon
} = index_esm;
|
|
147
|
+
/**
 * Build a zeroed accuracy-metrics record (score starts at 100).
 * A fresh object is returned on every call so state resets never share arrays/objects.
 */
function createEmptyAccuracyMetrics() {
  return {
    totalChanges: 0,
    totalCharactersEdited: 0,
    totalWordsEdited: 0,
    accuracyScore: 100,
    changeHistory: [],
    blockTypeStats: {}
  };
}

/**
 * Depth-first (pre-order) search for the first node whose `id` equals `targetId`.
 * Null/undefined nodes and non-array `children` are skipped instead of throwing.
 *
 * @param {object|null|undefined} root - Node to start from.
 * @param {*} targetId - Id compared with strict equality.
 * @returns {object|null} The matching node, or null.
 */
function findNodeByIdInTree(root, targetId) {
  if (!root) return null;
  if (root.id === targetId) return root;
  if (Array.isArray(root.children)) {
    for (const child of root.children) {
      const match = findNodeByIdInTree(child, targetId);
      if (match) return match;
    }
  }
  return null;
}

/**
 * Editable HTML view of a parsed document tree.
 *
 * Renders the children of the root node, lets Text / Handwriting /
 * SectionHeader blocks and tables be edited, tracks edit metrics, and offers
 * Save / Download / Analytics / JSON-inspection controls.
 *
 * @param {object} props
 * @param {object|null} props.jsonData - Root node; nodes are read for `id`,
 *   `block_type`, `html`, `children`, `merged_table_id`, `llm_table_html`.
 * @param {*} props.selectedBboxId - Id of the node to highlight and scroll to.
 * @param {boolean} props.isLoading - Shows a spinner instead of content.
 * @param {Function} [props.onNodeClick] - Called with a node id when its content is clicked.
 * @param {Function} [props.onSave] - Called with the edited tree when Save is pressed.
 * @returns {object} React element.
 */
function HtmlViewer({
  //NOSONAR
  jsonData,
  selectedBboxId,
  isLoading,
  onNodeClick,
  onSave
}) {
  const [editedData, setEditedData] = React.useState(jsonData);
  const [hasChanges, setHasChanges] = React.useState(false);
  const [isModalOpen, setIsModalOpen] = React.useState(false);
  const [modalData, setModalData] = React.useState(null);
  const [showJsonIcons, setShowJsonIcons] = React.useState(true);
  const [activeFormat, setActiveFormat] = React.useState("Show JSON");
  const [accuracyMetrics$1, setAccuracyMetrics] = React.useState(createEmptyAccuracyMetrics());
  const [isAnalyticsOpen, setIsAnalyticsOpen] = React.useState(false);
  // Mirrors the latest edited tree so edit handlers can read it synchronously
  // without performing side effects inside a state updater.
  const editedDataRef = React.useRef(jsonData);
  React.useEffect(() => {
    // Reset all edit state whenever a new document arrives.
    editedDataRef.current = jsonData;
    setEditedData(jsonData);
    setHasChanges(false);
    setAccuracyMetrics(createEmptyAccuracyMetrics());
  }, [jsonData]);
  const handleJsonClick = node => {
    setModalData(node);
    setIsModalOpen(true);
  };
  const updateAccuracyMetrics = React.useCallback((nodeId, blockType, originalContent, newContent) => {
    const differences = accuracyMetrics.calculateDifferences(originalContent, newContent);
    const change = {
      id: nodeId,
      blockType,
      originalContent,
      newContent,
      timestamp: Date.now(),
      charactersDifferent: differences.charactersDifferent,
      wordsDifferent: differences.wordsDifferent
    };
    // Depends only on jsonData, so compute it once per edit, outside the updater.
    const totalOriginalCharacters = calculateTotalCharacters(jsonData);
    setAccuracyMetrics(prev => {
      const blockStats = prev.blockTypeStats[blockType] || {
        changes: 0,
        charactersEdited: 0,
        wordsEdited: 0
      };
      const newBlockStats = {
        changes: blockStats.changes + 1,
        charactersEdited: blockStats.charactersEdited + differences.charactersDifferent,
        wordsEdited: blockStats.wordsEdited + differences.wordsDifferent
      };
      const newTotalCharactersEdited = prev.totalCharactersEdited + differences.charactersDifferent;
      let accuracyScore;
      if (totalOriginalCharacters > 0) {
        accuracyScore = Math.max(0, 100 - newTotalCharactersEdited / totalOriginalCharacters * 100);
      } else {
        // Empty document: avoid 0/0 (NaN). Any edited characters count as fully
        // inaccurate; no edited characters leaves the score untouched at 100.
        accuracyScore = newTotalCharactersEdited > 0 ? 0 : 100;
      }
      return {
        totalChanges: prev.totalChanges + 1,
        totalCharactersEdited: newTotalCharactersEdited,
        totalWordsEdited: prev.totalWordsEdited + differences.wordsDifferent,
        accuracyScore,
        changeHistory: [...prev.changeHistory, change],
        blockTypeStats: {
          ...prev.blockTypeStats,
          [blockType]: newBlockStats
        }
      };
    });
  }, [jsonData]);
  const handleContentChange = React.useCallback((nodeId, newContent) => {
    setHasChanges(true);
    const currentData = editedDataRef.current;
    if (!currentData) return;
    // Work on a deep copy so the tree held in state is never mutated.
    const nextData = JSON.parse(JSON.stringify(currentData));
    const target = findNodeByIdInTree(nextData, nodeId);
    if (!target) return;
    // Record metrics here rather than inside a setState updater: updaters must
    // be pure and may be invoked twice by React (e.g. in StrictMode), which
    // would count the same edit twice.
    updateAccuracyMetrics(nodeId, target.block_type || "unknown", target.html || "", newContent);
    target.html = newContent;
    editedDataRef.current = nextData;
    setEditedData(nextData);
  }, [updateAccuracyMetrics]);
  const handleDownload = () => {
    if (!editedData) return;
    const jsonString = JSON.stringify(editedData, null, 2);
    const blob = new Blob([jsonString], {
      type: "application/json"
    });
    const url = URL.createObjectURL(blob);
    // Trigger the download through a temporary anchor element.
    const a = document.createElement("a");
    a.href = url;
    a.download = "updated_bio.json";
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    URL.revokeObjectURL(url);
  };
  // Per-node HTML for the tree view: the node's own html, whitespace-normalised.
  const getHtmlContent = node => {
    return cleanHtml(node.html || '');
  };
  // merged_table_id -> "Table" nodes of that group, in document (pre-order) order.
  const mergedTablesMap = React.useMemo(() => {
    const map = new Map();
    const findTables = node => {
      if (!node) return;
      if (node.block_type === "Table" && node.merged_table_id) {
        console.log("Found table with merged_table_id:", node.merged_table_id, "ID:", node.id);
        const tables = map.get(node.merged_table_id) || [];
        tables.push(node);
        map.set(node.merged_table_id, tables);
      }
      if (node.children) {
        node.children.forEach(findTables);
      }
    };
    if (jsonData) {
      findTables(jsonData);
    }
    return map;
  }, [jsonData]);
  React.useEffect(() => {
    if (!selectedBboxId) {
      console.log("[HtmlViewer] Scroll effect: No selectedBboxId, so no scrolling action taken.");
      return undefined;
    }
    let elementToScroll = document.getElementById(selectedBboxId);
    if (!elementToScroll && editedData) {
      // Not in the DOM: it may be a non-primary table of a merged group, which
      // is not rendered on its own. Fall back to the group's first table.
      const selectedNodeDetails = findNodeByIdInTree(editedData, selectedBboxId);
      if (selectedNodeDetails && selectedNodeDetails.block_type === "Table" && selectedNodeDetails.merged_table_id) {
        const tablesInGroup = mergedTablesMap.get(selectedNodeDetails.merged_table_id);
        if (tablesInGroup && tablesInGroup.length > 0) {
          const primaryTableId = tablesInGroup[0].id; // The first table in the map is the one rendered
          elementToScroll = document.getElementById(primaryTableId);
          if (!elementToScroll) {
            console.warn(`[HtmlViewer] Primary table element ${primaryTableId} (for merged group) also not found in DOM.`);
          }
        } else {
          console.warn(`[HtmlViewer] No tables found in mergedTablesMap for group ${selectedNodeDetails.merged_table_id}. Cannot find primary table to scroll.`);
        }
      } else if (selectedNodeDetails) {
        console.log(`[HtmlViewer] Selected node ${selectedBboxId} is type ${selectedNodeDetails.block_type}. It's not a merged table or 'merged_table_id' is missing.`);
      } else {
        console.log(`[HtmlViewer] Details for selected node ${selectedBboxId} not found within the 'editedData' structure. Cannot determine if it's part of a merged table.`);
      }
    }
    if (!elementToScroll) {
      console.warn(`[HtmlViewer] FINAL: Element with ID ${selectedBboxId} (or its primary merged table, if applicable) was NOT found in the DOM. No scrolling will occur.`);
      return undefined;
    }
    const targetId = elementToScroll.id;
    // Defer one tick so pending DOM updates land, then re-fetch the element.
    const timerId = setTimeout(() => {
      const el = document.getElementById(targetId);
      if (el) {
        el.scrollIntoView(true); // Align to top, auto behavior
      } else {
        console.warn(`[HtmlViewer] Timeout: Element ${targetId} not found at time of scroll.`);
      }
    }, 0);
    // Cancel a pending scroll if the effect re-runs or the component unmounts.
    return () => clearTimeout(timerId);
  }, [selectedBboxId, editedData, mergedTablesMap]);
  if (isLoading) {
    return jsxRuntime.jsx("div", {
      className: "flex items-center justify-center h-full",
      children: jsxRuntime.jsxs("div", {
        className: "animate-pulse flex flex-col items-center gap-4",
        children: [jsxRuntime.jsx("div", {
          className: "h-8 w-8 border-4 border-blue-500 border-t-transparent rounded-full animate-spin"
        }), jsxRuntime.jsx("p", {
          className: "text-gray-600",
          children: "Processing PDF..."
        })]
      })
    });
  }
  if (!editedData) {
    return jsxRuntime.jsx("p", {
      className: "text-center p-4",
      children: "Upload a PDF and generate JSON to view content"
    });
  }
  // Recursively render one node: tables via Table, editable text-like blocks via
  // EditableContent, everything else as raw HTML followed by its children.
  const renderHtmlContent = node => {
    if (!node) return null;
    const isSelected = node.id === selectedBboxId;
    const hasChildren = node.children && node.children.length > 0;
    const isTable = typeof node.block_type === "string" && node.block_type.toLowerCase() === "table";
    const isTableOfContents = node.block_type === "TableOfContents";
    const isText = node.block_type === "Text";
    const isHandwritten = node.block_type === "Handwriting";
    const isSectionHeader = node.block_type === "SectionHeader";
    const isPage = node.block_type === "Page";
    const htmlContent = getHtmlContent(node);
    const isHeading = htmlContent && (htmlContent.startsWith("<h1") || htmlContent.startsWith("<h2") || htmlContent.startsWith("<h3") || htmlContent.startsWith("<h4"));
    // Skip tables that are part of a merged group but aren't the first table
    if (isTable && node.merged_table_id) {
      const tables = mergedTablesMap.get(node.merged_table_id) || [];
      if (tables.length > 0 && tables[0].id !== node.id) {
        return null;
      }
    }
    const handleContentClick = e => {
      // Only process direct clicks, not events bubbled up from child elements.
      if (e.target === e.currentTarget && node.id && onNodeClick && !isPage) {
        console.log("HTML content clicked:", node.id);
        onNodeClick(node.id);
      }
    };
    return jsxRuntime.jsxs("div", {
      id: node.id,
      className: `p-2 my-1 rounded transition-colors relative group max-w-full overflow-hidden ${isSelected ? "bg-blue-100 border-2 border-blue-500" : ""} ${onNodeClick && !isPage ? "cursor-pointer hover:bg-gray-50" : ""}`,
      onClick: isPage ? undefined : handleContentClick,
      children: [!isPage && showJsonIcons && jsxRuntime.jsx("button", {
        onClick: e => {
          e.stopPropagation(); // Prevent triggering parent onClick
          handleJsonClick(node);
        },
        // Only rendered while showJsonIcons is true, so the class list is fixed.
        className: "absolute right-2 top-2 z-10 opacity-0 group-hover:opacity-100 transition-opacity text-gray-500 hover:text-gray-700",
        title: "View JSON",
        children: jsxRuntime.jsx(VscJsonIcon, {
          size: 18
        })
      }), isTable || isTableOfContents ? jsxRuntime.jsx(Table.default, {
        node: node,
        selectedBboxId: selectedBboxId,
        onJsonClick: handleJsonClick,
        onContentChange: handleContentChange,
        // Remaining tables of the merge group are handed to the primary table.
        mergedTables: node.merged_table_id ? (mergedTablesMap.get(node.merged_table_id) || []).slice(1) : [],
        hasLlmHtml: !!node.llm_table_html,
        showJsonIcons: showJsonIcons,
        onNodeClick: onNodeClick
      }) : isText || isHandwritten || isSectionHeader ? jsxRuntime.jsx(EditableContent.default, {
        id: node.id,
        content: htmlContent,
        onContentChange: handleContentChange,
        isHeading: !!isHeading,
        onNodeClick: onNodeClick && !isPage ? () => onNodeClick(node.id) : undefined
      }) : jsxRuntime.jsxs(jsxRuntime.Fragment, {
        children: [jsxRuntime.jsx("div", {
          className: "prose max-w-none w-full overflow-hidden break-words",
          // NOTE(review): node html is injected unsanitised; confirm that
          // jsonData always comes from a trusted source.
          dangerouslySetInnerHTML: {
            __html: htmlContent
          }
        }), hasChildren && jsxRuntime.jsx("div", {
          className: "ml-4 mt-2 border-l-2 border-gray-200 pl-4 max-w-full overflow-hidden",
          children: node.children.map(child => renderHtmlContent(child))
        })]
      })]
    }, node.id);
  };
  // Top-level nodes to render, minus non-primary tables of merged groups.
  const getAllNodes = data => {
    if (!data) return [];
    const children = data.children || [];
    return children.filter(node => {
      if (node.block_type === "Table" && node.merged_table_id) {
        const tables = mergedTablesMap.get(node.merged_table_id) || [];
        // Keep only if this is the first table in the merge group
        return tables.length === 0 || tables[0].id === node.id;
      }
      return true;
    });
  };
  const allNodes = getAllNodes(editedData);
  // Sticky toolbar: Save/Download (only once something changed), analytics, JSON-icon toggle.
  const renderHeader = () => jsxRuntime.jsxs("div", {
    className: "sticky top-0 z-20 bg-white border-b border-gray-200 p-4 flex justify-between items-center flex-shrink-0",
    children: [jsxRuntime.jsx("div", {
      className: "flex gap-2",
      children: hasChanges && jsxRuntime.jsxs(jsxRuntime.Fragment, {
        children: [onSave && jsxRuntime.jsxs("button", {
          onClick: () => onSave(editedData),
          className: "inline-flex items-center gap-2 px-4 py-2 bg-green-600 text-white rounded-md hover:bg-green-700 focus:outline-none focus:ring-2 focus:ring-green-500 focus:ring-offset-2",
          title: "Save Changes",
          children: [jsxRuntime.jsx(FaFileDownloadIcon, {
            size: 18
          }), "Save"]
        }), jsxRuntime.jsxs("button", {
          onClick: handleDownload,
          className: "inline-flex items-center gap-2 px-4 py-2 bg-blue-600 text-white rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2",
          title: "Download Updated JSON",
          children: [jsxRuntime.jsx(FaFileDownloadIcon, {
            size: 18
          }), "Download"]
        })]
      })
    }), jsxRuntime.jsxs("div", {
      className: "flex gap-2 items-center",
      children: [jsxRuntime.jsx("button", {
        onClick: () => setIsAnalyticsOpen(true),
        className: "inline-flex items-center gap-2 px-4 py-2 bg-green-600 text-white rounded-md hover:bg-green-700 focus:outline-none focus:ring-2 focus:ring-green-500 focus:ring-offset-2",
        title: "View Analytics",
        children: jsxRuntime.jsx(FaChartBarIcon, {
          size: 18
        })
      }), jsxRuntime.jsxs("div", {
        className: "flex bg-gray-100 rounded-lg p-1",
        children: [jsxRuntime.jsx("button", {
          onClick: () => {
            setShowJsonIcons(true);
            setActiveFormat("Show JSON");
          },
          className: `py-1.5 px-3 text-sm font-medium rounded-md transition-colors whitespace-nowrap ${activeFormat === "Show JSON" ? "bg-white text-gray-900 shadow-sm" : "text-gray-600 hover:text-gray-900"}`,
          children: "Show JSON"
        }), jsxRuntime.jsx("button", {
          onClick: () => {
            setShowJsonIcons(false);
            setActiveFormat("Hide JSON");
          },
          className: `py-1.5 px-3 text-sm font-medium rounded-md transition-colors whitespace-nowrap ${activeFormat === "Hide JSON" ? "bg-white text-gray-900 shadow-sm" : "text-gray-600 hover:text-gray-900"}`,
          children: "Hide JSON"
        })]
      })]
    })]
  });
  return jsxRuntime.jsxs("div", {
    className: "w-full h-full max-w-full flex flex-col overflow-hidden",
    children: [renderHeader(), jsxRuntime.jsx("div", {
      className: "flex-1 overflow-auto min-h-0 max-w-full",
      children: jsxRuntime.jsx("div", {
        className: "p-4 max-w-full",
        children: allNodes.map(node => jsxRuntime.jsx("div", {
          className: "w-full max-w-full overflow-hidden break-words",
          children: renderHtmlContent(node)
        }, node.id))
      })
    }), jsxRuntime.jsx(AnalyticsModal, {
      isOpen: isAnalyticsOpen,
      onClose: () => setIsAnalyticsOpen(false),
      metrics: accuracyMetrics$1
    }), jsxRuntime.jsx(JsonModal, {
      isOpen: isModalOpen,
      onClose: () => setIsModalOpen(false),
      data: modalData
    })]
  });
}
|
|
570
|
+
/**
 * Count the visible text characters in a document tree.
 *
 * For every node with a string `html`, tags are stripped with a simple
 * `<...>` pattern, the remainder is trimmed, and its length is added to the
 * total. Children are visited recursively.
 *
 * @param {object|null|undefined} data - Root node (`html`, `children`).
 * @returns {number} Total character count; 0 when `data` is falsy.
 */
function calculateTotalCharacters(data) {
  if (!data) return 0;
  let total = 0;
  const countCharacters = node => {
    // Tolerate holes (null/undefined entries) in a children array.
    if (!node) return;
    if (typeof node.html === "string") {
      const cleanText = node.html.replace(/<[^>]*>/g, "").trim();
      total += cleanText.length;
    }
    if (Array.isArray(node.children)) {
      node.children.forEach(countCharacters);
    }
  };
  countCharacters(data);
  return total;
}
|
|
586
|
+
|
|
587
|
+
exports.default = HtmlViewer;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
require('react/jsx-runtime');
|
|
4
|
+
var React = require('react');
|
|
5
|
+
require('./lib/polyfills.js');
|
|
6
|
+
|
|
7
|
+
// Lazily resolved react-pdf components: each loader wraps the named export in
// the `{ default: Component }` shape that React.lazy expects.
const Document = /*#__PURE__*/React.lazy(async () => {
  const pdfModule = await import('react-pdf');
  return {
    default: pdfModule.Document
  };
});
const Page = /*#__PURE__*/React.lazy(async () => {
  const pdfModule = await import('react-pdf');
  return {
    default: pdfModule.Page
  };
});
|
|
14
|
+
|
|
15
|
+
exports.Document = Document;
|
|
16
|
+
exports.Page = Page;
|