@tfw.in/structura-lib 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/PRODUCTION_ARCHITECTURE.md +511 -0
  2. package/README.md +379 -0
  3. package/SAVE_FUNCTIONALITY_COMPLETE.md +448 -0
  4. package/dist/cjs/EditableContent.js +150 -0
  5. package/dist/cjs/HtmlViewer.js +587 -0
  6. package/dist/cjs/PdfComponents.js +16 -0
  7. package/dist/cjs/PdfDocumentViewer.js +281 -0
  8. package/dist/cjs/Structura.js +806 -0
  9. package/dist/cjs/Table.js +164 -0
  10. package/dist/cjs/TableCell.js +115 -0
  11. package/dist/cjs/accuracyMetrics.js +39 -0
  12. package/dist/cjs/helpers/preprocessData.js +143 -0
  13. package/dist/cjs/index.js +7 -0
  14. package/dist/cjs/lib/polyfills.js +15 -0
  15. package/dist/cjs/lib/utils.js +10 -0
  16. package/dist/cjs/node_modules/react-icons/fa/index.esm.js +14 -0
  17. package/dist/cjs/node_modules/react-icons/lib/esm/iconBase.js +69 -0
  18. package/dist/cjs/node_modules/react-icons/lib/esm/iconContext.js +15 -0
  19. package/dist/cjs/polyfills.js +19 -0
  20. package/dist/cjs/route.js +102 -0
  21. package/dist/cjs/styles.css +7 -0
  22. package/dist/cjs/styles.css.map +1 -0
  23. package/dist/cjs/ui/badge.js +34 -0
  24. package/dist/cjs/ui/button.js +71 -0
  25. package/dist/cjs/ui/card.js +86 -0
  26. package/dist/cjs/ui/progress.js +45 -0
  27. package/dist/cjs/ui/scroll-area.js +62 -0
  28. package/dist/cjs/ui/tabs.js +60 -0
  29. package/dist/cjs/worker.js +36 -0
  30. package/dist/esm/EditableContent.js +161 -0
  31. package/dist/esm/HtmlViewer.js +640 -0
  32. package/dist/esm/PdfComponents.js +21 -0
  33. package/dist/esm/PdfDocumentViewer.js +294 -0
  34. package/dist/esm/Structura.js +951 -0
  35. package/dist/esm/Table.js +182 -0
  36. package/dist/esm/TableCell.js +122 -0
  37. package/dist/esm/_virtual/_rollupPluginBabelHelpers.js +305 -0
  38. package/dist/esm/accuracyMetrics.js +41 -0
  39. package/dist/esm/helpers/preprocessData.js +152 -0
  40. package/dist/esm/index.js +1 -0
  41. package/dist/esm/lib/polyfills.js +13 -0
  42. package/dist/esm/lib/utils.js +8 -0
  43. package/dist/esm/node_modules/react-icons/fa/index.esm.js +11 -0
  44. package/dist/esm/node_modules/react-icons/lib/esm/iconBase.js +66 -0
  45. package/dist/esm/node_modules/react-icons/lib/esm/iconContext.js +12 -0
  46. package/dist/esm/polyfills.js +17 -0
  47. package/dist/esm/route.js +154 -0
  48. package/dist/esm/styles.css +7 -0
  49. package/dist/esm/styles.css.map +1 -0
  50. package/dist/esm/types/EditableContent.d.ts +9 -0
  51. package/dist/esm/types/HtmlViewer.d.ts +10 -0
  52. package/dist/esm/types/PdfComponents.d.ts +35 -0
  53. package/dist/esm/types/PdfDocumentViewer.d.ts +22 -0
  54. package/dist/esm/types/Structura.d.ts +11 -0
  55. package/dist/esm/types/Table.d.ts +12 -0
  56. package/dist/esm/types/TableCell.d.ts +13 -0
  57. package/dist/esm/types/accuracy.d.ts +23 -0
  58. package/dist/esm/types/accuracyMetrics.d.ts +5 -0
  59. package/dist/esm/types/helpers/flattenJSON.d.ts +1 -0
  60. package/dist/esm/types/helpers/hardMerging.d.ts +2 -0
  61. package/dist/esm/types/helpers/index.d.ts +6 -0
  62. package/dist/esm/types/helpers/jsonToHtml.d.ts +40 -0
  63. package/dist/esm/types/helpers/preprocessData.d.ts +3 -0
  64. package/dist/esm/types/helpers/removeMetadata.d.ts +1 -0
  65. package/dist/esm/types/helpers/tableProcessor.d.ts +1 -0
  66. package/dist/esm/types/index.d.ts +3 -0
  67. package/dist/esm/types/lib/polyfills.d.ts +1 -0
  68. package/dist/esm/types/lib/utils.d.ts +2 -0
  69. package/dist/esm/types/polyfills.d.ts +1 -0
  70. package/dist/esm/types/route.d.ts +45 -0
  71. package/dist/esm/types/test-app/src/App.d.ts +4 -0
  72. package/dist/esm/types/test-app/src/main.d.ts +1 -0
  73. package/dist/esm/types/test-app/vite.config.d.ts +2 -0
  74. package/dist/esm/types/types.d.ts +23 -0
  75. package/dist/esm/types/ui/alert.d.ts +8 -0
  76. package/dist/esm/types/ui/badge.d.ts +9 -0
  77. package/dist/esm/types/ui/button.d.ts +11 -0
  78. package/dist/esm/types/ui/card.d.ts +8 -0
  79. package/dist/esm/types/ui/progress.d.ts +6 -0
  80. package/dist/esm/types/ui/scroll-area.d.ts +5 -0
  81. package/dist/esm/types/ui/skeleton.d.ts +2 -0
  82. package/dist/esm/types/ui/tabs.d.ts +7 -0
  83. package/dist/esm/types/worker.d.ts +1 -0
  84. package/dist/esm/ui/badge.js +31 -0
  85. package/dist/esm/ui/button.js +50 -0
  86. package/dist/esm/ui/card.js +67 -0
  87. package/dist/esm/ui/progress.js +26 -0
  88. package/dist/esm/ui/scroll-area.js +45 -0
  89. package/dist/esm/ui/tabs.js +39 -0
  90. package/dist/esm/worker.js +50 -0
  91. package/dist/index.d.ts +38 -0
  92. package/package.json +85 -0
  93. package/server/README.md +203 -0
  94. package/server/db.js +142 -0
  95. package/server/server.js +165 -0
@@ -0,0 +1,640 @@
1
+ import { slicedToArray as _slicedToArray, objectSpread2 as _objectSpread2, defineProperty as _defineProperty, toConsumableArray as _toConsumableArray, createForOfIteratorHelper as _createForOfIteratorHelper } from './_virtual/_rollupPluginBabelHelpers.js';
2
+ import { jsx, jsxs, Fragment } from 'react/jsx-runtime';
3
+ import { useState, useEffect, useCallback, useMemo } from 'react';
4
+ import { VscJson } from 'react-icons/vsc';
5
+ import { FaFileDownload, FaChartBar } from './node_modules/react-icons/fa/index.esm.js';
6
+ import Table from './Table.js';
7
+ import EditableContent from './EditableContent.js';
8
+ import { calculateDifferences } from './accuracyMetrics.js';
9
+
10
/**
 * Flattens an HTML fragment onto a single line.
 *
 * Every `<br>` tag (any letter case, with or without a self-closing slash)
 * and every run of whitespace, newlines included, is replaced by a single
 * space; leading and trailing whitespace is then removed.
 *
 * @param {string} html - Raw HTML fragment. Any falsy value yields "".
 * @returns {string} The whitespace-normalized fragment.
 */
function cleanHtml(html) {
  if (!html) return "";
  return html
    // The `i` flag matters: markup such as `<BR>` or `<Br/>` must be treated
    // exactly like `<br>`.
    .replace(/<br\s*\/?>/gi, " ")
    // `\s` already covers `\n`, so newlines are collapsed in this same pass.
    .replace(/\s+/g, " ")
    .trim();
}
20
/**
 * Full-screen overlay that shows `data` as pretty-printed JSON.
 *
 * @param {object} props
 * @param {boolean} props.isOpen - When falsy, nothing is rendered.
 * @param {Function} props.onClose - Click handler for the close button.
 * @param {*} props.data - Value serialized with `JSON.stringify(data, null, 2)`.
 * @returns {object|null} The modal element, or `null` while closed.
 */
function JsonModal({ isOpen, onClose, data }) {
  if (!isOpen) {
    return null;
  }

  // Title row: heading on the left, close button on the right.
  const titleBar = jsxs("div", {
    className: "flex justify-between items-center mb-4",
    children: [
      jsx("h3", {
        className: "text-lg font-semibold",
        children: "JSON Data"
      }),
      jsx("button", {
        onClick: onClose,
        className: "text-gray-500 hover:text-gray-700",
        children: "\u2715"
      })
    ]
  });

  const jsonDump = jsx("pre", {
    className: "bg-gray-100 p-4 rounded-lg overflow-auto",
    children: JSON.stringify(data, null, 2)
  });

  const panel = jsxs("div", {
    className: "bg-white rounded-lg p-6 max-w-2xl w-full max-h-[80vh] overflow-auto",
    children: [titleBar, jsonDump]
  });

  return jsx("div", {
    className: "fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50",
    children: panel
  });
}
46
/**
 * Full-screen overlay summarizing the edit metrics collected by the viewer.
 *
 * Reads `accuracyScore`, `totalChanges`, `totalCharactersEdited`,
 * `totalWordsEdited` and `blockTypeStats` from `metrics`. The per-block-type
 * section is only rendered when `blockTypeStats` has at least one entry.
 *
 * @param {object} props
 * @param {boolean} props.isOpen - When falsy, nothing is rendered.
 * @param {Function} props.onClose - Click handler for the close button.
 * @param {object} props.metrics - Metrics record with the fields listed above.
 * @returns {object|null} The modal element, or `null` while closed.
 */
function AnalyticsModal({ isOpen, onClose, metrics }) {
  if (!isOpen) {
    return null;
  }

  const blockTypeEntries = Object.entries(metrics.blockTypeStats);

  // Score colour: green above 90, yellow above 70, red otherwise.
  let scoreColor;
  if (metrics.accuracyScore > 90) {
    scoreColor = "text-green-600";
  } else if (metrics.accuracyScore > 70) {
    scoreColor = "text-yellow-600";
  } else {
    scoreColor = "text-red-600";
  }

  // One tile of the three-column totals grid.
  const statCard = (cardClass, labelClass, label, value) =>
    jsxs("div", {
      className: cardClass,
      children: [
        jsx("h4", { className: labelClass, children: label }),
        jsx("p", { className: "text-2xl font-bold", children: value })
      ]
    });

  const titleBar = jsxs("div", {
    className: "flex justify-between items-center mb-4",
    children: [
      jsx("h3", {
        className: "text-lg font-semibold",
        children: "Analytics & Metrics"
      }),
      jsx("button", {
        onClick: onClose,
        className: "text-gray-500 hover:text-gray-700",
        children: "\u2715"
      })
    ]
  });

  const overallAccuracy = jsxs("div", {
    className: "bg-gray-50 p-4 rounded-lg",
    children: [
      jsx("h4", { className: "font-medium mb-2", children: "Overall Accuracy" }),
      jsx("div", {
        className: "text-3xl font-bold mb-2",
        children: jsxs("span", {
          className: scoreColor,
          children: [metrics.accuracyScore.toFixed(2), "%"]
        })
      })
    ]
  });

  const totalsGrid = jsxs("div", {
    className: "grid grid-cols-3 gap-4",
    children: [
      statCard("bg-blue-50 p-4 rounded-lg", "text-sm text-blue-700", "Total Changes", metrics.totalChanges),
      statCard("bg-green-50 p-4 rounded-lg", "text-sm text-green-700", "Characters Edited", metrics.totalCharactersEdited),
      statCard("bg-purple-50 p-4 rounded-lg", "text-sm text-purple-700", "Words Edited", metrics.totalWordsEdited)
    ]
  });

  // `false` when there are no entries, which renders nothing.
  const blockTypeSection = blockTypeEntries.length > 0 && jsxs("div", {
    className: "mt-6",
    children: [
      jsx("h4", { className: "font-medium mb-3", children: "Changes by Block Type" }),
      jsx("div", {
        className: "space-y-2",
        children: blockTypeEntries.map(([type, stats]) =>
          jsxs("div", {
            className: "bg-gray-50 p-3 rounded-lg",
            children: [
              jsxs("div", {
                className: "flex justify-between items-center",
                children: [
                  jsx("span", { className: "font-medium", children: type }),
                  jsxs("span", {
                    className: "text-gray-600",
                    children: [stats.changes, " changes"]
                  })
                ]
              }),
              jsxs("div", {
                className: "text-sm text-gray-500",
                children: [stats.charactersEdited, " characters edited"]
              })
            ]
          }, type)
        )
      })
    ]
  });

  return jsx("div", {
    className: "fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50",
    children: jsxs("div", {
      className: "bg-white rounded-lg p-6 max-w-2xl w-full max-h-[80vh] overflow-auto",
      children: [
        titleBar,
        jsxs("div", {
          className: "space-y-4",
          children: [overallAccuracy, totalsGrid, blockTypeSection]
        })
      ]
    })
  });
}
143
// Local aliases for the imported icon components; the JSX factory calls in
// this module refer to the icons through these names.
const VscJsonIcon = VscJson;
const FaFileDownloadIcon = FaFileDownload;
const FaChartBarIcon = FaChartBar;
147
/**
 * Returns a fresh, zeroed accuracy-metrics record. A new object is built on
 * every call so separate state values never share `changeHistory` or
 * `blockTypeStats`.
 */
function createInitialMetrics() {
  return {
    totalChanges: 0,
    totalCharactersEdited: 0,
    totalWordsEdited: 0,
    accuracyScore: 100,
    changeHistory: [],
    blockTypeStats: {}
  };
}

/**
 * Depth-first, pre-order search for the first node whose `id` equals `id`.
 * Null/undefined nodes are skipped. Returns the node, or `null` if absent.
 */
function findNodeById(root, id) {
  if (!root) return null;
  if (root.id === id) return root;
  if (Array.isArray(root.children)) {
    for (const child of root.children) {
      const match = findNodeById(child, id);
      if (match) return match;
    }
  }
  return null;
}

/**
 * Editable viewer for a parsed document tree.
 *
 * Renders the children of the root node, lets Text / Handwriting /
 * SectionHeader / table blocks be edited, tracks edit metrics, and scrolls to
 * the block identified by `selectedBboxId`.
 *
 * Nodes are read for `id`, `block_type`, `html`, `children`,
 * `merged_table_id` and `llm_table_html`.
 *
 * @param {object} props
 * @param {object} [props.jsonData] - Root node of the document tree. Changing
 *   it resets the edited copy, the change flag and the metrics.
 * @param {string} [props.selectedBboxId] - Id of the block to highlight and
 *   scroll into view.
 * @param {boolean} [props.isLoading] - When truthy a spinner is shown instead
 *   of the document.
 * @param {Function} [props.onNodeClick] - Called with a node id when a
 *   non-Page block is clicked.
 * @param {Function} [props.onSave] - Called with the edited tree when "Save"
 *   is clicked; the button is only shown when this is provided.
 * @returns {object} The rendered element.
 */
function HtmlViewer({ jsonData, selectedBboxId, isLoading, onNodeClick, onSave }) {
  const [editedData, setEditedData] = useState(jsonData);
  const [hasChanges, setHasChanges] = useState(false);
  const [isModalOpen, setIsModalOpen] = useState(false);
  const [modalData, setModalData] = useState(null);
  const [showJsonIcons, setShowJsonIcons] = useState(true);
  const [activeFormat, setActiveFormat] = useState("Show JSON");
  const [accuracyMetrics, setAccuracyMetrics] = useState(createInitialMetrics);
  const [isAnalyticsOpen, setIsAnalyticsOpen] = useState(false);

  // A new document discards every pending edit and all collected metrics.
  useEffect(() => {
    setEditedData(jsonData);
    setHasChanges(false);
    setAccuracyMetrics(createInitialMetrics());
  }, [jsonData]);

  const handleJsonClick = (node) => {
    setModalData(node);
    setIsModalOpen(true);
  };

  const updateAccuracyMetrics = useCallback((nodeId, blockType, originalContent, newContent) => {
    const differences = calculateDifferences(originalContent, newContent);
    const change = {
      id: nodeId,
      blockType: blockType,
      originalContent: originalContent,
      newContent: newContent,
      timestamp: Date.now(),
      charactersDifferent: differences.charactersDifferent,
      wordsDifferent: differences.wordsDifferent
    };
    // The baseline is the size of the document as originally supplied.
    const totalOriginalCharacters = calculateTotalCharacters(jsonData);

    setAccuracyMetrics((prev) => {
      const blockStats = prev.blockTypeStats[blockType] || {
        changes: 0,
        charactersEdited: 0,
        wordsEdited: 0
      };
      const newBlockStats = {
        changes: blockStats.changes + 1,
        charactersEdited: blockStats.charactersEdited + differences.charactersDifferent,
        wordsEdited: blockStats.wordsEdited + differences.wordsDifferent
      };
      const newTotalCharactersEdited = prev.totalCharactersEdited + differences.charactersDifferent;

      // Guard the empty-document case: 0 / 0 would otherwise produce NaN and
      // the analytics modal would display "NaN%".
      let editedPercent;
      if (totalOriginalCharacters > 0) {
        editedPercent = newTotalCharactersEdited / totalOriginalCharacters * 100;
      } else {
        editedPercent = newTotalCharactersEdited > 0 ? 100 : 0;
      }

      return {
        totalChanges: prev.totalChanges + 1,
        totalCharactersEdited: newTotalCharactersEdited,
        totalWordsEdited: prev.totalWordsEdited + differences.wordsDifferent,
        accuracyScore: Math.max(0, 100 - editedPercent),
        changeHistory: [...prev.changeHistory, change],
        blockTypeStats: Object.assign({}, prev.blockTypeStats, { [blockType]: newBlockStats })
      };
    });
  }, [jsonData]);

  const handleContentChange = useCallback((nodeId, newContent) => {
    // Metrics are recorded here, NOT inside the `setEditedData` updater:
    // updater functions must be pure because React may invoke them more than
    // once (e.g. under StrictMode), which would count every edit twice.
    const currentNode = findNodeById(editedData, nodeId);
    if (currentNode) {
      updateAccuracyMetrics(
        nodeId,
        currentNode.block_type || "unknown",
        currentNode.html || "",
        newContent
      );
    }

    setHasChanges(true);
    setEditedData((prevData) => {
      if (prevData == null) return prevData;
      // Deep copy so the previous state object is never mutated.
      const nextData = JSON.parse(JSON.stringify(prevData));
      const target = findNodeById(nextData, nodeId);
      if (target) {
        target.html = newContent;
      }
      return nextData;
    });
  }, [editedData, updateAccuracyMetrics]);

  const handleDownload = () => {
    if (!editedData) return;
    const blob = new Blob([JSON.stringify(editedData, null, 2)], {
      type: "application/json"
    });
    const url = URL.createObjectURL(blob);
    const link = document.createElement("a");
    link.href = url;
    link.download = "updated_bio.json";
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    URL.revokeObjectURL(url);
  };

  // merged_table_id -> Table nodes of that group, in document order. The
  // first entry of each group is the one that gets rendered.
  // NOTE(review): built from `jsonData`, so the nodes held here do not
  // reflect later edits stored in `editedData` — confirm this is intended.
  const mergedTablesMap = useMemo(() => {
    const groups = new Map();
    const collectTables = (node) => {
      if (!node) return;
      if (node.block_type === "Table" && node.merged_table_id) {
        const members = groups.get(node.merged_table_id) || [];
        members.push(node);
        groups.set(node.merged_table_id, members);
      }
      if (node.children) {
        node.children.forEach((child) => collectTables(child));
      }
    };
    collectTables(jsonData);
    return groups;
  }, [jsonData]);

  // Scroll the selected block into view. A table that is a non-primary member
  // of a merged group has no DOM element of its own, so the group's primary
  // table is used instead.
  useEffect(() => {
    if (!selectedBboxId) return undefined;

    let targetId = null;
    if (document.getElementById(selectedBboxId)) {
      targetId = selectedBboxId;
    } else if (editedData) {
      const selectedNode = findNodeById(editedData, selectedBboxId);
      if (selectedNode && selectedNode.block_type === "Table" && selectedNode.merged_table_id) {
        const tablesInGroup = mergedTablesMap.get(selectedNode.merged_table_id);
        if (tablesInGroup && tablesInGroup.length > 0) {
          const primaryTableId = tablesInGroup[0].id;
          if (document.getElementById(primaryTableId)) {
            targetId = primaryTableId;
          } else {
            console.warn(`[HtmlViewer] Primary table element ${primaryTableId} (for merged group) also not found in DOM.`);
          }
        } else {
          console.warn(`[HtmlViewer] No tables found in mergedTablesMap for group ${selectedNode.merged_table_id}. Cannot find primary table to scroll.`);
        }
      }
    }

    if (targetId === null) {
      console.warn(`[HtmlViewer] FINAL: Element with ID ${selectedBboxId} (or its primary merged table, if applicable) was NOT found in the DOM. No scrolling will occur.`);
      return undefined;
    }

    // Deferred so pending DOM updates land first; the element is looked up
    // again inside the callback because it may have been replaced meanwhile.
    const timerId = setTimeout(() => {
      const element = document.getElementById(targetId);
      if (element) {
        element.scrollIntoView(true); // align to top
      } else {
        console.warn(`[HtmlViewer] Timeout: Element ${targetId} not found at time of scroll.`);
      }
    }, 0);
    // Cancel a pending scroll when the effect re-runs or the component unmounts.
    return () => clearTimeout(timerId);
  }, [selectedBboxId, editedData, mergedTablesMap]);

  if (isLoading) {
    return jsx("div", {
      className: "flex items-center justify-center h-full",
      children: jsxs("div", {
        className: "animate-pulse flex flex-col items-center gap-4",
        children: [
          jsx("div", {
            className: "h-8 w-8 border-4 border-blue-500 border-t-transparent rounded-full animate-spin"
          }),
          jsx("p", {
            className: "text-gray-600",
            children: "Processing PDF..."
          })
        ]
      })
    });
  }

  if (!editedData) {
    return jsx("p", {
      className: "text-center p-4",
      children: "Upload a PDF and generate JSON to view content"
    });
  }

  // Renders one node. Table-like blocks go to <Table>, editable text blocks to
  // <EditableContent>; anything else is rendered as raw HTML followed by its
  // children.
  const renderNode = (node) => {
    if (!node) return null;

    const blockType = node.block_type;
    const isPage = blockType === "Page";
    const isTable = blockType != null && blockType.toLowerCase() === "table";
    const isTableLike = isTable || blockType === "TableOfContents";
    const isEditableText =
      blockType === "Text" || blockType === "Handwriting" || blockType === "SectionHeader";
    const isSelected = node.id === selectedBboxId;
    const hasChildren = node.children && node.children.length > 0;
    const isClickable = Boolean(onNodeClick) && !isPage;

    // Node HTML is used as-is (only whitespace-normalized) so the per-node
    // structure and click handling are preserved.
    const htmlContent = cleanHtml(node.html || "");
    const isHeading = /^<h[1-4]/.test(htmlContent);

    // Only the first table of a merged group is rendered; the rest are
    // handed to that first table through `mergedTables`.
    if (isTable && node.merged_table_id) {
      const group = mergedTablesMap.get(node.merged_table_id) || [];
      if (group.length > 0 && group[0].id !== node.id) {
        return null;
      }
    }

    const handleContentClick = (event) => {
      // React only to clicks on this wrapper itself, not to clicks that
      // bubbled up from nested elements.
      if (event.target === event.currentTarget && node.id && isClickable) {
        onNodeClick(node.id);
      }
    };

    const jsonButton = !isPage && showJsonIcons && jsx("button", {
      onClick: (event) => {
        event.stopPropagation(); // do not trigger the wrapper's onClick
        handleJsonClick(node);
      },
      className: "absolute right-2 top-2 z-10 opacity-0 group-hover:opacity-100 transition-opacity text-gray-500 hover:text-gray-700",
      title: "View JSON",
      children: jsx(VscJsonIcon, { size: 18 })
    });

    let body;
    if (isTableLike) {
      body = jsx(Table, {
        node: node,
        selectedBboxId: selectedBboxId,
        onJsonClick: handleJsonClick,
        onContentChange: handleContentChange,
        mergedTables: node.merged_table_id
          ? (mergedTablesMap.get(node.merged_table_id) || []).slice(1)
          : [],
        hasLlmHtml: !!node.llm_table_html,
        showJsonIcons: showJsonIcons,
        onNodeClick: onNodeClick
      });
    } else if (isEditableText) {
      body = jsx(EditableContent, {
        id: node.id,
        content: htmlContent,
        onContentChange: handleContentChange,
        isHeading: isHeading,
        onNodeClick: isClickable ? () => onNodeClick(node.id) : undefined
      });
    } else {
      body = jsxs(Fragment, {
        children: [
          jsx("div", {
            className: "prose max-w-none w-full overflow-hidden break-words",
            // SECURITY NOTE(review): `node.html` is injected without
            // sanitization. If `jsonData` can come from an untrusted source
            // it must be sanitized before it reaches this component.
            dangerouslySetInnerHTML: { __html: htmlContent }
          }),
          hasChildren && jsx("div", {
            className: "ml-4 mt-2 border-l-2 border-gray-200 pl-4 max-w-full overflow-hidden",
            children: node.children.map((child) => renderNode(child))
          })
        ]
      });
    }

    const selectedClass = isSelected ? "bg-blue-100 border-2 border-blue-500" : "";
    const clickableClass = isClickable ? "cursor-pointer hover:bg-gray-50" : "";

    return jsxs("div", {
      id: node.id,
      className: `p-2 my-1 rounded transition-colors relative group max-w-full overflow-hidden ${selectedClass} ${clickableClass}`,
      onClick: isPage ? undefined : handleContentClick,
      children: [jsonButton, body]
    }, node.id);
  };

  // Direct children of the root, minus missing entries and minus tables that
  // are non-primary members of a merged group.
  const topLevelNodes = (editedData.children || []).filter((node) => {
    if (!node) return false;
    if (node.block_type === "Table" && node.merged_table_id) {
      const group = mergedTablesMap.get(node.merged_table_id) || [];
      return group.length === 0 || group[0].id === node.id;
    }
    return true;
  });

  const formatButtonClass = (label) =>
    "py-1.5 px-3 text-sm font-medium rounded-md transition-colors whitespace-nowrap " +
    (activeFormat === label ? "bg-white text-gray-900 shadow-sm" : "text-gray-600 hover:text-gray-900");

  // Sticky toolbar: Save / Download (only once something changed) on the
  // left, analytics button and the JSON-icon toggle on the right.
  const header = jsxs("div", {
    className: "sticky top-0 z-20 bg-white border-b border-gray-200 p-4 flex justify-between items-center flex-shrink-0",
    children: [
      jsx("div", {
        className: "flex gap-2",
        children: hasChanges && jsxs(Fragment, {
          children: [
            onSave && jsxs("button", {
              onClick: () => onSave(editedData),
              className: "inline-flex items-center gap-2 px-4 py-2 bg-green-600 text-white rounded-md hover:bg-green-700 focus:outline-none focus:ring-2 focus:ring-green-500 focus:ring-offset-2",
              title: "Save Changes",
              children: [jsx(FaFileDownloadIcon, { size: 18 }), "Save"]
            }),
            jsxs("button", {
              onClick: handleDownload,
              className: "inline-flex items-center gap-2 px-4 py-2 bg-blue-600 text-white rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2",
              title: "Download Updated JSON",
              children: [jsx(FaFileDownloadIcon, { size: 18 }), "Download"]
            })
          ]
        })
      }),
      jsxs("div", {
        className: "flex gap-2 items-center",
        children: [
          jsx("button", {
            onClick: () => setIsAnalyticsOpen(true),
            className: "inline-flex items-center gap-2 px-4 py-2 bg-green-600 text-white rounded-md hover:bg-green-700 focus:outline-none focus:ring-2 focus:ring-green-500 focus:ring-offset-2",
            title: "View Analytics",
            children: jsx(FaChartBarIcon, { size: 18 })
          }),
          jsxs("div", {
            className: "flex bg-gray-100 rounded-lg p-1",
            children: [
              jsx("button", {
                onClick: () => {
                  setShowJsonIcons(true);
                  setActiveFormat("Show JSON");
                },
                className: formatButtonClass("Show JSON"),
                children: "Show JSON"
              }),
              jsx("button", {
                onClick: () => {
                  setShowJsonIcons(false);
                  setActiveFormat("Hide JSON");
                },
                className: formatButtonClass("Hide JSON"),
                children: "Hide JSON"
              })
            ]
          })
        ]
      })
    ]
  });

  return jsxs("div", {
    className: "w-full h-full max-w-full flex flex-col overflow-hidden",
    children: [
      header,
      jsx("div", {
        className: "flex-1 overflow-auto min-h-0 max-w-full",
        children: jsx("div", {
          className: "p-4 max-w-full",
          children: topLevelNodes.map((node) =>
            jsx("div", {
              className: "w-full max-w-full overflow-hidden break-words",
              children: renderNode(node)
            }, node.id)
          )
        })
      }),
      jsx(AnalyticsModal, {
        isOpen: isAnalyticsOpen,
        onClose: () => setIsAnalyticsOpen(false),
        metrics: accuracyMetrics
      }),
      jsx(JsonModal, {
        isOpen: isModalOpen,
        onClose: () => setIsModalOpen(false),
        data: modalData
      })
    ]
  });
}
623
/**
 * Counts the visible characters of a document tree.
 *
 * For every node reachable through `children`, anything that looks like an
 * HTML tag (`<...>`) is removed from the node's `html`, the remainder is
 * trimmed, and its length is added to the total. Missing (null/undefined)
 * nodes and nodes whose `html` is not a string are skipped rather than
 * raising a TypeError.
 *
 * @param {object} data - Root node of the tree; a falsy value yields 0.
 * @returns {number} Sum of the stripped, trimmed `html` lengths.
 */
function calculateTotalCharacters(data) {
  if (!data) return 0;

  let total = 0;
  // Explicit stack instead of recursion; visiting order does not affect a sum.
  const pending = [data];
  while (pending.length > 0) {
    const node = pending.pop();
    if (!node) continue;

    if (typeof node.html === "string") {
      total += node.html.replace(/<[^>]*>/g, "").trim().length;
    }
    if (Array.isArray(node.children)) {
      for (const child of node.children) {
        pending.push(child);
      }
    }
  }
  return total;
}
639
+
640
+ export { HtmlViewer as default };
@@ -0,0 +1,21 @@
1
+ import 'react/jsx-runtime';
2
+ import { lazy } from 'react';
3
+ import './lib/polyfills.js';
4
+
5
+ // Lazy load the PDF components
6
// `Document` from react-pdf, wrapped with React.lazy so the library is only
// fetched on first render. `lazy` expects a module-like object with a
// `default` export, hence the re-wrapping of the named export.
const Document = /*#__PURE__*/lazy(async () => {
  const mod = await import('react-pdf');
  return { default: mod.Document };
});
13
// `Page` from react-pdf, wrapped with React.lazy so the library is only
// fetched on first render. `lazy` expects a module-like object with a
// `default` export, hence the re-wrapping of the named export.
const Page = /*#__PURE__*/lazy(async () => {
  const mod = await import('react-pdf');
  return { default: mod.Page };
});
20
+
21
+ export { Document, Page };