natural-pdf 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/api/index.md +386 -0
- docs/assets/favicon.png +3 -0
- docs/assets/favicon.svg +3 -0
- docs/assets/javascripts/custom.js +17 -0
- docs/assets/logo.svg +3 -0
- docs/assets/sample-screen.png +0 -0
- docs/assets/social-preview.png +17 -0
- docs/assets/social-preview.svg +17 -0
- docs/assets/stylesheets/custom.css +65 -0
- docs/document-qa/index.ipynb +435 -0
- docs/document-qa/index.md +79 -0
- docs/element-selection/index.ipynb +915 -0
- docs/element-selection/index.md +229 -0
- docs/index.md +170 -0
- docs/installation/index.md +69 -0
- docs/interactive-widget/index.ipynb +962 -0
- docs/interactive-widget/index.md +12 -0
- docs/layout-analysis/index.ipynb +818 -0
- docs/layout-analysis/index.md +185 -0
- docs/ocr/index.md +209 -0
- docs/pdf-navigation/index.ipynb +314 -0
- docs/pdf-navigation/index.md +97 -0
- docs/regions/index.ipynb +816 -0
- docs/regions/index.md +294 -0
- docs/tables/index.ipynb +658 -0
- docs/tables/index.md +144 -0
- docs/text-analysis/index.ipynb +370 -0
- docs/text-analysis/index.md +105 -0
- docs/text-extraction/index.ipynb +1478 -0
- docs/text-extraction/index.md +292 -0
- docs/tutorials/01-loading-and-extraction.ipynb +1710 -0
- docs/tutorials/01-loading-and-extraction.md +95 -0
- docs/tutorials/02-finding-elements.ipynb +340 -0
- docs/tutorials/02-finding-elements.md +149 -0
- docs/tutorials/03-extracting-blocks.ipynb +147 -0
- docs/tutorials/03-extracting-blocks.md +48 -0
- docs/tutorials/04-table-extraction.ipynb +114 -0
- docs/tutorials/04-table-extraction.md +50 -0
- docs/tutorials/05-excluding-content.ipynb +270 -0
- docs/tutorials/05-excluding-content.md +109 -0
- docs/tutorials/06-document-qa.ipynb +332 -0
- docs/tutorials/06-document-qa.md +91 -0
- docs/tutorials/07-layout-analysis.ipynb +288 -0
- docs/tutorials/07-layout-analysis.md +66 -0
- docs/tutorials/07-working-with-regions.ipynb +413 -0
- docs/tutorials/07-working-with-regions.md +151 -0
- docs/tutorials/08-spatial-navigation.ipynb +508 -0
- docs/tutorials/08-spatial-navigation.md +190 -0
- docs/tutorials/09-section-extraction.ipynb +2434 -0
- docs/tutorials/09-section-extraction.md +256 -0
- docs/tutorials/10-form-field-extraction.ipynb +512 -0
- docs/tutorials/10-form-field-extraction.md +201 -0
- docs/tutorials/11-enhanced-table-processing.ipynb +54 -0
- docs/tutorials/11-enhanced-table-processing.md +9 -0
- docs/tutorials/12-ocr-integration.ipynb +604 -0
- docs/tutorials/12-ocr-integration.md +175 -0
- docs/tutorials/13-semantic-search.ipynb +1328 -0
- docs/tutorials/13-semantic-search.md +77 -0
- docs/visual-debugging/index.ipynb +2970 -0
- docs/visual-debugging/index.md +157 -0
- docs/visual-debugging/region.png +0 -0
- natural_pdf/__init__.py +50 -33
- natural_pdf/analyzers/__init__.py +2 -1
- natural_pdf/analyzers/layout/base.py +32 -24
- natural_pdf/analyzers/layout/docling.py +131 -72
- natural_pdf/analyzers/layout/gemini.py +264 -0
- natural_pdf/analyzers/layout/layout_analyzer.py +156 -113
- natural_pdf/analyzers/layout/layout_manager.py +125 -58
- natural_pdf/analyzers/layout/layout_options.py +43 -17
- natural_pdf/analyzers/layout/paddle.py +152 -95
- natural_pdf/analyzers/layout/surya.py +164 -92
- natural_pdf/analyzers/layout/tatr.py +149 -84
- natural_pdf/analyzers/layout/yolo.py +89 -45
- natural_pdf/analyzers/text_options.py +22 -15
- natural_pdf/analyzers/text_structure.py +131 -85
- natural_pdf/analyzers/utils.py +30 -23
- natural_pdf/collections/pdf_collection.py +146 -97
- natural_pdf/core/__init__.py +1 -1
- natural_pdf/core/element_manager.py +419 -337
- natural_pdf/core/highlighting_service.py +268 -196
- natural_pdf/core/page.py +1044 -521
- natural_pdf/core/pdf.py +516 -313
- natural_pdf/elements/__init__.py +1 -1
- natural_pdf/elements/base.py +307 -225
- natural_pdf/elements/collections.py +805 -543
- natural_pdf/elements/line.py +39 -36
- natural_pdf/elements/rect.py +32 -30
- natural_pdf/elements/region.py +889 -879
- natural_pdf/elements/text.py +127 -99
- natural_pdf/exporters/__init__.py +0 -1
- natural_pdf/exporters/searchable_pdf.py +261 -102
- natural_pdf/ocr/__init__.py +57 -35
- natural_pdf/ocr/engine.py +150 -46
- natural_pdf/ocr/engine_easyocr.py +146 -150
- natural_pdf/ocr/engine_paddle.py +118 -175
- natural_pdf/ocr/engine_surya.py +78 -141
- natural_pdf/ocr/ocr_factory.py +114 -0
- natural_pdf/ocr/ocr_manager.py +122 -124
- natural_pdf/ocr/ocr_options.py +16 -20
- natural_pdf/ocr/utils.py +98 -0
- natural_pdf/qa/__init__.py +1 -1
- natural_pdf/qa/document_qa.py +119 -111
- natural_pdf/search/__init__.py +37 -31
- natural_pdf/search/haystack_search_service.py +312 -189
- natural_pdf/search/haystack_utils.py +186 -122
- natural_pdf/search/search_options.py +25 -14
- natural_pdf/search/search_service_protocol.py +12 -6
- natural_pdf/search/searchable_mixin.py +261 -176
- natural_pdf/selectors/__init__.py +2 -1
- natural_pdf/selectors/parser.py +159 -316
- natural_pdf/templates/__init__.py +1 -1
- natural_pdf/templates/spa/css/style.css +334 -0
- natural_pdf/templates/spa/index.html +31 -0
- natural_pdf/templates/spa/js/app.js +472 -0
- natural_pdf/templates/spa/words.txt +235976 -0
- natural_pdf/utils/debug.py +32 -0
- natural_pdf/utils/highlighting.py +8 -2
- natural_pdf/utils/identifiers.py +29 -0
- natural_pdf/utils/packaging.py +418 -0
- natural_pdf/utils/reading_order.py +65 -63
- natural_pdf/utils/text_extraction.py +195 -0
- natural_pdf/utils/visualization.py +70 -61
- natural_pdf/widgets/__init__.py +2 -3
- natural_pdf/widgets/viewer.py +749 -718
- {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/METADATA +53 -17
- natural_pdf-0.1.6.dist-info/RECORD +141 -0
- {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/WHEEL +1 -1
- natural_pdf-0.1.6.dist-info/top_level.txt +4 -0
- notebooks/Examples.ipynb +1293 -0
- pdfs/.gitkeep +0 -0
- pdfs/01-practice.pdf +543 -0
- pdfs/0500000US42001.pdf +0 -0
- pdfs/0500000US42007.pdf +0 -0
- pdfs/2014 Statistics.pdf +0 -0
- pdfs/2019 Statistics.pdf +0 -0
- pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
- pdfs/needs-ocr.pdf +0 -0
- natural_pdf/templates/ocr_debug.html +0 -517
- natural_pdf-0.1.4.dist-info/RECORD +0 -61
- natural_pdf-0.1.4.dist-info/top_level.txt +0 -1
- {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/licenses/LICENSE +0 -0
natural_pdf/widgets/viewer.py
CHANGED
@@ -1,765 +1,796 @@
|
|
1
1
|
# natural_pdf/widgets/viewer.py
|
2
2
|
|
3
|
-
import
|
4
|
-
from traitlets import Unicode, List, Dict, observe
|
3
|
+
import logging
|
5
4
|
import os
|
6
|
-
import logging # Add logging
|
7
|
-
import json
|
8
|
-
from IPython.display import display, HTML, Javascript
|
9
|
-
import uuid
|
10
|
-
from PIL import Image
|
11
5
|
|
12
6
|
logger = logging.getLogger(__name__)
|
13
7
|
|
14
|
-
#
|
15
|
-
|
16
|
-
|
8
|
+
# Initialize flag and module/class variables to None
|
9
|
+
_IPYWIDGETS_AVAILABLE = False
|
10
|
+
widgets = None
|
11
|
+
SimpleInteractiveViewerWidget = None
|
12
|
+
InteractiveViewerWidget = None
|
17
13
|
|
18
14
|
try:
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
logger.debug(f"Generating HTML with image: {page_image[:30]}... and {len(elements)} elements (using scaled coords)")
|
70
|
-
|
71
|
-
# Create the container div
|
72
|
-
container_html = f"""
|
73
|
-
<div id="{self.widget_id}" class="pdf-viewer" style="position: relative; font-family: Arial, sans-serif;">
|
74
|
-
<div class="toolbar" style="margin-bottom: 10px; padding: 5px; background-color: #f0f0f0; border-radius: 4px;">
|
75
|
-
<button id="{self.widget_id}-zoom-in" style="margin-right: 5px;">Zoom In (+)</button>
|
76
|
-
<button id="{self.widget_id}-zoom-out" style="margin-right: 5px;">Zoom Out (-)</button>
|
77
|
-
<button id="{self.widget_id}-reset-zoom" style="margin-right: 5px;">Reset</button>
|
78
|
-
</div>
|
79
|
-
<div style="display: flex; flex-direction: row;">
|
80
|
-
<div class="pdf-outer-container" style="position: relative; overflow: hidden; border: 1px solid #ccc; flex-grow: 1;">
|
81
|
-
<div id="{self.widget_id}-zoom-pan-container" class="zoom-pan-container" style="position: relative; width: fit-content; height: fit-content; transform-origin: top left; cursor: grab;">
|
82
|
-
<!-- The image is rendered at scale, so its dimensions match scaled coordinates -->
|
83
|
-
<img src="{page_image}" style="display: block; max-width: none; height: auto;" />
|
84
|
-
<div id="{self.widget_id}-elements-layer" class="elements-layer" style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; pointer-events: none;">
|
85
|
-
"""
|
86
|
-
|
87
|
-
# Add SVG overlay layer
|
88
|
-
container_html += f"""
|
89
|
-
</div>
|
90
|
-
<div id="{self.widget_id}-svg-layer" class="svg-layer" style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; pointer-events: none;">
|
91
|
-
<!-- SVG viewport should match the scaled image size -->
|
92
|
-
<svg width="100%" height="100%">
|
93
|
-
"""
|
94
|
-
|
95
|
-
# Add elements and SVG boxes using the SCALED coordinates
|
96
|
-
for i, element in enumerate(elements):
|
97
|
-
element_type = element.get('type', 'unknown')
|
98
|
-
# Use the already scaled coordinates
|
99
|
-
x0 = element.get('x0', 0)
|
100
|
-
y0 = element.get('y0', 0)
|
101
|
-
x1 = element.get('x1', 0)
|
102
|
-
y1 = element.get('y1', 0)
|
103
|
-
|
104
|
-
# Calculate width and height from scaled coords
|
105
|
-
width = x1 - x0
|
106
|
-
height = y1 - y0
|
107
|
-
|
108
|
-
# Create the element div with the right styling based on type
|
109
|
-
# Use scaled coordinates for positioning and dimensions
|
110
|
-
element_style = "position: absolute; pointer-events: auto; cursor: pointer; "
|
111
|
-
element_style += f"left: {x0}px; top: {y0}px; width: {width}px; height: {height}px; "
|
112
|
-
|
113
|
-
# Different styling for different element types
|
114
|
-
if element_type == 'text':
|
115
|
-
element_style += "background-color: rgba(255, 255, 0, 0.3); border: 1px dashed transparent; "
|
116
|
-
elif element_type == 'image':
|
117
|
-
element_style += "background-color: rgba(0, 128, 255, 0.3); border: 1px dashed transparent; "
|
118
|
-
elif element_type == 'figure':
|
119
|
-
element_style += "background-color: rgba(255, 0, 255, 0.3); border: 1px dashed transparent; "
|
120
|
-
elif element_type == 'table':
|
121
|
-
element_style += "background-color: rgba(0, 255, 0, 0.3); border: 1px dashed transparent; "
|
15
|
+
# Attempt to import the core optional dependency
|
16
|
+
import ipywidgets as widgets_imported
|
17
|
+
|
18
|
+
widgets = widgets_imported # Assign to the global name if import succeeds
|
19
|
+
_IPYWIDGETS_AVAILABLE = True
|
20
|
+
logger.debug("Successfully imported ipywidgets. Defining viewer widgets.")
|
21
|
+
|
22
|
+
# --- Dependencies needed ONLY if ipywidgets is available ---
|
23
|
+
import base64
|
24
|
+
import json
|
25
|
+
import uuid
|
26
|
+
from io import BytesIO
|
27
|
+
|
28
|
+
from IPython.display import HTML, Javascript, display
|
29
|
+
from PIL import Image
|
30
|
+
from traitlets import Dict, List, Unicode, observe
|
31
|
+
|
32
|
+
# --- Read JS code from file (only needed if widgets are defined) --- #
|
33
|
+
_MODULE_DIR = os.path.dirname(__file__)
|
34
|
+
_FRONTEND_JS_PATH = os.path.join(_MODULE_DIR, "frontend", "viewer.js")
|
35
|
+
try:
|
36
|
+
with open(_FRONTEND_JS_PATH, "r", encoding="utf-8") as f:
|
37
|
+
_FRONTEND_JS_CODE = f.read()
|
38
|
+
logger.debug(f"Successfully read frontend JS from: {_FRONTEND_JS_PATH}")
|
39
|
+
except FileNotFoundError:
|
40
|
+
logger.error(f"Frontend JS file not found at {_FRONTEND_JS_PATH}. Widget will likely fail.")
|
41
|
+
_FRONTEND_JS_CODE = "console.error('Frontend JS file not found! Widget cannot load.');"
|
42
|
+
except Exception as e:
|
43
|
+
logger.error(f"Error reading frontend JS file {_FRONTEND_JS_PATH}: {e}")
|
44
|
+
_FRONTEND_JS_CODE = f"console.error('Error reading frontend JS file: {e}');"
|
45
|
+
|
46
|
+
# --- Define Widget Classes ONLY if ipywidgets is available ---
|
47
|
+
class SimpleInteractiveViewerWidget(widgets.DOMWidget):
|
48
|
+
def __init__(self, pdf_data=None, **kwargs):
|
49
|
+
"""
|
50
|
+
Create a simple interactive PDF viewer widget.
|
51
|
+
|
52
|
+
Args:
|
53
|
+
pdf_data (dict, optional): Dictionary containing 'page_image', 'elements', etc.
|
54
|
+
**kwargs: Additional parameters including image_uri, elements, etc.
|
55
|
+
"""
|
56
|
+
super().__init__()
|
57
|
+
|
58
|
+
# Support both pdf_data dict and individual kwargs
|
59
|
+
if pdf_data:
|
60
|
+
self.pdf_data = pdf_data
|
61
|
+
# Ensure backward compatibility - if image_uri exists but page_image doesn't
|
62
|
+
if "image_uri" in pdf_data and "page_image" not in pdf_data:
|
63
|
+
self.pdf_data["page_image"] = pdf_data["image_uri"]
|
122
64
|
else:
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
65
|
+
# Check for image_uri in kwargs
|
66
|
+
image_source = kwargs.get("image_uri", "")
|
67
|
+
|
68
|
+
self.pdf_data = {"page_image": image_source, "elements": kwargs.get("elements", [])}
|
69
|
+
|
70
|
+
# Log for debugging
|
71
|
+
logger.debug(f"SimpleInteractiveViewerWidget initialized with widget_id={id(self)}")
|
72
|
+
logger.debug(
|
73
|
+
f"Image source provided: {self.pdf_data.get('page_image', 'None')[:30]}..."
|
74
|
+
)
|
75
|
+
logger.debug(f"Number of elements: {len(self.pdf_data.get('elements', []))}")
|
76
|
+
|
77
|
+
self.widget_id = f"pdf-viewer-{str(uuid.uuid4())[:8]}"
|
78
|
+
self._generate_html()
|
79
|
+
|
80
|
+
def _generate_html(self):
|
81
|
+
"""Generate the HTML for the PDF viewer"""
|
82
|
+
# Extract data - Coordinates in self.pdf_data['elements'] are already scaled
|
83
|
+
page_image = self.pdf_data.get("page_image", "")
|
84
|
+
elements = self.pdf_data.get("elements", [])
|
85
|
+
|
86
|
+
logger.debug(
|
87
|
+
f"Generating HTML with image: {page_image[:30]}... and {len(elements)} elements (using scaled coords)"
|
88
|
+
)
|
89
|
+
|
90
|
+
# Create the container div
|
91
|
+
container_html = f"""
|
92
|
+
<div id="{self.widget_id}" class="pdf-viewer" style="position: relative; font-family: Arial, sans-serif;">
|
93
|
+
<div class="toolbar" style="margin-bottom: 10px; padding: 5px; background-color: #f0f0f0; border-radius: 4px;">
|
94
|
+
<button id="{self.widget_id}-zoom-in" style="margin-right: 5px;">Zoom In (+)</button>
|
95
|
+
<button id="{self.widget_id}-zoom-out" style="margin-right: 5px;">Zoom Out (-)</button>
|
96
|
+
<button id="{self.widget_id}-reset-zoom" style="margin-right: 5px;">Reset</button>
|
97
|
+
</div>
|
98
|
+
<div style="display: flex; flex-direction: row;">
|
99
|
+
<div class="pdf-outer-container" style="position: relative; overflow: hidden; border: 1px solid #ccc; flex-grow: 1;">
|
100
|
+
<div id="{self.widget_id}-zoom-pan-container" class="zoom-pan-container" style="position: relative; width: fit-content; height: fit-content; transform-origin: top left; cursor: grab;">
|
101
|
+
<!-- The image is rendered at scale, so its dimensions match scaled coordinates -->
|
102
|
+
<img src="{page_image}" style="display: block; max-width: none; height: auto;" />
|
103
|
+
<div id="{self.widget_id}-elements-layer" class="elements-layer" style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; pointer-events: none;">
|
128
104
|
"""
|
129
|
-
|
130
|
-
# Add SVG
|
105
|
+
|
106
|
+
# Add SVG overlay layer
|
131
107
|
container_html += f"""
|
132
|
-
|
133
|
-
|
108
|
+
</div>
|
109
|
+
<div id="{self.widget_id}-svg-layer" class="svg-layer" style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; pointer-events: none;">
|
110
|
+
<!-- SVG viewport should match the scaled image size -->
|
111
|
+
<svg width="100%" height="100%">
|
134
112
|
"""
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
113
|
+
|
114
|
+
# Add elements and SVG boxes using the SCALED coordinates
|
115
|
+
for i, element in enumerate(elements):
|
116
|
+
element_type = element.get("type", "unknown")
|
117
|
+
# Use the already scaled coordinates
|
118
|
+
x0 = element.get("x0", 0)
|
119
|
+
y0 = element.get("y0", 0)
|
120
|
+
x1 = element.get("x1", 0)
|
121
|
+
y1 = element.get("y1", 0)
|
122
|
+
|
123
|
+
# Calculate width and height from scaled coords
|
124
|
+
width = x1 - x0
|
125
|
+
height = y1 - y0
|
126
|
+
|
127
|
+
# Create the element div with the right styling based on type
|
128
|
+
# Use scaled coordinates for positioning and dimensions
|
129
|
+
element_style = "position: absolute; pointer-events: auto; cursor: pointer; "
|
130
|
+
element_style += (
|
131
|
+
f"left: {x0}px; top: {y0}px; width: {width}px; height: {height}px; "
|
132
|
+
)
|
133
|
+
|
134
|
+
# Different styling for different element types
|
135
|
+
if element_type == "text":
|
136
|
+
element_style += (
|
137
|
+
"background-color: rgba(255, 255, 0, 0.3); border: 1px dashed transparent; "
|
138
|
+
)
|
139
|
+
elif element_type == "image":
|
140
|
+
element_style += (
|
141
|
+
"background-color: rgba(0, 128, 255, 0.3); border: 1px dashed transparent; "
|
142
|
+
)
|
143
|
+
elif element_type == "figure":
|
144
|
+
element_style += (
|
145
|
+
"background-color: rgba(255, 0, 255, 0.3); border: 1px dashed transparent; "
|
146
|
+
)
|
147
|
+
elif element_type == "table":
|
148
|
+
element_style += (
|
149
|
+
"background-color: rgba(0, 255, 0, 0.3); border: 1px dashed transparent; "
|
150
|
+
)
|
151
|
+
else:
|
152
|
+
element_style += "background-color: rgba(200, 200, 200, 0.3); border: 1px dashed transparent; "
|
153
|
+
|
154
|
+
# Add element div
|
155
|
+
container_html += f"""
|
156
|
+
<div class="pdf-element" data-element-id="{i}" style="{element_style}"></div>
|
157
|
+
"""
|
158
|
+
|
159
|
+
# Add SVG rectangle using scaled coordinates and dimensions
|
160
|
+
container_html += f"""
|
161
|
+
<rect data-element-id="{i}" x="{x0}" y="{y0}" width="{width}" height="{height}"
|
162
|
+
fill="none" stroke="rgba(255, 165, 0, 0.85)" stroke-width="1.5" />
|
163
|
+
"""
|
164
|
+
|
165
|
+
# Close SVG and container divs
|
166
|
+
container_html += f"""
|
167
|
+
</svg>
|
168
|
+
</div>
|
139
169
|
</div>
|
140
170
|
</div>
|
141
|
-
</div>
|
142
|
-
|
143
|
-
<div id="{self.widget_id}-info-panel" class="info-panel" style="display: block; margin-left: 20px; padding: 10px; width: 300px; max-height: 80vh; overflow-y: auto; border: 1px solid #eee; background-color: #f9f9f9;">
|
144
|
-
<h4 style="margin-top: 0; margin-bottom: 5px; border-bottom: 1px solid #ccc; padding-bottom: 5px;">Element Info</h4>
|
145
|
-
<pre id="{self.widget_id}-element-data" style="white-space: pre-wrap; word-break: break-all; font-size: 0.9em;"></pre>
|
146
|
-
</div>
|
147
|
-
|
148
|
-
</div>
|
149
|
-
"""
|
150
|
-
|
151
|
-
# Display the HTML
|
152
|
-
display(HTML(container_html))
|
153
|
-
|
154
|
-
# Generate JavaScript to add interactivity
|
155
|
-
self._add_javascript()
|
156
|
-
|
157
|
-
def _add_javascript(self):
|
158
|
-
"""Add JavaScript to make the viewer interactive"""
|
159
|
-
# Create JavaScript for element selection and SVG highlighting
|
160
|
-
js_code = """
|
161
|
-
(function() {
|
162
|
-
// Store widget ID in a variable to avoid issues with string templates
|
163
|
-
const widgetId = "%s";
|
164
|
-
|
165
|
-
// Initialize PDF viewer registry if it doesn't exist
|
166
|
-
if (!window.pdfViewerRegistry) {
|
167
|
-
window.pdfViewerRegistry = {};
|
168
|
-
}
|
169
|
-
|
170
|
-
// Store PDF data for this widget
|
171
|
-
window.pdfViewerRegistry[widgetId] = {
|
172
|
-
initialData: %s,
|
173
|
-
selectedElement: null,
|
174
|
-
scale: 1.0, // Initial zoom scale
|
175
|
-
translateX: 0, // Initial pan X
|
176
|
-
translateY: 0, // Initial pan Y
|
177
|
-
isDragging: false, // Flag for panning
|
178
|
-
startX: 0, // Drag start X
|
179
|
-
startY: 0, // Drag start Y
|
180
|
-
startTranslateX: 0, // Translate X at drag start
|
181
|
-
startTranslateY: 0, // Translate Y at drag start
|
182
|
-
justDragged: false // Flag to differentiate click from drag completion
|
183
|
-
};
|
184
|
-
|
185
|
-
// Get references to elements
|
186
|
-
const viewerData = window.pdfViewerRegistry[widgetId];
|
187
|
-
const outerContainer = document.querySelector(`#${widgetId} .pdf-outer-container`);
|
188
|
-
const zoomPanContainer = document.getElementById(`${widgetId}-zoom-pan-container`);
|
189
|
-
const elements = zoomPanContainer.querySelectorAll(".pdf-element");
|
190
|
-
const zoomInButton = document.getElementById(`${widgetId}-zoom-in`);
|
191
|
-
const zoomOutButton = document.getElementById(`${widgetId}-zoom-out`);
|
192
|
-
const resetButton = document.getElementById(`${widgetId}-reset-zoom`);
|
193
|
-
|
194
|
-
// --- Helper function to apply transform ---
|
195
|
-
function applyTransform() {
|
196
|
-
zoomPanContainer.style.transform = `translate(${viewerData.translateX}px, ${viewerData.translateY}px) scale(${viewerData.scale})`;
|
197
|
-
}
|
198
|
-
|
199
|
-
// --- Zooming Logic ---
|
200
|
-
function handleZoom(event) {
|
201
|
-
event.preventDefault(); // Prevent default scroll
|
202
171
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
172
|
+
<div id="{self.widget_id}-info-panel" class="info-panel" style="display: block; margin-left: 20px; padding: 10px; width: 300px; max-height: 80vh; overflow-y: auto; border: 1px solid #eee; background-color: #f9f9f9;">
|
173
|
+
<h4 style="margin-top: 0; margin-bottom: 5px; border-bottom: 1px solid #ccc; padding-bottom: 5px;">Element Info</h4>
|
174
|
+
<pre id="{self.widget_id}-element-data" style="white-space: pre-wrap; word-break: break-all; font-size: 0.9em;"></pre>
|
175
|
+
</div>
|
207
176
|
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
177
|
+
</div>
|
178
|
+
"""
|
179
|
+
|
180
|
+
# Display the HTML
|
181
|
+
display(HTML(container_html))
|
182
|
+
|
183
|
+
# Generate JavaScript to add interactivity
|
184
|
+
self._add_javascript()
|
185
|
+
|
186
|
+
def _add_javascript(self):
|
187
|
+
"""Add JavaScript to make the viewer interactive"""
|
188
|
+
# Create JavaScript for element selection and SVG highlighting
|
189
|
+
js_code = """
|
190
|
+
(function() {
|
191
|
+
// Store widget ID in a variable to avoid issues with string templates
|
192
|
+
const widgetId = "%s";
|
212
193
|
|
213
|
-
//
|
214
|
-
|
215
|
-
|
194
|
+
// Initialize PDF viewer registry if it doesn't exist
|
195
|
+
if (!window.pdfViewerRegistry) {
|
196
|
+
window.pdfViewerRegistry = {};
|
197
|
+
}
|
216
198
|
|
217
|
-
//
|
218
|
-
|
199
|
+
// Store PDF data for this widget
|
200
|
+
window.pdfViewerRegistry[widgetId] = {
|
201
|
+
initialData: %s,
|
202
|
+
selectedElement: null,
|
203
|
+
scale: 1.0, // Initial zoom scale
|
204
|
+
translateX: 0, // Initial pan X
|
205
|
+
translateY: 0, // Initial pan Y
|
206
|
+
isDragging: false, // Flag for panning
|
207
|
+
startX: 0, // Drag start X
|
208
|
+
startY: 0, // Drag start Y
|
209
|
+
startTranslateX: 0, // Translate X at drag start
|
210
|
+
startTranslateY: 0, // Translate Y at drag start
|
211
|
+
justDragged: false // Flag to differentiate click from drag completion
|
212
|
+
};
|
219
213
|
|
220
|
-
//
|
221
|
-
viewerData
|
222
|
-
|
214
|
+
// Get references to elements
|
215
|
+
const viewerData = window.pdfViewerRegistry[widgetId];
|
216
|
+
const outerContainer = document.querySelector(`#${widgetId} .pdf-outer-container`);
|
217
|
+
const zoomPanContainer = document.getElementById(`${widgetId}-zoom-pan-container`);
|
218
|
+
const elements = zoomPanContainer.querySelectorAll(".pdf-element");
|
219
|
+
const zoomInButton = document.getElementById(`${widgetId}-zoom-in`);
|
220
|
+
const zoomOutButton = document.getElementById(`${widgetId}-zoom-out`);
|
221
|
+
const resetButton = document.getElementById(`${widgetId}-reset-zoom`);
|
223
222
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
outerContainer.addEventListener('wheel', handleZoom);
|
228
|
-
|
229
|
-
// --- Panning Logic ---
|
230
|
-
const dragThreshold = 5; // Pixels to move before drag starts
|
231
|
-
|
232
|
-
function handleMouseDown(event) {
|
233
|
-
// Prevent default only if needed (e.g., text selection on image)
|
234
|
-
if (event.target.tagName === 'IMG') {
|
235
|
-
event.preventDefault();
|
236
|
-
}
|
237
|
-
// Allow mousedown events on elements to proceed for potential clicks
|
238
|
-
// Record start position for potential drag
|
239
|
-
viewerData.startX = event.clientX;
|
240
|
-
viewerData.startY = event.clientY;
|
241
|
-
// Store initial translate values to calculate relative movement
|
242
|
-
viewerData.startTranslateX = viewerData.translateX;
|
243
|
-
viewerData.startTranslateY = viewerData.translateY;
|
244
|
-
// Don't set isDragging = true yet
|
245
|
-
// Don't change pointerEvents yet
|
246
|
-
}
|
247
|
-
|
248
|
-
function handleMouseMove(event) {
|
249
|
-
// Check if mouse button is actually down (browser inconsistencies)
|
250
|
-
if (event.buttons !== 1) {
|
251
|
-
if (viewerData.isDragging) {
|
252
|
-
// Force drag end if button is released unexpectedly
|
253
|
-
handleMouseUp(event);
|
254
|
-
}
|
255
|
-
return;
|
256
|
-
}
|
257
|
-
|
258
|
-
const currentX = event.clientX;
|
259
|
-
const currentY = event.clientY;
|
260
|
-
const deltaX = currentX - viewerData.startX;
|
261
|
-
const deltaY = currentY - viewerData.startY;
|
262
|
-
|
263
|
-
// If not already dragging, check if threshold is exceeded
|
264
|
-
if (!viewerData.isDragging) {
|
265
|
-
const movedDistance = Math.hypot(deltaX, deltaY);
|
266
|
-
if (movedDistance > dragThreshold) {
|
267
|
-
viewerData.isDragging = true;
|
268
|
-
zoomPanContainer.style.cursor = 'grabbing';
|
269
|
-
// Now disable pointer events on elements since a drag has started
|
270
|
-
elements.forEach(el => el.style.pointerEvents = 'none');
|
271
|
-
}
|
272
|
-
}
|
273
|
-
|
274
|
-
// If dragging, update transform
|
275
|
-
if (viewerData.isDragging) {
|
276
|
-
// Prevent text selection during drag
|
277
|
-
event.preventDefault();
|
278
|
-
viewerData.translateX = viewerData.startTranslateX + deltaX;
|
279
|
-
viewerData.translateY = viewerData.startTranslateY + deltaY;
|
280
|
-
applyTransform();
|
223
|
+
// --- Helper function to apply transform ---
|
224
|
+
function applyTransform() {
|
225
|
+
zoomPanContainer.style.transform = `translate(${viewerData.translateX}px, ${viewerData.translateY}px) scale(${viewerData.scale})`;
|
281
226
|
}
|
282
|
-
}
|
283
|
-
|
284
|
-
function handleMouseUp(event) {
|
285
|
-
const wasDragging = viewerData.isDragging;
|
286
227
|
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
if (wasDragging) {
|
291
|
-
viewerData.isDragging = false;
|
292
|
-
// Restore pointer events now that drag is finished
|
293
|
-
elements.forEach(el => el.style.pointerEvents = 'auto');
|
228
|
+
// --- Zooming Logic ---
|
229
|
+
function handleZoom(event) {
|
230
|
+
event.preventDefault(); // Prevent default scroll
|
294
231
|
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
//
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
// Attach to window or document for smoother dragging even if mouse leaves outerContainer
|
318
|
-
// Using outerContainer for now, might need adjustment if dragging feels jerky near edges
|
319
|
-
outerContainer.addEventListener('mousemove', handleMouseMove);
|
320
|
-
|
321
|
-
// Mouseup ends the drag *or* allows a click to proceed
|
322
|
-
// Attach to window or document to ensure drag ends even if mouse released outside
|
323
|
-
// Using outerContainer for now
|
324
|
-
outerContainer.addEventListener('mouseup', handleMouseUp);
|
325
|
-
|
326
|
-
// Stop dragging if mouse leaves the outer container entirely (optional but good practice)
|
327
|
-
outerContainer.addEventListener('mouseleave', (event) => {
|
328
|
-
// Only act if the primary mouse button is NOT pressed anymore when leaving
|
329
|
-
if (viewerData.isDragging && event.buttons !== 1) {
|
330
|
-
handleMouseUp(event);
|
331
|
-
}
|
332
|
-
});
|
333
|
-
|
334
|
-
// --- Button Listeners ---
|
335
|
-
zoomInButton.addEventListener('click', () => {
|
336
|
-
const centerRect = outerContainer.getBoundingClientRect();
|
337
|
-
const centerX = centerRect.width / 2;
|
338
|
-
const centerY = centerRect.height / 2;
|
339
|
-
const zoomFactor = 1.2;
|
340
|
-
const newScale = Math.min(5, viewerData.scale * zoomFactor);
|
341
|
-
const pointX = (centerX - viewerData.translateX) / viewerData.scale;
|
342
|
-
const pointY = (centerY - viewerData.translateY) / viewerData.scale;
|
343
|
-
viewerData.scale = newScale;
|
344
|
-
viewerData.translateX = centerX - pointX * viewerData.scale;
|
345
|
-
viewerData.translateY = centerY - pointY * viewerData.scale;
|
346
|
-
applyTransform();
|
347
|
-
});
|
348
|
-
|
349
|
-
zoomOutButton.addEventListener('click', () => {
|
350
|
-
const centerRect = outerContainer.getBoundingClientRect();
|
351
|
-
const centerX = centerRect.width / 2;
|
352
|
-
const centerY = centerRect.height / 2;
|
353
|
-
const zoomFactor = 1 / 1.2;
|
354
|
-
const newScale = Math.max(0.5, viewerData.scale * zoomFactor);
|
355
|
-
const pointX = (centerX - viewerData.translateX) / viewerData.scale;
|
356
|
-
const pointY = (centerY - viewerData.translateY) / viewerData.scale;
|
357
|
-
viewerData.scale = newScale;
|
358
|
-
viewerData.translateX = centerX - pointX * viewerData.scale;
|
359
|
-
viewerData.translateY = centerY - pointY * viewerData.scale;
|
360
|
-
applyTransform();
|
361
|
-
});
|
362
|
-
|
363
|
-
resetButton.addEventListener('click', () => {
|
364
|
-
viewerData.scale = 1.0;
|
365
|
-
viewerData.translateX = 0;
|
366
|
-
viewerData.translateY = 0;
|
367
|
-
applyTransform();
|
368
|
-
// Also reset selection on zoom reset
|
369
|
-
if (viewerData.selectedElement !== null) {
|
370
|
-
resetElementStyle(viewerData.selectedElement);
|
371
|
-
viewerData.selectedElement = null;
|
372
|
-
// Optionally clear info panel
|
373
|
-
// const elementData = document.getElementById(widgetId + "-element-data");
|
374
|
-
// if (elementData) elementData.textContent = '';
|
375
|
-
}
|
376
|
-
});
|
377
|
-
|
378
|
-
// --- Helper function to reset element style ---
|
379
|
-
function resetElementStyle(elementIdx) {
|
380
|
-
const el = zoomPanContainer.querySelector(`.pdf-element[data-element-id='${elementIdx}']`);
|
381
|
-
const svgRect = document.querySelector(`#${widgetId} .svg-layer svg rect[data-element-id='${elementIdx}']`);
|
382
|
-
if (!el) return;
|
383
|
-
|
384
|
-
const viewer = window.pdfViewerRegistry[widgetId];
|
385
|
-
const eType = viewer.initialData.elements[elementIdx].type || 'unknown';
|
386
|
-
|
387
|
-
if (eType === 'text') {
|
388
|
-
el.style.backgroundColor = "rgba(255, 255, 0, 0.3)";
|
389
|
-
} else if (eType === 'image') {
|
390
|
-
el.style.backgroundColor = "rgba(0, 128, 255, 0.3)";
|
391
|
-
} else if (eType === 'figure') {
|
392
|
-
el.style.backgroundColor = "rgba(255, 0, 255, 0.3)";
|
393
|
-
} else if (eType === 'table') {
|
394
|
-
el.style.backgroundColor = "rgba(0, 255, 0, 0.3)";
|
395
|
-
} else {
|
396
|
-
el.style.backgroundColor = "rgba(200, 200, 200, 0.3)";
|
232
|
+
const zoomIntensity = 0.1;
|
233
|
+
const wheelDelta = event.deltaY < 0 ? 1 : -1; // +1 for zoom in, -1 for zoom out
|
234
|
+
const zoomFactor = Math.exp(wheelDelta * zoomIntensity);
|
235
|
+
const newScale = Math.max(0.5, Math.min(5, viewerData.scale * zoomFactor)); // Clamp scale
|
236
|
+
|
237
|
+
// Calculate mouse position relative to the outer container
|
238
|
+
const rect = outerContainer.getBoundingClientRect();
|
239
|
+
const mouseX = event.clientX - rect.left;
|
240
|
+
const mouseY = event.clientY - rect.top;
|
241
|
+
|
242
|
+
// Calculate the point in the content that the mouse is pointing to
|
243
|
+
const pointX = (mouseX - viewerData.translateX) / viewerData.scale;
|
244
|
+
const pointY = (mouseY - viewerData.translateY) / viewerData.scale;
|
245
|
+
|
246
|
+
// Update scale
|
247
|
+
viewerData.scale = newScale;
|
248
|
+
|
249
|
+
// Calculate new translation to keep the pointed-at location fixed
|
250
|
+
viewerData.translateX = mouseX - pointX * viewerData.scale;
|
251
|
+
viewerData.translateY = mouseY - pointY * viewerData.scale;
|
252
|
+
|
253
|
+
applyTransform();
|
397
254
|
}
|
398
|
-
|
255
|
+
|
256
|
+
outerContainer.addEventListener('wheel', handleZoom);
|
257
|
+
|
258
|
+
// --- Panning Logic ---
|
259
|
+
const dragThreshold = 5; // Pixels to move before drag starts
|
399
260
|
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
if (svgRect) {
|
416
|
-
svgRect.setAttribute("stroke", "rgba(64, 158, 255, 0.9)");
|
417
|
-
svgRect.setAttribute("stroke-width", "2.5");
|
418
|
-
}
|
419
|
-
}
|
420
|
-
|
421
|
-
// --- Background Click Listener (on outer container) ---
|
422
|
-
outerContainer.addEventListener('click', (event) => {
|
423
|
-
// Ignore click if it resulted from the end of a drag
|
424
|
-
if (viewerData.justDragged) {
|
425
|
-
return;
|
261
|
+
function handleMouseDown(event) {
    // Suppress the browser default (e.g. text selection) only when the press
    // lands on an image; presses on elements stay untouched so that they can
    // still turn into ordinary clicks.
    const pressedOnImage = event.target.tagName === 'IMG';
    if (pressedOnImage) {
        event.preventDefault();
    }

    // Snapshot where the pointer and the pan offset are right now. A later
    // mousemove measures its movement against these values. Nothing is
    // marked as dragging here and pointer events are left alone: whether
    // this press becomes a drag is decided in handleMouseMove.
    Object.assign(viewerData, {
        startX: event.clientX,
        startY: event.clientY,
        startTranslateX: viewerData.translateX,
        startTranslateY: viewerData.translateY,
    });
}
|
276
|
+
|
277
|
+
function handleMouseMove(event) {
    // Only a move with exactly the primary button held can be part of a drag.
    const primaryHeld = event.buttons === 1;
    if (!primaryHeld) {
        // The button was released without a mouseup reaching us: end the drag now.
        if (viewerData.isDragging) {
            handleMouseUp(event);
        }
        return;
    }

    // Pointer movement since the press recorded by handleMouseDown.
    const offsetX = event.clientX - viewerData.startX;
    const offsetY = event.clientY - viewerData.startY;

    // Promote the gesture to a drag once the pointer has moved past the threshold.
    if (!viewerData.isDragging && Math.hypot(offsetX, offsetY) > dragThreshold) {
        viewerData.isDragging = true;
        zoomPanContainer.style.cursor = 'grabbing';
        // Elements stop receiving pointer events for the duration of the drag.
        elements.forEach((node) => { node.style.pointerEvents = 'none'; });
    }

    if (!viewerData.isDragging) {
        return;
    }

    // Prevent text selection while dragging, then pan relative to the
    // translation that was in effect when the press started.
    event.preventDefault();
    viewerData.translateX = viewerData.startTranslateX + offsetX;
    viewerData.translateY = viewerData.startTranslateY + offsetY;
    applyTransform();
}
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
elements.forEach(function(el) {
|
454
|
-
el.addEventListener("click", function(event) {
|
455
|
-
// Stop propagation to prevent the background click handler from immediately deselecting.
|
456
|
-
event.stopPropagation();
|
312
|
+
|
313
|
+
function handleMouseUp(event) {
    const dragJustEnded = viewerData.isDragging;

    // The cursor returns to its idle look however the gesture ended.
    zoomPanContainer.style.cursor = 'grab';

    if (!dragJustEnded) {
        // Plain press-and-release: nothing to undo. The click event that
        // follows is handled by the element or background click listeners.
        return;
    }

    viewerData.isDragging = false;
    // Elements may receive pointer events again now that the drag is over.
    elements.forEach((node) => { node.style.pointerEvents = 'auto'; });

    // Flag the just-finished drag so the click listener can ignore the click
    // produced by this release; a zero-delay timer clears the flag afterwards.
    viewerData.justDragged = true;
    setTimeout(() => { viewerData.justDragged = false; }, 0);

    // Keep this mouseup from triggering other default actions. Propagation is
    // deliberately not stopped; the click listener relies on justDragged.
    event.preventDefault();
}
|
340
|
+
|
341
|
+
// Drag gesture wiring. All handlers are bound to outerContainer, so a drag can
// begin anywhere inside the viewer. Binding mousemove/mouseup to window or
// document instead would keep following the pointer once it leaves the
// container; that has not been done here.
outerContainer.addEventListener('mousedown', handleMouseDown); // arms a *potential* drag
outerContainer.addEventListener('mousemove', handleMouseMove); // decides whether it is a drag and pans
outerContainer.addEventListener('mouseup', handleMouseUp);     // ends the drag or lets a click proceed

// When the pointer leaves the container, finish an active drag, but only if
// the primary button is no longer the one being held.
outerContainer.addEventListener('mouseleave', function endDragOnLeave(event) {
    const primaryStillHeld = event.buttons === 1;
    if (!viewerData.isDragging || primaryStillHeld) {
        return;
    }
    handleMouseUp(event);
});
|
362
|
+
|
363
|
+
// --- Button Listeners ---
|
364
|
+
// Zoom by `factor` around the centre of the visible viewport.
// The content point currently under the centre of outerContainer stays under
// the centre after the scale change. The scale is clamped to the same
// [0.5, 5] range that the wheel zoom uses, on both sides.
function zoomAboutCenter(factor) {
    const bounds = outerContainer.getBoundingClientRect();
    const centerX = bounds.width / 2;
    const centerY = bounds.height / 2;

    // Content coordinates of the point shown at the viewport centre right now.
    const pointX = (centerX - viewerData.translateX) / viewerData.scale;
    const pointY = (centerY - viewerData.translateY) / viewerData.scale;

    viewerData.scale = Math.max(0.5, Math.min(5, viewerData.scale * factor));

    // Re-derive the translation so that the same content point stays centred.
    viewerData.translateX = centerX - pointX * viewerData.scale;
    viewerData.translateY = centerY - pointY * viewerData.scale;
    applyTransform();
}

// One button step multiplies or divides the scale by 1.2.
zoomInButton.addEventListener('click', () => zoomAboutCenter(1.2));
zoomOutButton.addEventListener('click', () => zoomAboutCenter(1 / 1.2));
|
391
|
+
|
392
|
+
resetButton.addEventListener('click', () => {
    // Back to the initial view: scale 1 and no pan offset.
    Object.assign(viewerData, { scale: 1.0, translateX: 0, translateY: 0 });
    applyTransform();

    // Resetting the view also drops the current selection, if there is one.
    // The info panel content is not touched here.
    const selected = viewerData.selectedElement;
    if (selected === null) {
        return;
    }
    resetElementStyle(selected);
    viewerData.selectedElement = null;
});
|
406
|
+
|
407
|
+
// --- Helper function to reset element style ---
|
408
|
+
function resetElementStyle(elementIdx) {
    // Restore the default (unselected, unhovered) look of one element: the
    // overlay gets the fill colour for its type, and the matching SVG
    // rectangle, when present, gets the default stroke.
    const overlay = zoomPanContainer.querySelector(`.pdf-element[data-element-id='${elementIdx}']`);
    const outline = document.querySelector(`#${widgetId} .svg-layer svg rect[data-element-id='${elementIdx}']`);
    if (!overlay) {
        return;
    }

    const registryEntry = window.pdfViewerRegistry[widgetId];
    const kind = registryEntry.initialData.elements[elementIdx].type || 'unknown';

    // Default fill per element type; any other type falls back to grey.
    const fillByType = new Map([
        ['text', "rgba(255, 255, 0, 0.3)"],
        ['image', "rgba(0, 128, 255, 0.3)"],
        ['figure', "rgba(255, 0, 255, 0.3)"],
        ['table', "rgba(0, 255, 0, 0.3)"],
    ]);
    overlay.style.backgroundColor = fillByType.has(kind)
        ? fillByType.get(kind)
        : "rgba(200, 200, 200, 0.3)";
    overlay.style.border = "1px dashed transparent";

    if (outline) {
        outline.setAttribute("stroke", "rgba(255, 165, 0, 0.85)");
        outline.setAttribute("stroke-width", "1.5");
    }
}
|
434
|
+
|
435
|
+
// --- Helper function to set element style (selected/hover) ---
|
436
|
+
function setElementHighlightStyle(elementIdx) {
    // Apply the highlight look used for both the selected and the hovered
    // element: a blue tint and border on the overlay, and a thicker blue
    // stroke on the matching SVG rectangle when there is one.
    const overlay = zoomPanContainer.querySelector(`.pdf-element[data-element-id='${elementIdx}']`);
    const outline = document.querySelector(`#${widgetId} .svg-layer svg rect[data-element-id='${elementIdx}']`);
    if (!overlay) {
        return;
    }

    Object.assign(overlay.style, {
        backgroundColor: "rgba(64, 158, 255, 0.15)",
        border: "2px solid rgba(64, 158, 255, 0.6)",
    });

    if (outline) {
        outline.setAttribute("stroke", "rgba(64, 158, 255, 0.9)");
        outline.setAttribute("stroke-width", "2.5");
    }
}
|
497
449
|
|
498
|
-
//
|
499
|
-
|
500
|
-
//
|
501
|
-
|
502
|
-
|
503
|
-
return; // Do nothing if an element is selected
|
450
|
+
// --- Background Click Listener (on outer container) ---
|
451
|
+
outerContainer.addEventListener('click', (event) => {
    // The selection is left alone when:
    //  - the click is the tail end of a drag (justDragged),
    //  - the click landed on an element, whose own click handler manages it,
    //  - a drag is still in progress.
    if (viewerData.justDragged || event.target.closest('.pdf-element') || viewerData.isDragging) {
        return;
    }

    const selected = viewerData.selectedElement;
    if (selected === null) {
        return;
    }

    // A click on the background deselects the current element.
    resetElementStyle(selected);
    viewerData.selectedElement = null;

    // Clear the info text; the panel itself stays visible.
    const infoPanel = document.getElementById(widgetId + "-info-panel");
    const elementData = document.getElementById(widgetId + "-element-data");
    if (infoPanel && elementData) {
        elementData.textContent = "";
    }
});
|
529
|
-
|
530
|
-
el.addEventListener("mouseleave", function() {
|
531
|
-
// *** Only reset hover if NOTHING is selected ***
|
532
|
-
const viewer = window.pdfViewerRegistry[widgetId];
|
533
|
-
if (viewer.selectedElement !== null) {
|
534
|
-
return; // Do nothing if an element is selected
|
535
|
-
}
|
536
|
-
// Avoid hover effect while dragging
|
537
|
-
if (viewer.isDragging) {
|
538
|
-
return;
|
539
|
-
}
|
540
480
|
|
541
|
-
|
481
|
+
// Add click handlers to elements
|
482
|
+
// Wire selection (click) and hover (mouseenter / mouseleave) behaviour onto
// every element overlay.
elements.forEach(function (el) {
    // The element id is read at event time and parsed explicitly as a
    // decimal index into viewer.initialData.elements.
    const readIndex = () => parseInt(el.dataset.elementId, 10);

    // Show the JSON of one element in the info panel. Does nothing when the
    // panel nodes or the element data cannot be found.
    const showInfo = (viewer, elementIdx) => {
        const infoPanel = document.getElementById(widgetId + "-info-panel");
        const elementData = document.getElementById(widgetId + "-element-data");
        if (!infoPanel || !elementData) {
            return;
        }
        const element = viewer.initialData.elements[elementIdx];
        if (!element) {
            return;
        }
        infoPanel.style.display = "block";
        elementData.textContent = JSON.stringify(element, null, 2);
    };

    el.addEventListener("click", function (event) {
        // Keep the background click handler from immediately deselecting.
        event.stopPropagation();

        const viewer = window.pdfViewerRegistry[widgetId];
        const elementIdx = readIndex();

        // Un-highlight the previously selected element, if it is a different one.
        if (viewer.selectedElement !== null && viewer.selectedElement !== elementIdx) {
            resetElementStyle(viewer.selectedElement);
        }

        // Clicking the already selected element keeps it selected (no toggle).
        viewer.selectedElement = elementIdx;
        setElementHighlightStyle(elementIdx);
        showInfo(viewer, elementIdx);
    });

    el.addEventListener("mouseenter", function () {
        const viewer = window.pdfViewerRegistry[widgetId];
        // Hover feedback only applies while nothing is selected and no drag is running.
        if (viewer.selectedElement !== null || viewer.isDragging) {
            return;
        }
        const elementIdx = readIndex();
        setElementHighlightStyle(elementIdx);
        showInfo(viewer, elementIdx);
    });

    el.addEventListener("mouseleave", function () {
        const viewer = window.pdfViewerRegistry[widgetId];
        // Same conditions as mouseenter; the info panel text is left in place.
        if (viewer.selectedElement !== null || viewer.isDragging) {
            return;
        }
        resetElementStyle(readIndex());
    });
});
|
583
|
+
|
584
|
+
})();
|
585
|
+
""" % (
|
586
|
+
self.widget_id,
|
587
|
+
json.dumps(self.pdf_data),
|
588
|
+
)
|
589
|
+
|
590
|
+
# Add the JavaScript
|
591
|
+
display(Javascript(js_code))
|
592
|
+
|
593
|
+
def _repr_html_(self):
|
594
|
+
"""Return an empty string for the HTML repr hook, because the widget's HTML has already been displayed."""
|
595
|
+
return ""
|
596
|
+
|
597
|
+
@classmethod
def from_page(cls, page, on_element_click=None, include_attributes=None):
    """
    Create a viewer widget from a Page object.

    The page is rendered with ``page.to_image(resolution=72)``, encoded as a
    base64 PNG data URI, and every element whose type is not ``"char"`` is
    converted to a dict of scaled coordinates plus a curated set of
    attributes.

    Args:
        page: A natural_pdf.core.page.Page object
        on_element_click: Optional callback function for element clicks.
            Accepted for API compatibility; this method does not use it.
        include_attributes: Optional list of *additional* specific attributes to include.
            A default set of common/useful attributes is always included.

    Returns:
        A new instance of ``cls`` built from the image URI and the element dicts.

    Raises:
        ValueError: If rendering the page image fails; the underlying error
            is chained as the cause.
    """
    import base64
    from io import BytesIO

    from PIL import Image  # Ensure Image is imported

    # Values of these types are stored as-is; anything else is stringified.
    json_native_types = (str, int, float, bool, list, dict, tuple)

    def _copy_attributes(element, index, names, elem_dict, seen, kind):
        """Copy the named attributes of *element* into *elem_dict*, skipping names already in *seen*."""
        for attr_name in names:
            if attr_name in seen or not hasattr(element, attr_name):
                continue
            try:
                value = getattr(element, attr_name)
                # Convert non-JSON serializable types to string
                if value is not None and not isinstance(value, json_native_types):
                    value = str(value)
                elem_dict[attr_name] = value
                seen.add(attr_name)
            except Exception as e:
                # Best effort: one unreadable attribute must not lose the element.
                logger.warning(
                    f"Could not get or process {kind} attribute '{attr_name}' for element {index} ({element.type}): {e}"
                )

    # Render page to image using the correct method and parameter
    scale = 1.0  # Define scale factor used for rendering
    try:
        img_object = page.to_image(resolution=int(72 * scale))
        # to_image() may return a wrapper exposing the PIL image as .original,
        # a PIL image directly, or (as a last resort) raw image bytes.
        if hasattr(img_object, "original") and isinstance(img_object.original, Image.Image):
            img = img_object.original
        elif isinstance(img_object, Image.Image):
            img = img_object
        else:
            try:
                img = Image.open(BytesIO(img_object)).convert("RGB")
            except Exception as open_err:
                raise TypeError(
                    f"page.to_image() returned unexpected type: {type(img_object)}"
                ) from open_err
        logger.debug(f"Successfully rendered page {page.index} using to_image()")
    except Exception as render_err:
        logger.error(
            f"Error rendering page {page.index} image for widget: {render_err}",
            exc_info=True,
        )
        # Raise rather than return None so the failure is visible to the caller.
        raise ValueError(f"Failed to render page image: {render_err}") from render_err

    buffered = BytesIO()
    img.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    image_uri = f"data:image/png;base64,{img_str}"

    # Use page.elements when available. find_all("*") is only called as a
    # fallback; passing it as the getattr default would run it every time.
    page_elements = getattr(page, "elements", None)
    if page_elements is None:
        page_elements = page.find_all("*")

    # Filter out 'char' elements (a missing or None type counts as not 'char')
    filtered_page_elements = [
        el for el in page_elements if str(getattr(el, "type", "") or "").lower() != "char"
    ]
    logger.debug(
        f"Filtered out char elements, keeping {len(filtered_page_elements)} elements."
    )

    # Common/useful attributes (properties) to check for on every element
    default_attributes_to_get = [
        "text",
        "fontname",
        "size",
        "bold",
        "italic",
        "color",
        "linewidth",  # For lines (pdfplumber uses 'linewidth')
        "is_horizontal",
        "is_vertical",  # For lines
        "source",
        "confidence",  # For text/OCR
        "label",  # Common for layout elements
        "model",  # Add the model name (engine)
        "upright",
        "direction",  # from pdfplumber chars/words
    ]

    elements = []
    for i, element in enumerate(filtered_page_elements):
        # Base element dict: index, type, and coordinates scaled for HTML/SVG positioning
        elem_dict = {
            "id": i,
            "type": element.type,
            "x0": element.x0 * scale,
            "y0": element.top * scale,
            "x1": element.x1 * scale,
            "y1": element.bottom * scale,
            "width": element.width * scale,
            "height": element.height * scale,
        }

        attributes_found = set()
        _copy_attributes(
            element, i, default_attributes_to_get, elem_dict, attributes_found, "default"
        )
        # User-requested attributes are added only if not already collected
        if include_attributes:
            _copy_attributes(
                element, i, include_attributes, elem_dict, attributes_found, "requested"
            )

        # Round floats to two decimals
        elements.append(
            {
                key: round(value, 2) if isinstance(value, float) else value
                for key, value in elem_dict.items()
            }
        )

    logger.debug(
        f"Prepared {len(elements)} elements for widget with scaled coordinates and curated attributes."
    )

    # Create and return widget
    # The actual JSON conversion happens when the data is sent to the frontend
    return cls(image_uri=image_uri, elements=elements)
|
757
|
+
|
758
|
+
# Keep the original widget class for reference, but make it not register
|
759
|
+
# by commenting out the decorator
|
760
|
+
# @widgets.register
|
761
|
+
class InteractiveViewerWidget(widgets.DOMWidget):
    """DOM widget exposing a page image and its elements as synced traits.

    The view is looked up by name/module/version through the ``_view_*``
    traits below; ``image_uri``, ``page_dimensions`` and ``elements`` are all
    tagged ``sync=True``.
    """

    # Front-end view lookup
    _view_name = Unicode("InteractiveViewerView").tag(sync=True)
    _view_module = Unicode("viewer_widget").tag(sync=True)
    _view_module_version = Unicode("^0.1.0").tag(sync=True)

    # Synced state
    image_uri = Unicode("").tag(sync=True)
    page_dimensions = Dict({}).tag(sync=True)
    elements = List([]).tag(sync=True)

    def __init__(self, **kwargs):
        """Forward all keyword arguments to DOMWidget and log the creation."""
        super().__init__(**kwargs)
        logger.debug("InteractiveViewerWidget initialized (Python).")

    @observe("elements")
    def _elements_changed(self, change):
        """Log the new element count at DEBUG level when ``elements`` changes."""
        # Skip building the message entirely unless DEBUG logging is enabled.
        if not logger.isEnabledFor(logging.DEBUG):
            return
        logger.debug(f"Python: Elements traitlet changed. New count: {len(change['new'])}")
|
785
|
+
|
786
|
+
except ImportError:
|
787
|
+
logger.info(
|
788
|
+
"Optional dependency 'ipywidgets' not found. Interactive viewer widgets will not be defined."
|
789
|
+
)
|
790
|
+
# Ensure class variables are None if import fails
|
791
|
+
SimpleInteractiveViewerWidget = None
|
792
|
+
InteractiveViewerWidget = None
|
793
|
+
_IPYWIDGETS_AVAILABLE = False # Explicitly set flag to False here too
|
794
|
+
|
795
|
+
# Example usage - kept outside the try/except as comments
|
796
|
+
# ... (existing example usage comments) ...
|