natural-pdf 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. docs/api/index.md +386 -0
  2. docs/assets/favicon.png +3 -0
  3. docs/assets/favicon.svg +3 -0
  4. docs/assets/javascripts/custom.js +17 -0
  5. docs/assets/logo.svg +3 -0
  6. docs/assets/sample-screen.png +0 -0
  7. docs/assets/social-preview.png +17 -0
  8. docs/assets/social-preview.svg +17 -0
  9. docs/assets/stylesheets/custom.css +65 -0
  10. docs/document-qa/index.ipynb +435 -0
  11. docs/document-qa/index.md +79 -0
  12. docs/element-selection/index.ipynb +915 -0
  13. docs/element-selection/index.md +229 -0
  14. docs/index.md +170 -0
  15. docs/installation/index.md +69 -0
  16. docs/interactive-widget/index.ipynb +962 -0
  17. docs/interactive-widget/index.md +12 -0
  18. docs/layout-analysis/index.ipynb +818 -0
  19. docs/layout-analysis/index.md +185 -0
  20. docs/ocr/index.md +209 -0
  21. docs/pdf-navigation/index.ipynb +314 -0
  22. docs/pdf-navigation/index.md +97 -0
  23. docs/regions/index.ipynb +816 -0
  24. docs/regions/index.md +294 -0
  25. docs/tables/index.ipynb +658 -0
  26. docs/tables/index.md +144 -0
  27. docs/text-analysis/index.ipynb +370 -0
  28. docs/text-analysis/index.md +105 -0
  29. docs/text-extraction/index.ipynb +1478 -0
  30. docs/text-extraction/index.md +292 -0
  31. docs/tutorials/01-loading-and-extraction.ipynb +1710 -0
  32. docs/tutorials/01-loading-and-extraction.md +95 -0
  33. docs/tutorials/02-finding-elements.ipynb +340 -0
  34. docs/tutorials/02-finding-elements.md +149 -0
  35. docs/tutorials/03-extracting-blocks.ipynb +147 -0
  36. docs/tutorials/03-extracting-blocks.md +48 -0
  37. docs/tutorials/04-table-extraction.ipynb +114 -0
  38. docs/tutorials/04-table-extraction.md +50 -0
  39. docs/tutorials/05-excluding-content.ipynb +270 -0
  40. docs/tutorials/05-excluding-content.md +109 -0
  41. docs/tutorials/06-document-qa.ipynb +332 -0
  42. docs/tutorials/06-document-qa.md +91 -0
  43. docs/tutorials/07-layout-analysis.ipynb +288 -0
  44. docs/tutorials/07-layout-analysis.md +66 -0
  45. docs/tutorials/07-working-with-regions.ipynb +413 -0
  46. docs/tutorials/07-working-with-regions.md +151 -0
  47. docs/tutorials/08-spatial-navigation.ipynb +508 -0
  48. docs/tutorials/08-spatial-navigation.md +190 -0
  49. docs/tutorials/09-section-extraction.ipynb +2434 -0
  50. docs/tutorials/09-section-extraction.md +256 -0
  51. docs/tutorials/10-form-field-extraction.ipynb +512 -0
  52. docs/tutorials/10-form-field-extraction.md +201 -0
  53. docs/tutorials/11-enhanced-table-processing.ipynb +54 -0
  54. docs/tutorials/11-enhanced-table-processing.md +9 -0
  55. docs/tutorials/12-ocr-integration.ipynb +604 -0
  56. docs/tutorials/12-ocr-integration.md +175 -0
  57. docs/tutorials/13-semantic-search.ipynb +1328 -0
  58. docs/tutorials/13-semantic-search.md +77 -0
  59. docs/visual-debugging/index.ipynb +2970 -0
  60. docs/visual-debugging/index.md +157 -0
  61. docs/visual-debugging/region.png +0 -0
  62. natural_pdf/__init__.py +50 -33
  63. natural_pdf/analyzers/__init__.py +2 -1
  64. natural_pdf/analyzers/layout/base.py +32 -24
  65. natural_pdf/analyzers/layout/docling.py +131 -72
  66. natural_pdf/analyzers/layout/gemini.py +264 -0
  67. natural_pdf/analyzers/layout/layout_analyzer.py +156 -113
  68. natural_pdf/analyzers/layout/layout_manager.py +125 -58
  69. natural_pdf/analyzers/layout/layout_options.py +43 -17
  70. natural_pdf/analyzers/layout/paddle.py +152 -95
  71. natural_pdf/analyzers/layout/surya.py +164 -92
  72. natural_pdf/analyzers/layout/tatr.py +149 -84
  73. natural_pdf/analyzers/layout/yolo.py +89 -45
  74. natural_pdf/analyzers/text_options.py +22 -15
  75. natural_pdf/analyzers/text_structure.py +131 -85
  76. natural_pdf/analyzers/utils.py +30 -23
  77. natural_pdf/collections/pdf_collection.py +146 -97
  78. natural_pdf/core/__init__.py +1 -1
  79. natural_pdf/core/element_manager.py +419 -337
  80. natural_pdf/core/highlighting_service.py +268 -196
  81. natural_pdf/core/page.py +1044 -521
  82. natural_pdf/core/pdf.py +516 -313
  83. natural_pdf/elements/__init__.py +1 -1
  84. natural_pdf/elements/base.py +307 -225
  85. natural_pdf/elements/collections.py +805 -543
  86. natural_pdf/elements/line.py +39 -36
  87. natural_pdf/elements/rect.py +32 -30
  88. natural_pdf/elements/region.py +889 -879
  89. natural_pdf/elements/text.py +127 -99
  90. natural_pdf/exporters/__init__.py +0 -1
  91. natural_pdf/exporters/searchable_pdf.py +261 -102
  92. natural_pdf/ocr/__init__.py +57 -35
  93. natural_pdf/ocr/engine.py +150 -46
  94. natural_pdf/ocr/engine_easyocr.py +146 -150
  95. natural_pdf/ocr/engine_paddle.py +118 -175
  96. natural_pdf/ocr/engine_surya.py +78 -141
  97. natural_pdf/ocr/ocr_factory.py +114 -0
  98. natural_pdf/ocr/ocr_manager.py +122 -124
  99. natural_pdf/ocr/ocr_options.py +16 -20
  100. natural_pdf/ocr/utils.py +98 -0
  101. natural_pdf/qa/__init__.py +1 -1
  102. natural_pdf/qa/document_qa.py +119 -111
  103. natural_pdf/search/__init__.py +37 -31
  104. natural_pdf/search/haystack_search_service.py +312 -189
  105. natural_pdf/search/haystack_utils.py +186 -122
  106. natural_pdf/search/search_options.py +25 -14
  107. natural_pdf/search/search_service_protocol.py +12 -6
  108. natural_pdf/search/searchable_mixin.py +261 -176
  109. natural_pdf/selectors/__init__.py +2 -1
  110. natural_pdf/selectors/parser.py +159 -316
  111. natural_pdf/templates/__init__.py +1 -1
  112. natural_pdf/templates/spa/css/style.css +334 -0
  113. natural_pdf/templates/spa/index.html +31 -0
  114. natural_pdf/templates/spa/js/app.js +472 -0
  115. natural_pdf/templates/spa/words.txt +235976 -0
  116. natural_pdf/utils/debug.py +32 -0
  117. natural_pdf/utils/highlighting.py +8 -2
  118. natural_pdf/utils/identifiers.py +29 -0
  119. natural_pdf/utils/packaging.py +418 -0
  120. natural_pdf/utils/reading_order.py +65 -63
  121. natural_pdf/utils/text_extraction.py +195 -0
  122. natural_pdf/utils/visualization.py +70 -61
  123. natural_pdf/widgets/__init__.py +2 -3
  124. natural_pdf/widgets/viewer.py +749 -718
  125. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/METADATA +53 -17
  126. natural_pdf-0.1.6.dist-info/RECORD +141 -0
  127. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/WHEEL +1 -1
  128. natural_pdf-0.1.6.dist-info/top_level.txt +4 -0
  129. notebooks/Examples.ipynb +1293 -0
  130. pdfs/.gitkeep +0 -0
  131. pdfs/01-practice.pdf +543 -0
  132. pdfs/0500000US42001.pdf +0 -0
  133. pdfs/0500000US42007.pdf +0 -0
  134. pdfs/2014 Statistics.pdf +0 -0
  135. pdfs/2019 Statistics.pdf +0 -0
  136. pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  137. pdfs/needs-ocr.pdf +0 -0
  138. natural_pdf/templates/ocr_debug.html +0 -517
  139. natural_pdf-0.1.4.dist-info/RECORD +0 -61
  140. natural_pdf-0.1.4.dist-info/top_level.txt +0 -1
  141. {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/licenses/LICENSE +0 -0
@@ -1,765 +1,796 @@
1
1
  # natural_pdf/widgets/viewer.py
2
2
 
3
- import ipywidgets as widgets
4
- from traitlets import Unicode, List, Dict, observe
3
+ import logging
5
4
  import os
6
- import logging # Add logging
7
- import json
8
- from IPython.display import display, HTML, Javascript
9
- import uuid
10
- from PIL import Image
11
5
 
12
6
  logger = logging.getLogger(__name__)
13
7
 
14
- # --- Read JS code from file --- #
15
- _MODULE_DIR = os.path.dirname(__file__)
16
- _FRONTEND_JS_PATH = os.path.join(_MODULE_DIR, 'frontend', 'viewer.js')
8
+ # Initialize flag and module/class variables to None
9
+ _IPYWIDGETS_AVAILABLE = False
10
+ widgets = None
11
+ SimpleInteractiveViewerWidget = None
12
+ InteractiveViewerWidget = None
17
13
 
18
14
  try:
19
- with open(_FRONTEND_JS_PATH, 'r', encoding='utf-8') as f:
20
- _FRONTEND_JS_CODE = f.read()
21
- logger.debug(f"Successfully read frontend JS from: {_FRONTEND_JS_PATH}")
22
- except FileNotFoundError:
23
- logger.error(f"Frontend JS file not found at {_FRONTEND_JS_PATH}. Widget will likely fail.")
24
- _FRONTEND_JS_CODE = "console.error('Frontend JS file not found! Widget cannot load.');"
25
- except Exception as e:
26
- logger.error(f"Error reading frontend JS file {_FRONTEND_JS_PATH}: {e}")
27
- _FRONTEND_JS_CODE = f"console.error('Error reading frontend JS file: {e}');"
28
-
29
- class SimpleInteractiveViewerWidget(widgets.DOMWidget):
30
- def __init__(self, pdf_data=None, **kwargs):
31
- """
32
- Create a simple interactive PDF viewer widget.
33
-
34
- Args:
35
- pdf_data (dict, optional): Dictionary containing 'page_image', 'elements', etc.
36
- **kwargs: Additional parameters including image_uri, elements, etc.
37
- """
38
- super().__init__()
39
-
40
- # Support both pdf_data dict and individual kwargs
41
- if pdf_data:
42
- self.pdf_data = pdf_data
43
- # Ensure backward compatibility - if image_uri exists but page_image doesn't
44
- if 'image_uri' in pdf_data and 'page_image' not in pdf_data:
45
- self.pdf_data['page_image'] = pdf_data['image_uri']
46
- else:
47
- # Check for image_uri in kwargs
48
- image_source = kwargs.get('image_uri', '')
49
-
50
- self.pdf_data = {
51
- 'page_image': image_source,
52
- 'elements': kwargs.get('elements', [])
53
- }
54
-
55
- # Log for debugging
56
- logger.debug(f"SimpleInteractiveViewerWidget initialized with widget_id={id(self)}")
57
- logger.debug(f"Image source provided: {self.pdf_data.get('page_image', 'None')[:30]}...")
58
- logger.debug(f"Number of elements: {len(self.pdf_data.get('elements', []))}")
59
-
60
- self.widget_id = f"pdf-viewer-{str(uuid.uuid4())[:8]}"
61
- self._generate_html()
62
-
63
- def _generate_html(self):
64
- """Generate the HTML for the PDF viewer"""
65
- # Extract data - Coordinates in self.pdf_data['elements'] are already scaled
66
- page_image = self.pdf_data.get('page_image', '')
67
- elements = self.pdf_data.get('elements', [])
68
-
69
- logger.debug(f"Generating HTML with image: {page_image[:30]}... and {len(elements)} elements (using scaled coords)")
70
-
71
- # Create the container div
72
- container_html = f"""
73
- <div id="{self.widget_id}" class="pdf-viewer" style="position: relative; font-family: Arial, sans-serif;">
74
- <div class="toolbar" style="margin-bottom: 10px; padding: 5px; background-color: #f0f0f0; border-radius: 4px;">
75
- <button id="{self.widget_id}-zoom-in" style="margin-right: 5px;">Zoom In (+)</button>
76
- <button id="{self.widget_id}-zoom-out" style="margin-right: 5px;">Zoom Out (-)</button>
77
- <button id="{self.widget_id}-reset-zoom" style="margin-right: 5px;">Reset</button>
78
- </div>
79
- <div style="display: flex; flex-direction: row;">
80
- <div class="pdf-outer-container" style="position: relative; overflow: hidden; border: 1px solid #ccc; flex-grow: 1;">
81
- <div id="{self.widget_id}-zoom-pan-container" class="zoom-pan-container" style="position: relative; width: fit-content; height: fit-content; transform-origin: top left; cursor: grab;">
82
- <!-- The image is rendered at scale, so its dimensions match scaled coordinates -->
83
- <img src="{page_image}" style="display: block; max-width: none; height: auto;" />
84
- <div id="{self.widget_id}-elements-layer" class="elements-layer" style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; pointer-events: none;">
85
- """
86
-
87
- # Add SVG overlay layer
88
- container_html += f"""
89
- </div>
90
- <div id="{self.widget_id}-svg-layer" class="svg-layer" style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; pointer-events: none;">
91
- <!-- SVG viewport should match the scaled image size -->
92
- <svg width="100%" height="100%">
93
- """
94
-
95
- # Add elements and SVG boxes using the SCALED coordinates
96
- for i, element in enumerate(elements):
97
- element_type = element.get('type', 'unknown')
98
- # Use the already scaled coordinates
99
- x0 = element.get('x0', 0)
100
- y0 = element.get('y0', 0)
101
- x1 = element.get('x1', 0)
102
- y1 = element.get('y1', 0)
103
-
104
- # Calculate width and height from scaled coords
105
- width = x1 - x0
106
- height = y1 - y0
107
-
108
- # Create the element div with the right styling based on type
109
- # Use scaled coordinates for positioning and dimensions
110
- element_style = "position: absolute; pointer-events: auto; cursor: pointer; "
111
- element_style += f"left: {x0}px; top: {y0}px; width: {width}px; height: {height}px; "
112
-
113
- # Different styling for different element types
114
- if element_type == 'text':
115
- element_style += "background-color: rgba(255, 255, 0, 0.3); border: 1px dashed transparent; "
116
- elif element_type == 'image':
117
- element_style += "background-color: rgba(0, 128, 255, 0.3); border: 1px dashed transparent; "
118
- elif element_type == 'figure':
119
- element_style += "background-color: rgba(255, 0, 255, 0.3); border: 1px dashed transparent; "
120
- elif element_type == 'table':
121
- element_style += "background-color: rgba(0, 255, 0, 0.3); border: 1px dashed transparent; "
15
+ # Attempt to import the core optional dependency
16
+ import ipywidgets as widgets_imported
17
+
18
+ widgets = widgets_imported # Assign to the global name if import succeeds
19
+ _IPYWIDGETS_AVAILABLE = True
20
+ logger.debug("Successfully imported ipywidgets. Defining viewer widgets.")
21
+
22
+ # --- Dependencies needed ONLY if ipywidgets is available ---
23
+ import base64
24
+ import json
25
+ import uuid
26
+ from io import BytesIO
27
+
28
+ from IPython.display import HTML, Javascript, display
29
+ from PIL import Image
30
+ from traitlets import Dict, List, Unicode, observe
31
+
32
+ # --- Read JS code from file (only needed if widgets are defined) --- #
33
+ _MODULE_DIR = os.path.dirname(__file__)
34
+ _FRONTEND_JS_PATH = os.path.join(_MODULE_DIR, "frontend", "viewer.js")
35
+ try:
36
+ with open(_FRONTEND_JS_PATH, "r", encoding="utf-8") as f:
37
+ _FRONTEND_JS_CODE = f.read()
38
+ logger.debug(f"Successfully read frontend JS from: {_FRONTEND_JS_PATH}")
39
+ except FileNotFoundError:
40
+ logger.error(f"Frontend JS file not found at {_FRONTEND_JS_PATH}. Widget will likely fail.")
41
+ _FRONTEND_JS_CODE = "console.error('Frontend JS file not found! Widget cannot load.');"
42
+ except Exception as e:
43
+ logger.error(f"Error reading frontend JS file {_FRONTEND_JS_PATH}: {e}")
44
+ _FRONTEND_JS_CODE = f"console.error('Error reading frontend JS file: {e}');"
45
+
46
+ # --- Define Widget Classes ONLY if ipywidgets is available ---
47
+ class SimpleInteractiveViewerWidget(widgets.DOMWidget):
48
+ def __init__(self, pdf_data=None, **kwargs):
49
+ """
50
+ Create a simple interactive PDF viewer widget.
51
+
52
+ Args:
53
+ pdf_data (dict, optional): Dictionary containing 'page_image', 'elements', etc.
54
+ **kwargs: Additional parameters including image_uri, elements, etc.
55
+ """
56
+ super().__init__()
57
+
58
+ # Support both pdf_data dict and individual kwargs
59
+ if pdf_data:
60
+ self.pdf_data = pdf_data
61
+ # Ensure backward compatibility - if image_uri exists but page_image doesn't
62
+ if "image_uri" in pdf_data and "page_image" not in pdf_data:
63
+ self.pdf_data["page_image"] = pdf_data["image_uri"]
122
64
  else:
123
- element_style += "background-color: rgba(200, 200, 200, 0.3); border: 1px dashed transparent; "
124
-
125
- # Add element div
126
- container_html += f"""
127
- <div class="pdf-element" data-element-id="{i}" style="{element_style}"></div>
65
+ # Check for image_uri in kwargs
66
+ image_source = kwargs.get("image_uri", "")
67
+
68
+ self.pdf_data = {"page_image": image_source, "elements": kwargs.get("elements", [])}
69
+
70
+ # Log for debugging
71
+ logger.debug(f"SimpleInteractiveViewerWidget initialized with widget_id={id(self)}")
72
+ logger.debug(
73
+ f"Image source provided: {self.pdf_data.get('page_image', 'None')[:30]}..."
74
+ )
75
+ logger.debug(f"Number of elements: {len(self.pdf_data.get('elements', []))}")
76
+
77
+ self.widget_id = f"pdf-viewer-{str(uuid.uuid4())[:8]}"
78
+ self._generate_html()
79
+
80
+ def _generate_html(self):
81
+ """Generate the HTML for the PDF viewer"""
82
+ # Extract data - Coordinates in self.pdf_data['elements'] are already scaled
83
+ page_image = self.pdf_data.get("page_image", "")
84
+ elements = self.pdf_data.get("elements", [])
85
+
86
+ logger.debug(
87
+ f"Generating HTML with image: {page_image[:30]}... and {len(elements)} elements (using scaled coords)"
88
+ )
89
+
90
+ # Create the container div
91
+ container_html = f"""
92
+ <div id="{self.widget_id}" class="pdf-viewer" style="position: relative; font-family: Arial, sans-serif;">
93
+ <div class="toolbar" style="margin-bottom: 10px; padding: 5px; background-color: #f0f0f0; border-radius: 4px;">
94
+ <button id="{self.widget_id}-zoom-in" style="margin-right: 5px;">Zoom In (+)</button>
95
+ <button id="{self.widget_id}-zoom-out" style="margin-right: 5px;">Zoom Out (-)</button>
96
+ <button id="{self.widget_id}-reset-zoom" style="margin-right: 5px;">Reset</button>
97
+ </div>
98
+ <div style="display: flex; flex-direction: row;">
99
+ <div class="pdf-outer-container" style="position: relative; overflow: hidden; border: 1px solid #ccc; flex-grow: 1;">
100
+ <div id="{self.widget_id}-zoom-pan-container" class="zoom-pan-container" style="position: relative; width: fit-content; height: fit-content; transform-origin: top left; cursor: grab;">
101
+ <!-- The image is rendered at scale, so its dimensions match scaled coordinates -->
102
+ <img src="{page_image}" style="display: block; max-width: none; height: auto;" />
103
+ <div id="{self.widget_id}-elements-layer" class="elements-layer" style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; pointer-events: none;">
128
104
  """
129
-
130
- # Add SVG rectangle using scaled coordinates and dimensions
105
+
106
+ # Add SVG overlay layer
131
107
  container_html += f"""
132
- <rect data-element-id="{i}" x="{x0}" y="{y0}" width="{width}" height="{height}"
133
- fill="none" stroke="rgba(255, 165, 0, 0.85)" stroke-width="1.5" />
108
+ </div>
109
+ <div id="{self.widget_id}-svg-layer" class="svg-layer" style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; pointer-events: none;">
110
+ <!-- SVG viewport should match the scaled image size -->
111
+ <svg width="100%" height="100%">
134
112
  """
135
-
136
- # Close SVG and container divs
137
- container_html += f"""
138
- </svg>
113
+
114
+ # Add elements and SVG boxes using the SCALED coordinates
115
+ for i, element in enumerate(elements):
116
+ element_type = element.get("type", "unknown")
117
+ # Use the already scaled coordinates
118
+ x0 = element.get("x0", 0)
119
+ y0 = element.get("y0", 0)
120
+ x1 = element.get("x1", 0)
121
+ y1 = element.get("y1", 0)
122
+
123
+ # Calculate width and height from scaled coords
124
+ width = x1 - x0
125
+ height = y1 - y0
126
+
127
+ # Create the element div with the right styling based on type
128
+ # Use scaled coordinates for positioning and dimensions
129
+ element_style = "position: absolute; pointer-events: auto; cursor: pointer; "
130
+ element_style += (
131
+ f"left: {x0}px; top: {y0}px; width: {width}px; height: {height}px; "
132
+ )
133
+
134
+ # Different styling for different element types
135
+ if element_type == "text":
136
+ element_style += (
137
+ "background-color: rgba(255, 255, 0, 0.3); border: 1px dashed transparent; "
138
+ )
139
+ elif element_type == "image":
140
+ element_style += (
141
+ "background-color: rgba(0, 128, 255, 0.3); border: 1px dashed transparent; "
142
+ )
143
+ elif element_type == "figure":
144
+ element_style += (
145
+ "background-color: rgba(255, 0, 255, 0.3); border: 1px dashed transparent; "
146
+ )
147
+ elif element_type == "table":
148
+ element_style += (
149
+ "background-color: rgba(0, 255, 0, 0.3); border: 1px dashed transparent; "
150
+ )
151
+ else:
152
+ element_style += "background-color: rgba(200, 200, 200, 0.3); border: 1px dashed transparent; "
153
+
154
+ # Add element div
155
+ container_html += f"""
156
+ <div class="pdf-element" data-element-id="{i}" style="{element_style}"></div>
157
+ """
158
+
159
+ # Add SVG rectangle using scaled coordinates and dimensions
160
+ container_html += f"""
161
+ <rect data-element-id="{i}" x="{x0}" y="{y0}" width="{width}" height="{height}"
162
+ fill="none" stroke="rgba(255, 165, 0, 0.85)" stroke-width="1.5" />
163
+ """
164
+
165
+ # Close SVG and container divs
166
+ container_html += f"""
167
+ </svg>
168
+ </div>
139
169
  </div>
140
170
  </div>
141
- </div>
142
-
143
- <div id="{self.widget_id}-info-panel" class="info-panel" style="display: block; margin-left: 20px; padding: 10px; width: 300px; max-height: 80vh; overflow-y: auto; border: 1px solid #eee; background-color: #f9f9f9;">
144
- <h4 style="margin-top: 0; margin-bottom: 5px; border-bottom: 1px solid #ccc; padding-bottom: 5px;">Element Info</h4>
145
- <pre id="{self.widget_id}-element-data" style="white-space: pre-wrap; word-break: break-all; font-size: 0.9em;"></pre>
146
- </div>
147
-
148
- </div>
149
- """
150
-
151
- # Display the HTML
152
- display(HTML(container_html))
153
-
154
- # Generate JavaScript to add interactivity
155
- self._add_javascript()
156
-
157
- def _add_javascript(self):
158
- """Add JavaScript to make the viewer interactive"""
159
- # Create JavaScript for element selection and SVG highlighting
160
- js_code = """
161
- (function() {
162
- // Store widget ID in a variable to avoid issues with string templates
163
- const widgetId = "%s";
164
-
165
- // Initialize PDF viewer registry if it doesn't exist
166
- if (!window.pdfViewerRegistry) {
167
- window.pdfViewerRegistry = {};
168
- }
169
-
170
- // Store PDF data for this widget
171
- window.pdfViewerRegistry[widgetId] = {
172
- initialData: %s,
173
- selectedElement: null,
174
- scale: 1.0, // Initial zoom scale
175
- translateX: 0, // Initial pan X
176
- translateY: 0, // Initial pan Y
177
- isDragging: false, // Flag for panning
178
- startX: 0, // Drag start X
179
- startY: 0, // Drag start Y
180
- startTranslateX: 0, // Translate X at drag start
181
- startTranslateY: 0, // Translate Y at drag start
182
- justDragged: false // Flag to differentiate click from drag completion
183
- };
184
-
185
- // Get references to elements
186
- const viewerData = window.pdfViewerRegistry[widgetId];
187
- const outerContainer = document.querySelector(`#${widgetId} .pdf-outer-container`);
188
- const zoomPanContainer = document.getElementById(`${widgetId}-zoom-pan-container`);
189
- const elements = zoomPanContainer.querySelectorAll(".pdf-element");
190
- const zoomInButton = document.getElementById(`${widgetId}-zoom-in`);
191
- const zoomOutButton = document.getElementById(`${widgetId}-zoom-out`);
192
- const resetButton = document.getElementById(`${widgetId}-reset-zoom`);
193
-
194
- // --- Helper function to apply transform ---
195
- function applyTransform() {
196
- zoomPanContainer.style.transform = `translate(${viewerData.translateX}px, ${viewerData.translateY}px) scale(${viewerData.scale})`;
197
- }
198
-
199
- // --- Zooming Logic ---
200
- function handleZoom(event) {
201
- event.preventDefault(); // Prevent default scroll
202
171
 
203
- const zoomIntensity = 0.1;
204
- const wheelDelta = event.deltaY < 0 ? 1 : -1; // +1 for zoom in, -1 for zoom out
205
- const zoomFactor = Math.exp(wheelDelta * zoomIntensity);
206
- const newScale = Math.max(0.5, Math.min(5, viewerData.scale * zoomFactor)); // Clamp scale
172
+ <div id="{self.widget_id}-info-panel" class="info-panel" style="display: block; margin-left: 20px; padding: 10px; width: 300px; max-height: 80vh; overflow-y: auto; border: 1px solid #eee; background-color: #f9f9f9;">
173
+ <h4 style="margin-top: 0; margin-bottom: 5px; border-bottom: 1px solid #ccc; padding-bottom: 5px;">Element Info</h4>
174
+ <pre id="{self.widget_id}-element-data" style="white-space: pre-wrap; word-break: break-all; font-size: 0.9em;"></pre>
175
+ </div>
207
176
 
208
- // Calculate mouse position relative to the outer container
209
- const rect = outerContainer.getBoundingClientRect();
210
- const mouseX = event.clientX - rect.left;
211
- const mouseY = event.clientY - rect.top;
177
+ </div>
178
+ """
179
+
180
+ # Display the HTML
181
+ display(HTML(container_html))
182
+
183
+ # Generate JavaScript to add interactivity
184
+ self._add_javascript()
185
+
186
+ def _add_javascript(self):
187
+ """Add JavaScript to make the viewer interactive"""
188
+ # Create JavaScript for element selection and SVG highlighting
189
+ js_code = """
190
+ (function() {
191
+ // Store widget ID in a variable to avoid issues with string templates
192
+ const widgetId = "%s";
212
193
 
213
- // Calculate the point in the content that the mouse is pointing to
214
- const pointX = (mouseX - viewerData.translateX) / viewerData.scale;
215
- const pointY = (mouseY - viewerData.translateY) / viewerData.scale;
194
+ // Initialize PDF viewer registry if it doesn't exist
195
+ if (!window.pdfViewerRegistry) {
196
+ window.pdfViewerRegistry = {};
197
+ }
216
198
 
217
- // Update scale
218
- viewerData.scale = newScale;
199
+ // Store PDF data for this widget
200
+ window.pdfViewerRegistry[widgetId] = {
201
+ initialData: %s,
202
+ selectedElement: null,
203
+ scale: 1.0, // Initial zoom scale
204
+ translateX: 0, // Initial pan X
205
+ translateY: 0, // Initial pan Y
206
+ isDragging: false, // Flag for panning
207
+ startX: 0, // Drag start X
208
+ startY: 0, // Drag start Y
209
+ startTranslateX: 0, // Translate X at drag start
210
+ startTranslateY: 0, // Translate Y at drag start
211
+ justDragged: false // Flag to differentiate click from drag completion
212
+ };
219
213
 
220
- // Calculate new translation to keep the pointed-at location fixed
221
- viewerData.translateX = mouseX - pointX * viewerData.scale;
222
- viewerData.translateY = mouseY - pointY * viewerData.scale;
214
+ // Get references to elements
215
+ const viewerData = window.pdfViewerRegistry[widgetId];
216
+ const outerContainer = document.querySelector(`#${widgetId} .pdf-outer-container`);
217
+ const zoomPanContainer = document.getElementById(`${widgetId}-zoom-pan-container`);
218
+ const elements = zoomPanContainer.querySelectorAll(".pdf-element");
219
+ const zoomInButton = document.getElementById(`${widgetId}-zoom-in`);
220
+ const zoomOutButton = document.getElementById(`${widgetId}-zoom-out`);
221
+ const resetButton = document.getElementById(`${widgetId}-reset-zoom`);
223
222
 
224
- applyTransform();
225
- }
226
-
227
- outerContainer.addEventListener('wheel', handleZoom);
228
-
229
- // --- Panning Logic ---
230
- const dragThreshold = 5; // Pixels to move before drag starts
231
-
232
- function handleMouseDown(event) {
233
- // Prevent default only if needed (e.g., text selection on image)
234
- if (event.target.tagName === 'IMG') {
235
- event.preventDefault();
236
- }
237
- // Allow mousedown events on elements to proceed for potential clicks
238
- // Record start position for potential drag
239
- viewerData.startX = event.clientX;
240
- viewerData.startY = event.clientY;
241
- // Store initial translate values to calculate relative movement
242
- viewerData.startTranslateX = viewerData.translateX;
243
- viewerData.startTranslateY = viewerData.translateY;
244
- // Don't set isDragging = true yet
245
- // Don't change pointerEvents yet
246
- }
247
-
248
- function handleMouseMove(event) {
249
- // Check if mouse button is actually down (browser inconsistencies)
250
- if (event.buttons !== 1) {
251
- if (viewerData.isDragging) {
252
- // Force drag end if button is released unexpectedly
253
- handleMouseUp(event);
254
- }
255
- return;
256
- }
257
-
258
- const currentX = event.clientX;
259
- const currentY = event.clientY;
260
- const deltaX = currentX - viewerData.startX;
261
- const deltaY = currentY - viewerData.startY;
262
-
263
- // If not already dragging, check if threshold is exceeded
264
- if (!viewerData.isDragging) {
265
- const movedDistance = Math.hypot(deltaX, deltaY);
266
- if (movedDistance > dragThreshold) {
267
- viewerData.isDragging = true;
268
- zoomPanContainer.style.cursor = 'grabbing';
269
- // Now disable pointer events on elements since a drag has started
270
- elements.forEach(el => el.style.pointerEvents = 'none');
271
- }
272
- }
273
-
274
- // If dragging, update transform
275
- if (viewerData.isDragging) {
276
- // Prevent text selection during drag
277
- event.preventDefault();
278
- viewerData.translateX = viewerData.startTranslateX + deltaX;
279
- viewerData.translateY = viewerData.startTranslateY + deltaY;
280
- applyTransform();
223
+ // --- Helper function to apply transform ---
224
+ function applyTransform() {
225
+ zoomPanContainer.style.transform = `translate(${viewerData.translateX}px, ${viewerData.translateY}px) scale(${viewerData.scale})`;
281
226
  }
282
- }
283
-
284
- function handleMouseUp(event) {
285
- const wasDragging = viewerData.isDragging;
286
227
 
287
- // Always reset cursor on mouse up
288
- zoomPanContainer.style.cursor = 'grab';
289
-
290
- if (wasDragging) {
291
- viewerData.isDragging = false;
292
- // Restore pointer events now that drag is finished
293
- elements.forEach(el => el.style.pointerEvents = 'auto');
228
+ // --- Zooming Logic ---
229
+ function handleZoom(event) {
230
+ event.preventDefault(); // Prevent default scroll
294
231
 
295
- // Set flag to indicate a drag just finished
296
- viewerData.justDragged = true;
297
- // Reset the flag after a minimal delay, allowing the click event to be ignored
298
- setTimeout(() => { viewerData.justDragged = false; }, 0);
299
-
300
- // IMPORTANT: Prevent this mouseup from triggering other default actions
301
- event.preventDefault();
302
- // Stop propagation might not be needed here if the click listener checks justDragged
303
- // event.stopPropagation();
304
- } else {
305
- // If it wasn't a drag, do nothing here.
306
- // The browser should naturally fire a 'click' event on the target element
307
- // which will be handled by the element's specific click listener
308
- // or the outerContainer's listener if it was on the background.
309
- }
310
- }
311
-
312
- // Mousedown starts the *potential* for a drag
313
- // Attach to outer container to catch drags starting anywhere inside
314
- outerContainer.addEventListener('mousedown', handleMouseDown);
315
-
316
- // Mousemove determines if it's *actually* a drag and updates position
317
- // Attach to window or document for smoother dragging even if mouse leaves outerContainer
318
- // Using outerContainer for now, might need adjustment if dragging feels jerky near edges
319
- outerContainer.addEventListener('mousemove', handleMouseMove);
320
-
321
- // Mouseup ends the drag *or* allows a click to proceed
322
- // Attach to window or document to ensure drag ends even if mouse released outside
323
- // Using outerContainer for now
324
- outerContainer.addEventListener('mouseup', handleMouseUp);
325
-
326
- // Stop dragging if mouse leaves the outer container entirely (optional but good practice)
327
- outerContainer.addEventListener('mouseleave', (event) => {
328
- // Only act if the primary mouse button is NOT pressed anymore when leaving
329
- if (viewerData.isDragging && event.buttons !== 1) {
330
- handleMouseUp(event);
331
- }
332
- });
333
-
334
- // --- Button Listeners ---
335
- zoomInButton.addEventListener('click', () => {
336
- const centerRect = outerContainer.getBoundingClientRect();
337
- const centerX = centerRect.width / 2;
338
- const centerY = centerRect.height / 2;
339
- const zoomFactor = 1.2;
340
- const newScale = Math.min(5, viewerData.scale * zoomFactor);
341
- const pointX = (centerX - viewerData.translateX) / viewerData.scale;
342
- const pointY = (centerY - viewerData.translateY) / viewerData.scale;
343
- viewerData.scale = newScale;
344
- viewerData.translateX = centerX - pointX * viewerData.scale;
345
- viewerData.translateY = centerY - pointY * viewerData.scale;
346
- applyTransform();
347
- });
348
-
349
- zoomOutButton.addEventListener('click', () => {
350
- const centerRect = outerContainer.getBoundingClientRect();
351
- const centerX = centerRect.width / 2;
352
- const centerY = centerRect.height / 2;
353
- const zoomFactor = 1 / 1.2;
354
- const newScale = Math.max(0.5, viewerData.scale * zoomFactor);
355
- const pointX = (centerX - viewerData.translateX) / viewerData.scale;
356
- const pointY = (centerY - viewerData.translateY) / viewerData.scale;
357
- viewerData.scale = newScale;
358
- viewerData.translateX = centerX - pointX * viewerData.scale;
359
- viewerData.translateY = centerY - pointY * viewerData.scale;
360
- applyTransform();
361
- });
362
-
363
- resetButton.addEventListener('click', () => {
364
- viewerData.scale = 1.0;
365
- viewerData.translateX = 0;
366
- viewerData.translateY = 0;
367
- applyTransform();
368
- // Also reset selection on zoom reset
369
- if (viewerData.selectedElement !== null) {
370
- resetElementStyle(viewerData.selectedElement);
371
- viewerData.selectedElement = null;
372
- // Optionally clear info panel
373
- // const elementData = document.getElementById(widgetId + "-element-data");
374
- // if (elementData) elementData.textContent = '';
375
- }
376
- });
377
-
378
- // --- Helper function to reset element style ---
379
- function resetElementStyle(elementIdx) {
380
- const el = zoomPanContainer.querySelector(`.pdf-element[data-element-id='${elementIdx}']`);
381
- const svgRect = document.querySelector(`#${widgetId} .svg-layer svg rect[data-element-id='${elementIdx}']`);
382
- if (!el) return;
383
-
384
- const viewer = window.pdfViewerRegistry[widgetId];
385
- const eType = viewer.initialData.elements[elementIdx].type || 'unknown';
386
-
387
- if (eType === 'text') {
388
- el.style.backgroundColor = "rgba(255, 255, 0, 0.3)";
389
- } else if (eType === 'image') {
390
- el.style.backgroundColor = "rgba(0, 128, 255, 0.3)";
391
- } else if (eType === 'figure') {
392
- el.style.backgroundColor = "rgba(255, 0, 255, 0.3)";
393
- } else if (eType === 'table') {
394
- el.style.backgroundColor = "rgba(0, 255, 0, 0.3)";
395
- } else {
396
- el.style.backgroundColor = "rgba(200, 200, 200, 0.3)";
232
+ const zoomIntensity = 0.1;
233
+ const wheelDelta = event.deltaY < 0 ? 1 : -1; // +1 for zoom in, -1 for zoom out
234
+ const zoomFactor = Math.exp(wheelDelta * zoomIntensity);
235
+ const newScale = Math.max(0.5, Math.min(5, viewerData.scale * zoomFactor)); // Clamp scale
236
+
237
+ // Calculate mouse position relative to the outer container
238
+ const rect = outerContainer.getBoundingClientRect();
239
+ const mouseX = event.clientX - rect.left;
240
+ const mouseY = event.clientY - rect.top;
241
+
242
+ // Calculate the point in the content that the mouse is pointing to
243
+ const pointX = (mouseX - viewerData.translateX) / viewerData.scale;
244
+ const pointY = (mouseY - viewerData.translateY) / viewerData.scale;
245
+
246
+ // Update scale
247
+ viewerData.scale = newScale;
248
+
249
+ // Calculate new translation to keep the pointed-at location fixed
250
+ viewerData.translateX = mouseX - pointX * viewerData.scale;
251
+ viewerData.translateY = mouseY - pointY * viewerData.scale;
252
+
253
+ applyTransform();
397
254
  }
398
- el.style.border = "1px dashed transparent";
255
+
256
+ outerContainer.addEventListener('wheel', handleZoom);
257
+
258
+ // --- Panning Logic ---
259
+ const dragThreshold = 5; // Pixels to move before drag starts
399
260
 
400
- if (svgRect) {
401
- svgRect.setAttribute("stroke", "rgba(255, 165, 0, 0.85)");
402
- svgRect.setAttribute("stroke-width", "1.5");
403
- }
404
- }
405
-
406
- // --- Helper function to set element style (selected/hover) ---
407
- function setElementHighlightStyle(elementIdx) {
408
- const el = zoomPanContainer.querySelector(`.pdf-element[data-element-id='${elementIdx}']`);
409
- const svgRect = document.querySelector(`#${widgetId} .svg-layer svg rect[data-element-id='${elementIdx}']`);
410
- if (!el) return;
411
-
412
- el.style.backgroundColor = "rgba(64, 158, 255, 0.15)";
413
- el.style.border = "2px solid rgba(64, 158, 255, 0.6)";
414
-
415
- if (svgRect) {
416
- svgRect.setAttribute("stroke", "rgba(64, 158, 255, 0.9)");
417
- svgRect.setAttribute("stroke-width", "2.5");
418
- }
419
- }
420
-
421
- // --- Background Click Listener (on outer container) ---
422
- outerContainer.addEventListener('click', (event) => {
423
- // Ignore click if it resulted from the end of a drag
424
- if (viewerData.justDragged) {
425
- return;
261
+ function handleMouseDown(event) {
262
+ // Prevent default only if needed (e.g., text selection on image)
263
+ if (event.target.tagName === 'IMG') {
264
+ event.preventDefault();
265
+ }
266
+ // Allow mousedown events on elements to proceed for potential clicks
267
+ // Record start position for potential drag
268
+ viewerData.startX = event.clientX;
269
+ viewerData.startY = event.clientY;
270
+ // Store initial translate values to calculate relative movement
271
+ viewerData.startTranslateX = viewerData.translateX;
272
+ viewerData.startTranslateY = viewerData.translateY;
273
+ // Don't set isDragging = true yet
274
+ // Don't change pointerEvents yet
426
275
  }
276
+
277
+ function handleMouseMove(event) {
278
+ // Check if mouse button is actually down (browser inconsistencies)
279
+ if (event.buttons !== 1) {
280
+ if (viewerData.isDragging) {
281
+ // Force drag end if button is released unexpectedly
282
+ handleMouseUp(event);
283
+ }
284
+ return;
285
+ }
427
286
 
428
- // If the click is on an element itself, let the element's click handler manage it.
429
- if (event.target.closest('.pdf-element')) {
430
- return;
431
- }
432
- // If dragging, don't deselect
433
- if (viewerData.isDragging) {
434
- return;
435
- }
287
+ const currentX = event.clientX;
288
+ const currentY = event.clientY;
289
+ const deltaX = currentX - viewerData.startX;
290
+ const deltaY = currentY - viewerData.startY;
291
+
292
+ // If not already dragging, check if threshold is exceeded
293
+ if (!viewerData.isDragging) {
294
+ const movedDistance = Math.hypot(deltaX, deltaY);
295
+ if (movedDistance > dragThreshold) {
296
+ viewerData.isDragging = true;
297
+ zoomPanContainer.style.cursor = 'grabbing';
298
+ // Now disable pointer events on elements since a drag has started
299
+ elements.forEach(el => el.style.pointerEvents = 'none');
300
+ }
301
+ }
436
302
 
437
- // If an element is selected, deselect it
438
- if (viewerData.selectedElement !== null) {
439
- resetElementStyle(viewerData.selectedElement);
440
- viewerData.selectedElement = null;
441
-
442
- // Optionally clear the info panel
443
- const infoPanel = document.getElementById(widgetId + "-info-panel");
444
- const elementData = document.getElementById(widgetId + "-element-data");
445
- if (infoPanel && elementData) {
446
- // infoPanel.style.display = "none"; // Or hide it
447
- elementData.textContent = ""; // Clear content
303
+ // If dragging, update transform
304
+ if (viewerData.isDragging) {
305
+ // Prevent text selection during drag
306
+ event.preventDefault();
307
+ viewerData.translateX = viewerData.startTranslateX + deltaX;
308
+ viewerData.translateY = viewerData.startTranslateY + deltaY;
309
+ applyTransform();
448
310
  }
449
311
  }
450
- });
451
-
452
- // Add click handlers to elements
453
- elements.forEach(function(el) {
454
- el.addEventListener("click", function(event) {
455
- // Stop propagation to prevent the background click handler from immediately deselecting.
456
- event.stopPropagation();
312
+
313
+ function handleMouseUp(event) {
314
+ const wasDragging = viewerData.isDragging;
457
315
 
458
- const elementIdx = parseInt(this.dataset.elementId);
459
- const viewer = window.pdfViewerRegistry[widgetId];
316
+ // Always reset cursor on mouse up
317
+ zoomPanContainer.style.cursor = 'grab';
460
318
 
461
- // If there was a previously selected element, reset its style
462
- if (viewer.selectedElement !== null && viewer.selectedElement !== elementIdx) {
463
- resetElementStyle(viewer.selectedElement);
319
+ if (wasDragging) {
320
+ viewerData.isDragging = false;
321
+ // Restore pointer events now that drag is finished
322
+ elements.forEach(el => el.style.pointerEvents = 'auto');
323
+
324
+ // Set flag to indicate a drag just finished
325
+ viewerData.justDragged = true;
326
+ // Reset the flag after a minimal delay, allowing the click event to be ignored
327
+ setTimeout(() => { viewerData.justDragged = false; }, 0);
328
+
329
+ // IMPORTANT: Prevent this mouseup from triggering other default actions
330
+ event.preventDefault();
331
+ // Stop propagation might not be needed here if the click listener checks justDragged
332
+ // event.stopPropagation();
333
+ } else {
334
+ // If it wasn't a drag, do nothing here.
335
+ // The browser should naturally fire a 'click' event on the target element
336
+ // which will be handled by the element's specific click listener
337
+ // or the outerContainer's listener if it was on the background.
338
+ }
339
+ }
340
+
341
+ // Mousedown starts the *potential* for a drag
342
+ // Attach to outer container to catch drags starting anywhere inside
343
+ outerContainer.addEventListener('mousedown', handleMouseDown);
344
+
345
+ // Mousemove determines if it's *actually* a drag and updates position
346
+ // Attach to window or document for smoother dragging even if mouse leaves outerContainer
347
+ // Using outerContainer for now, might need adjustment if dragging feels jerky near edges
348
+ outerContainer.addEventListener('mousemove', handleMouseMove);
349
+
350
+ // Mouseup ends the drag *or* allows a click to proceed
351
+ // Attach to window or document to ensure drag ends even if mouse released outside
352
+ // Using outerContainer for now
353
+ outerContainer.addEventListener('mouseup', handleMouseUp);
354
+
355
+ // Stop dragging if mouse leaves the outer container entirely (optional but good practice)
356
+ outerContainer.addEventListener('mouseleave', (event) => {
357
+ // Only act if the primary mouse button is NOT pressed anymore when leaving
358
+ if (viewerData.isDragging && event.buttons !== 1) {
359
+ handleMouseUp(event);
360
+ }
361
+ });
362
+
363
+ // --- Button Listeners ---
364
+ zoomInButton.addEventListener('click', () => {
365
+ const centerRect = outerContainer.getBoundingClientRect();
366
+ const centerX = centerRect.width / 2;
367
+ const centerY = centerRect.height / 2;
368
+ const zoomFactor = 1.2;
369
+ const newScale = Math.min(5, viewerData.scale * zoomFactor);
370
+ const pointX = (centerX - viewerData.translateX) / viewerData.scale;
371
+ const pointY = (centerY - viewerData.translateY) / viewerData.scale;
372
+ viewerData.scale = newScale;
373
+ viewerData.translateX = centerX - pointX * viewerData.scale;
374
+ viewerData.translateY = centerY - pointY * viewerData.scale;
375
+ applyTransform();
376
+ });
377
+
378
+ zoomOutButton.addEventListener('click', () => {
379
+ const centerRect = outerContainer.getBoundingClientRect();
380
+ const centerX = centerRect.width / 2;
381
+ const centerY = centerRect.height / 2;
382
+ const zoomFactor = 1 / 1.2;
383
+ const newScale = Math.max(0.5, viewerData.scale * zoomFactor);
384
+ const pointX = (centerX - viewerData.translateX) / viewerData.scale;
385
+ const pointY = (centerY - viewerData.translateY) / viewerData.scale;
386
+ viewerData.scale = newScale;
387
+ viewerData.translateX = centerX - pointX * viewerData.scale;
388
+ viewerData.translateY = centerY - pointY * viewerData.scale;
389
+ applyTransform();
390
+ });
391
+
392
+ resetButton.addEventListener('click', () => {
393
+ viewerData.scale = 1.0;
394
+ viewerData.translateX = 0;
395
+ viewerData.translateY = 0;
396
+ applyTransform();
397
+ // Also reset selection on zoom reset
398
+ if (viewerData.selectedElement !== null) {
399
+ resetElementStyle(viewerData.selectedElement);
400
+ viewerData.selectedElement = null;
401
+ // Optionally clear info panel
402
+ // const elementData = document.getElementById(widgetId + "-element-data");
403
+ // if (elementData) elementData.textContent = '';
464
404
  }
405
+ });
406
+
407
+ // --- Helper function to reset element style ---
408
+ function resetElementStyle(elementIdx) {
409
+ const el = zoomPanContainer.querySelector(`.pdf-element[data-element-id='${elementIdx}']`);
410
+ const svgRect = document.querySelector(`#${widgetId} .svg-layer svg rect[data-element-id='${elementIdx}']`);
411
+ if (!el) return;
465
412
 
466
- // If clicking the already selected element, deselect it (optional, uncomment if desired)
467
- /*
468
- if (viewer.selectedElement === elementIdx) {
469
- resetElementStyle(elementIdx);
470
- viewer.selectedElement = null;
471
- // Clear info panel maybe?
472
- const elementData = document.getElementById(widgetId + "-element-data");
473
- if (elementData) elementData.textContent = '';
474
- return; // Stop further processing
475
- }
476
- */
477
-
478
- // Store newly selected element
479
- viewer.selectedElement = elementIdx;
480
-
481
- // Highlight newly selected element
482
- setElementHighlightStyle(elementIdx);
483
-
484
- // Update info panel
485
- const infoPanel = document.getElementById(widgetId + "-info-panel");
486
- const elementData = document.getElementById(widgetId + "-element-data");
487
-
488
- if (infoPanel && elementData) {
489
- const element = viewer.initialData.elements[elementIdx];
490
- if (!element) { /* console.error(`[${widgetId}] Element data not found for index ${elementIdx}!`); */ return; }
491
- infoPanel.style.display = "block";
492
- elementData.textContent = JSON.stringify(element, null, 2);
413
+ const viewer = window.pdfViewerRegistry[widgetId];
414
+ const eType = viewer.initialData.elements[elementIdx].type || 'unknown';
415
+
416
+ if (eType === 'text') {
417
+ el.style.backgroundColor = "rgba(255, 255, 0, 0.3)";
418
+ } else if (eType === 'image') {
419
+ el.style.backgroundColor = "rgba(0, 128, 255, 0.3)";
420
+ } else if (eType === 'figure') {
421
+ el.style.backgroundColor = "rgba(255, 0, 255, 0.3)";
422
+ } else if (eType === 'table') {
423
+ el.style.backgroundColor = "rgba(0, 255, 0, 0.3)";
493
424
  } else {
494
- /* console.error(`[${widgetId}] Info panel or element data container not found via getElementById on click!`); */
425
+ el.style.backgroundColor = "rgba(200, 200, 200, 0.3)";
495
426
  }
496
- });
427
+ el.style.border = "1px dashed transparent";
428
+
429
+ if (svgRect) {
430
+ svgRect.setAttribute("stroke", "rgba(255, 165, 0, 0.85)");
431
+ svgRect.setAttribute("stroke-width", "1.5");
432
+ }
433
+ }
434
+
435
+ // --- Helper function to set element style (selected/hover) ---
436
+ function setElementHighlightStyle(elementIdx) {
437
+ const el = zoomPanContainer.querySelector(`.pdf-element[data-element-id='${elementIdx}']`);
438
+ const svgRect = document.querySelector(`#${widgetId} .svg-layer svg rect[data-element-id='${elementIdx}']`);
439
+ if (!el) return;
440
+
441
+ el.style.backgroundColor = "rgba(64, 158, 255, 0.15)";
442
+ el.style.border = "2px solid rgba(64, 158, 255, 0.6)";
443
+
444
+ if (svgRect) {
445
+ svgRect.setAttribute("stroke", "rgba(64, 158, 255, 0.9)");
446
+ svgRect.setAttribute("stroke-width", "2.5");
447
+ }
448
+ }
497
449
 
498
- // Add hover effects
499
- el.addEventListener("mouseenter", function() {
500
- // *** Only apply hover if NOTHING is selected ***
501
- const viewer = window.pdfViewerRegistry[widgetId];
502
- if (viewer.selectedElement !== null) {
503
- return; // Do nothing if an element is selected
450
+ // --- Background Click Listener (on outer container) ---
451
+ outerContainer.addEventListener('click', (event) => {
452
+ // Ignore click if it resulted from the end of a drag
453
+ if (viewerData.justDragged) {
454
+ return;
504
455
  }
505
- // Avoid hover effect while dragging
506
- if (viewer.isDragging) {
456
+
457
+ // If the click is on an element itself, let the element's click handler manage it.
458
+ if (event.target.closest('.pdf-element')) {
459
+ return;
460
+ }
461
+ // If dragging, don't deselect
462
+ if (viewerData.isDragging) {
507
463
  return;
508
464
  }
509
465
 
510
- const elementIdx = parseInt(this.dataset.elementId);
511
-
512
- // Apply hover styling
513
- setElementHighlightStyle(elementIdx);
514
-
515
- // Show element info on hover (only if nothing selected)
516
- const infoPanel = document.getElementById(widgetId + "-info-panel");
517
- const elementData = document.getElementById(widgetId + "-element-data");
518
-
519
- if (infoPanel && elementData) {
520
- const element = viewer.initialData.elements[elementIdx];
521
- if (!element) { /* console.error(`[${widgetId}] Element data not found for index ${elementIdx}!`); */ return; }
522
- infoPanel.style.display = "block";
523
- elementData.textContent = JSON.stringify(element, null, 2);
524
- } else {
525
- // Don't spam console on hover if it's not found initially
526
- // console.error(`[${widgetId}] Info panel or element data container not found via getElementById on hover!`);
466
+ // If an element is selected, deselect it
467
+ if (viewerData.selectedElement !== null) {
468
+ resetElementStyle(viewerData.selectedElement);
469
+ viewerData.selectedElement = null;
470
+
471
+ // Optionally clear the info panel
472
+ const infoPanel = document.getElementById(widgetId + "-info-panel");
473
+ const elementData = document.getElementById(widgetId + "-element-data");
474
+ if (infoPanel && elementData) {
475
+ // infoPanel.style.display = "none"; // Or hide it
476
+ elementData.textContent = ""; // Clear content
477
+ }
527
478
  }
528
479
  });
529
-
530
- el.addEventListener("mouseleave", function() {
531
- // *** Only reset hover if NOTHING is selected ***
532
- const viewer = window.pdfViewerRegistry[widgetId];
533
- if (viewer.selectedElement !== null) {
534
- return; // Do nothing if an element is selected
535
- }
536
- // Avoid hover effect while dragging
537
- if (viewer.isDragging) {
538
- return;
539
- }
540
480
 
541
- const elementIdx = parseInt(this.dataset.elementId);
481
+ // Add click handlers to elements
482
+ elements.forEach(function(el) {
483
+ el.addEventListener("click", function(event) {
484
+ // Stop propagation to prevent the background click handler from immediately deselecting.
485
+ event.stopPropagation();
486
+
487
+ const elementIdx = parseInt(this.dataset.elementId);
488
+ const viewer = window.pdfViewerRegistry[widgetId];
489
+
490
+ // If there was a previously selected element, reset its style
491
+ if (viewer.selectedElement !== null && viewer.selectedElement !== elementIdx) {
492
+ resetElementStyle(viewer.selectedElement);
493
+ }
494
+
495
+ // If clicking the already selected element, deselect it (optional, uncomment if desired)
496
+ /*
497
+ if (viewer.selectedElement === elementIdx) {
498
+ resetElementStyle(elementIdx);
499
+ viewer.selectedElement = null;
500
+ // Clear info panel maybe?
501
+ const elementData = document.getElementById(widgetId + "-element-data");
502
+ if (elementData) elementData.textContent = '';
503
+ return; // Stop further processing
504
+ }
505
+ */
506
+
507
+ // Store newly selected element
508
+ viewer.selectedElement = elementIdx;
509
+
510
+ // Highlight newly selected element
511
+ setElementHighlightStyle(elementIdx);
512
+
513
+ // Update info panel
514
+ const infoPanel = document.getElementById(widgetId + "-info-panel");
515
+ const elementData = document.getElementById(widgetId + "-element-data");
516
+
517
+ if (infoPanel && elementData) {
518
+ const element = viewer.initialData.elements[elementIdx];
519
+ if (!element) { /* console.error(`[${widgetId}] Element data not found for index ${elementIdx}!`); */ return; }
520
+ infoPanel.style.display = "block";
521
+ elementData.textContent = JSON.stringify(element, null, 2);
522
+ } else {
523
+ /* console.error(`[${widgetId}] Info panel or element data container not found via getElementById on click!`); */
524
+ }
525
+ });
542
526
 
543
- // Reset styling
544
- resetElementStyle(elementIdx);
545
-
546
- // Optionally hide/clear the info panel on mouse leave when nothing is selected
547
- // const infoPanel = document.getElementById(widgetId + "-info-panel");
548
- // const elementData = document.getElementById(widgetId + "-element-data");
549
- // if (infoPanel && elementData) {
550
- // elementData.textContent = '';
551
- // }
552
- });
553
- });
527
+ // Add hover effects
528
+ el.addEventListener("mouseenter", function() {
529
+ // *** Only apply hover if NOTHING is selected ***
530
+ const viewer = window.pdfViewerRegistry[widgetId];
531
+ if (viewer.selectedElement !== null) {
532
+ return; // Do nothing if an element is selected
533
+ }
534
+ // Avoid hover effect while dragging
535
+ if (viewer.isDragging) {
536
+ return;
537
+ }
538
+
539
+ const elementIdx = parseInt(this.dataset.elementId);
540
+
541
+ // Apply hover styling
542
+ setElementHighlightStyle(elementIdx);
554
543
 
555
- })();
556
- """ % (self.widget_id, json.dumps(self.pdf_data))
557
-
558
- # Add the JavaScript
559
- display(Javascript(js_code))
560
-
561
- def _repr_html_(self):
562
- """Return empty string as HTML has already been displayed"""
563
- return ""
564
-
565
- @classmethod
566
- def from_page(cls, page, on_element_click=None, include_attributes=None):
567
- """
568
- Create a viewer widget from a Page object.
569
-
570
- Args:
571
- page: A natural_pdf.core.page.Page object
572
- on_element_click: Optional callback function for element clicks
573
- include_attributes: Optional list of *additional* specific attributes to include.
574
- A default set of common/useful attributes is always included.
575
-
576
- Returns:
577
- SimpleInteractiveViewerWidget instance or None if image rendering fails.
578
- """
579
- # Get the page image
580
- import base64
581
- from io import BytesIO
582
- import json # Ensure json is imported
583
- from PIL import Image # Ensure Image is imported
584
-
585
- # Render page to image using the correct method and parameter
586
- scale = 1.0 # Define scale factor used for rendering
587
- try:
588
- img_object = page.to_image(resolution=int(72 * scale)) # Call to_image
589
- # Check if .original attribute exists, otherwise assume img_object is the PIL Image
590
- if hasattr(img_object, 'original') and isinstance(img_object.original, Image.Image):
591
- img = img_object.original
592
- elif isinstance(img_object, Image.Image):
593
- img = img_object
594
- else:
595
- # If it's neither, maybe it's the raw bytes? Try opening it.
596
- try:
597
- img = Image.open(BytesIO(img_object)).convert('RGB')
598
- except Exception:
599
- raise TypeError(f"page.to_image() returned unexpected type: {type(img_object)}")
600
- logger.debug(f"Successfully rendered page {page.index} using to_image()")
601
- except Exception as render_err:
602
- logger.error(f"Error rendering page {page.index} image for widget: {render_err}", exc_info=True)
603
- # Return None or raise the error? Let's raise for now to make it clear.
604
- raise ValueError(f"Failed to render page image: {render_err}") from render_err
605
-
606
- buffered = BytesIO()
607
- img.save(buffered, format="PNG")
608
- img_str = base64.b64encode(buffered.getvalue()).decode()
609
- image_uri = f"data:image/png;base64,{img_str}"
610
-
611
- # Convert elements to dict format
612
- elements = []
613
- # Use page.elements directly if available, otherwise fallback to find_all
614
- page_elements = getattr(page, 'elements', page.find_all('*'))
615
-
616
- # Filter out 'char' elements
617
- filtered_page_elements = [el for el in page_elements if getattr(el, 'type', '').lower() != 'char']
618
- logger.debug(f"Filtered out char elements, keeping {len(filtered_page_elements)} elements.")
619
-
620
- # Define a list of common/useful attributes (properties) to check for
621
- default_attributes_to_get = [
622
- 'text', 'fontname', 'size', 'bold', 'italic', 'color',
623
- 'linewidth', # For lines (pdfplumber uses 'linewidth')
624
- 'is_horizontal', 'is_vertical', # For lines
625
- 'source', 'confidence', # For text/OCR
626
- 'label', # Common for layout elements
627
- 'model', # Add the model name (engine)
628
- # Add any other common properties you expect from your elements
629
- 'upright', 'direction' # from pdfplumber chars/words
630
- ]
631
-
632
- for i, element in enumerate(filtered_page_elements):
633
- # Get original coordinates and calculated width/height (always present via base class)
634
- original_x0 = element.x0
635
- original_y0 = element.top
636
- original_x1 = element.x1
637
- original_y1 = element.bottom
638
- width = element.width
639
- height = element.height
640
-
641
- # Base element dict with required info
642
- elem_dict = {
643
- 'id': i,
644
- # Use the standardized .type property
645
- 'type': element.type,
646
- # Scaled coordinates for positioning in HTML/SVG
647
- 'x0': original_x0 * scale,
648
- 'y0': original_y0 * scale,
649
- 'x1': original_x1 * scale,
650
- 'y1': original_y1 * scale,
651
- 'width': width * scale,
652
- 'height': height * scale,
653
- }
654
-
655
- # --- Get Default Attributes --- #
656
- attributes_found = set()
657
- for attr_name in default_attributes_to_get:
658
- if hasattr(element, attr_name):
544
+ // Show element info on hover (only if nothing selected)
545
+ const infoPanel = document.getElementById(widgetId + "-info-panel");
546
+ const elementData = document.getElementById(widgetId + "-element-data");
547
+
548
+ if (infoPanel && elementData) {
549
+ const element = viewer.initialData.elements[elementIdx];
550
+ if (!element) { /* console.error(`[${widgetId}] Element data not found for index ${elementIdx}!`); */ return; }
551
+ infoPanel.style.display = "block";
552
+ elementData.textContent = JSON.stringify(element, null, 2);
553
+ } else {
554
+ // Don't spam console on hover if it's not found initially
555
+ // console.error(`[${widgetId}] Info panel or element data container not found via getElementById on hover!`);
556
+ }
557
+ });
558
+
559
+ el.addEventListener("mouseleave", function() {
560
+ // *** Only reset hover if NOTHING is selected ***
561
+ const viewer = window.pdfViewerRegistry[widgetId];
562
+ if (viewer.selectedElement !== null) {
563
+ return; // Do nothing if an element is selected
564
+ }
565
+ // Avoid hover effect while dragging
566
+ if (viewer.isDragging) {
567
+ return;
568
+ }
569
+
570
+ const elementIdx = parseInt(this.dataset.elementId);
571
+
572
+ // Reset styling
573
+ resetElementStyle(elementIdx);
574
+
575
+ // Optionally hide/clear the info panel on mouse leave when nothing is selected
576
+ // const infoPanel = document.getElementById(widgetId + "-info-panel");
577
+ // const elementData = document.getElementById(widgetId + "-element-data");
578
+ // if (infoPanel && elementData) {
579
+ // elementData.textContent = '';
580
+ // }
581
+ });
582
+ });
583
+
584
+ })();
585
+ """ % (
586
+ self.widget_id,
587
+ json.dumps(self.pdf_data),
588
+ )
589
+
590
+ # Add the JavaScript
591
+ display(Javascript(js_code))
592
+
593
+ def _repr_html_(self):
594
+ """Return empty string as HTML has already been displayed"""
595
+ return ""
596
+
597
+ @classmethod
598
+ def from_page(cls, page, on_element_click=None, include_attributes=None):
599
+ """
600
+ Create a viewer widget from a Page object.
601
+
602
+ Args:
603
+ page: A natural_pdf.core.page.Page object
604
+ on_element_click: Optional callback function for element clicks
605
+ include_attributes: Optional list of *additional* specific attributes to include.
606
+ A default set of common/useful attributes is always included.
607
+
608
+ Returns:
609
+ SimpleInteractiveViewerWidget instance or None if image rendering fails.
610
+ """
611
+ # Get the page image
612
+ import base64
613
+ import json # Ensure json is imported
614
+ from io import BytesIO
615
+
616
+ from PIL import Image # Ensure Image is imported
617
+
618
+ # Render page to image using the correct method and parameter
619
+ scale = 1.0 # Define scale factor used for rendering
620
+ try:
621
+ img_object = page.to_image(resolution=int(72 * scale)) # Call to_image
622
+ # Check if .original attribute exists, otherwise assume img_object is the PIL Image
623
+ if hasattr(img_object, "original") and isinstance(img_object.original, Image.Image):
624
+ img = img_object.original
625
+ elif isinstance(img_object, Image.Image):
626
+ img = img_object
627
+ else:
628
+ # If it's neither, maybe it's the raw bytes? Try opening it.
659
629
  try:
660
- value = getattr(element, attr_name)
661
- # Convert non-JSON serializable types to string
662
- processed_value = value
663
- if not isinstance(value, (str, int, float, bool, list, dict, tuple)) and value is not None:
664
- processed_value = str(value)
665
- elem_dict[attr_name] = processed_value
666
- attributes_found.add(attr_name)
667
- except Exception as e:
668
- logger.warning(f"Could not get or process default attribute '{attr_name}' for element {i} ({element.type}): {e}")
669
-
670
- # --- Get User-Requested Attributes (if any) --- #
671
- if include_attributes:
672
- for attr_name in include_attributes:
673
- # Only process if not already added and exists
674
- if attr_name not in attributes_found and hasattr(element, attr_name):
675
- try:
676
- value = getattr(element, attr_name)
677
- processed_value = value
678
- if not isinstance(value, (str, int, float, bool, list, dict, tuple)) and value is not None:
679
- processed_value = str(value)
680
- elem_dict[attr_name] = processed_value
681
- except Exception as e:
682
- logger.warning(f"Could not get or process requested attribute '{attr_name}' for element {i} ({element.type}): {e}")
683
- for attr_name in elem_dict:
684
- if isinstance(elem_dict[attr_name], float):
685
- elem_dict[attr_name] = round(elem_dict[attr_name], 2)
686
- elements.append(elem_dict)
687
-
688
- logger.debug(f"Prepared {len(elements)} elements for widget with scaled coordinates and curated attributes.")
689
-
690
- # Create and return widget
691
- # The actual JSON conversion happens when the data is sent to the frontend
692
- return cls(
693
- image_uri=image_uri,
694
- elements=elements
695
- )
696
-
697
- # Keep the original widget class for reference, but make it not register
698
- # by commenting out the decorator
699
- # @widgets.register
700
- class InteractiveViewerWidget(widgets.DOMWidget):
701
- """Jupyter widget for interactively viewing PDF page elements."""
702
- _view_name = Unicode('InteractiveViewerView').tag(sync=True)
703
- _view_module = Unicode('viewer_widget').tag(sync=True)
704
- _view_module_version = Unicode('^0.1.0').tag(sync=True)
705
-
706
- image_uri = Unicode('').tag(sync=True)
707
- page_dimensions = Dict({}).tag(sync=True)
708
- elements = List([]).tag(sync=True)
709
-
710
- def __init__(self, **kwargs):
711
- super().__init__(**kwargs)
712
- logger.debug("InteractiveViewerWidget initialized (Python).")
713
-
714
- # Example observer (optional)
715
- @observe('elements')
716
- def _elements_changed(self, change):
717
- # Only log if logger level allows
718
- if logger.isEnabledFor(logging.DEBUG):
719
- logger.debug(f"Python: Elements traitlet changed. New count: {len(change['new'])}")
720
- # Can add Python-side logic here if needed when elements change
721
- # print(f"Python: Elements traitlet changed. New count: {len(change['new'])}")
722
- pass
723
-
724
- # Example usage
725
- """
726
- Example usage:
727
-
728
- # Method 1: Using pdf_data dictionary
729
- viewer = SimpleInteractiveViewerWidget(pdf_data={
730
- 'page_image': 'data:image/png;base64,...', # Base64 encoded image
731
- 'elements': [
732
- {
733
- 'type': 'text',
734
- 'x0': 100,
735
- 'y0': 200,
736
- 'x1': 300,
737
- 'y1': 220,
738
- 'text': 'Sample text'
739
- }
740
- ]
741
- })
742
-
743
- # Method 2: Using keyword arguments
744
- viewer = SimpleInteractiveViewerWidget(
745
- image_uri='data:image/png;base64,...', # Base64 encoded image
746
- elements=[
747
- {
748
- 'type': 'text',
749
- 'x0': 100,
750
- 'y0': 200,
751
- 'x1': 300,
752
- 'y1': 220,
753
- 'text': 'Sample text'
754
- }
755
- ]
756
- )
757
-
758
- # Method 3: Using a Page object
759
- from natural_pdf.core.page import Page
760
- page = doc.pages[0] # Assuming 'doc' is a Document object
761
- viewer = SimpleInteractiveViewerWidget.from_page(page)
762
-
763
- # Display the widget
764
- viewer
765
- """
630
+ img = Image.open(BytesIO(img_object)).convert("RGB")
631
+ except Exception:
632
+ raise TypeError(
633
+ f"page.to_image() returned unexpected type: {type(img_object)}"
634
+ )
635
+ logger.debug(f"Successfully rendered page {page.index} using to_image()")
636
+ except Exception as render_err:
637
+ logger.error(
638
+ f"Error rendering page {page.index} image for widget: {render_err}",
639
+ exc_info=True,
640
+ )
641
+ # Return None or raise the error? Let's raise for now to make it clear.
642
+ raise ValueError(f"Failed to render page image: {render_err}") from render_err
643
+
644
+ buffered = BytesIO()
645
+ img.save(buffered, format="PNG")
646
+ img_str = base64.b64encode(buffered.getvalue()).decode()
647
+ image_uri = f"data:image/png;base64,{img_str}"
648
+
649
+ # Convert elements to dict format
650
+ elements = []
651
+ # Use page.elements directly if available, otherwise fallback to find_all
652
+ page_elements = getattr(page, "elements", page.find_all("*"))
653
+
654
+ # Filter out 'char' elements
655
+ filtered_page_elements = [
656
+ el for el in page_elements if getattr(el, "type", "").lower() != "char"
657
+ ]
658
+ logger.debug(
659
+ f"Filtered out char elements, keeping {len(filtered_page_elements)} elements."
660
+ )
661
+
662
+ # Define a list of common/useful attributes (properties) to check for
663
+ default_attributes_to_get = [
664
+ "text",
665
+ "fontname",
666
+ "size",
667
+ "bold",
668
+ "italic",
669
+ "color",
670
+ "linewidth", # For lines (pdfplumber uses 'linewidth')
671
+ "is_horizontal",
672
+ "is_vertical", # For lines
673
+ "source",
674
+ "confidence", # For text/OCR
675
+ "label", # Common for layout elements
676
+ "model", # Add the model name (engine)
677
+ # Add any other common properties you expect from your elements
678
+ "upright",
679
+ "direction", # from pdfplumber chars/words
680
+ ]
681
+
682
+ for i, element in enumerate(filtered_page_elements):
683
+ # Get original coordinates and calculated width/height (always present via base class)
684
+ original_x0 = element.x0
685
+ original_y0 = element.top
686
+ original_x1 = element.x1
687
+ original_y1 = element.bottom
688
+ width = element.width
689
+ height = element.height
690
+
691
+ # Base element dict with required info
692
+ elem_dict = {
693
+ "id": i,
694
+ # Use the standardized .type property
695
+ "type": element.type,
696
+ # Scaled coordinates for positioning in HTML/SVG
697
+ "x0": original_x0 * scale,
698
+ "y0": original_y0 * scale,
699
+ "x1": original_x1 * scale,
700
+ "y1": original_y1 * scale,
701
+ "width": width * scale,
702
+ "height": height * scale,
703
+ }
704
+
705
+ # --- Get Default Attributes --- #
706
+ attributes_found = set()
707
+ for attr_name in default_attributes_to_get:
708
+ if hasattr(element, attr_name):
709
+ try:
710
+ value = getattr(element, attr_name)
711
+ # Convert non-JSON serializable types to string
712
+ processed_value = value
713
+ if (
714
+ not isinstance(value, (str, int, float, bool, list, dict, tuple))
715
+ and value is not None
716
+ ):
717
+ processed_value = str(value)
718
+ elem_dict[attr_name] = processed_value
719
+ attributes_found.add(attr_name)
720
+ except Exception as e:
721
+ logger.warning(
722
+ f"Could not get or process default attribute '{attr_name}' for element {i} ({element.type}): {e}"
723
+ )
724
+
725
+ # --- Get User-Requested Attributes (if any) --- #
726
+ if include_attributes:
727
+ for attr_name in include_attributes:
728
+ # Only process if not already added and exists
729
+ if attr_name not in attributes_found and hasattr(element, attr_name):
730
+ try:
731
+ value = getattr(element, attr_name)
732
+ processed_value = value
733
+ if (
734
+ not isinstance(
735
+ value, (str, int, float, bool, list, dict, tuple)
736
+ )
737
+ and value is not None
738
+ ):
739
+ processed_value = str(value)
740
+ elem_dict[attr_name] = processed_value
741
+ except Exception as e:
742
+ logger.warning(
743
+ f"Could not get or process requested attribute '{attr_name}' for element {i} ({element.type}): {e}"
744
+ )
745
+ for attr_name in elem_dict:
746
+ if isinstance(elem_dict[attr_name], float):
747
+ elem_dict[attr_name] = round(elem_dict[attr_name], 2)
748
+ elements.append(elem_dict)
749
+
750
+ logger.debug(
751
+ f"Prepared {len(elements)} elements for widget with scaled coordinates and curated attributes."
752
+ )
753
+
754
+ # Create and return widget
755
+ # The actual JSON conversion happens when the data is sent to the frontend
756
+ return cls(image_uri=image_uri, elements=elements)
757
+
758
+ # Keep the original widget class for reference, but make it not register
759
+ # by commenting out the decorator
760
+ # @widgets.register
761
+ class InteractiveViewerWidget(widgets.DOMWidget):
762
+ """Jupyter widget for interactively viewing PDF page elements."""
763
+
764
+ _view_name = Unicode("InteractiveViewerView").tag(sync=True)
765
+ _view_module = Unicode("viewer_widget").tag(sync=True)
766
+ _view_module_version = Unicode("^0.1.0").tag(sync=True)
767
+
768
+ image_uri = Unicode("").tag(sync=True)
769
+ page_dimensions = Dict({}).tag(sync=True)
770
+ elements = List([]).tag(sync=True)
771
+
772
+ def __init__(self, **kwargs):
773
+ super().__init__(**kwargs)
774
+ logger.debug("InteractiveViewerWidget initialized (Python).")
775
+
776
+ # Example observer (optional)
777
+ @observe("elements")
778
+ def _elements_changed(self, change):
779
+ # Only log if logger level allows
780
+ if logger.isEnabledFor(logging.DEBUG):
781
+ logger.debug(f"Python: Elements traitlet changed. New count: {len(change['new'])}")
782
+ # Can add Python-side logic here if needed when elements change
783
+ # print(f"Python: Elements traitlet changed. New count: {len(change['new'])}")
784
+ pass
785
+
786
+ except ImportError:
787
+ logger.info(
788
+ "Optional dependency 'ipywidgets' not found. Interactive viewer widgets will not be defined."
789
+ )
790
+ # Ensure class variables are None if import fails
791
+ SimpleInteractiveViewerWidget = None
792
+ InteractiveViewerWidget = None
793
+ _IPYWIDGETS_AVAILABLE = False # Explicitly set flag to False here too
794
+
795
+ # Example usage - kept outside the try/except as comments
796
+ # ... (existing example usage comments) ...