natural-pdf 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. natural_pdf/__init__.py +7 -2
  2. natural_pdf/analyzers/shape_detection_mixin.py +1092 -0
  3. natural_pdf/analyzers/text_options.py +9 -1
  4. natural_pdf/analyzers/text_structure.py +371 -58
  5. natural_pdf/classification/manager.py +3 -4
  6. natural_pdf/collections/pdf_collection.py +19 -39
  7. natural_pdf/core/element_manager.py +11 -1
  8. natural_pdf/core/highlighting_service.py +146 -75
  9. natural_pdf/core/page.py +287 -188
  10. natural_pdf/core/pdf.py +57 -42
  11. natural_pdf/elements/base.py +51 -0
  12. natural_pdf/elements/collections.py +362 -67
  13. natural_pdf/elements/line.py +5 -0
  14. natural_pdf/elements/region.py +396 -23
  15. natural_pdf/exporters/data/__init__.py +0 -0
  16. natural_pdf/exporters/data/pdf.ttf +0 -0
  17. natural_pdf/exporters/data/sRGB.icc +0 -0
  18. natural_pdf/exporters/hocr.py +40 -61
  19. natural_pdf/exporters/hocr_font.py +7 -13
  20. natural_pdf/exporters/original_pdf.py +10 -13
  21. natural_pdf/exporters/paddleocr.py +51 -11
  22. natural_pdf/exporters/searchable_pdf.py +0 -10
  23. natural_pdf/flows/__init__.py +12 -0
  24. natural_pdf/flows/collections.py +533 -0
  25. natural_pdf/flows/element.py +382 -0
  26. natural_pdf/flows/flow.py +216 -0
  27. natural_pdf/flows/region.py +458 -0
  28. natural_pdf/search/__init__.py +65 -52
  29. natural_pdf/search/lancedb_search_service.py +325 -0
  30. natural_pdf/search/numpy_search_service.py +255 -0
  31. natural_pdf/search/searchable_mixin.py +25 -71
  32. natural_pdf/selectors/parser.py +163 -8
  33. natural_pdf/widgets/viewer.py +22 -31
  34. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/METADATA +55 -49
  35. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/RECORD +38 -30
  36. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/WHEEL +1 -1
  37. natural_pdf/search/haystack_search_service.py +0 -687
  38. natural_pdf/search/haystack_utils.py +0 -474
  39. natural_pdf/utils/tqdm_utils.py +0 -51
  40. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/licenses/LICENSE +0 -0
  41. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/top_level.txt +0 -0
@@ -4,7 +4,6 @@ from abc import ABC, abstractmethod
4
4
  from typing import TYPE_CHECKING, Any, Dict, Generator, Iterable, List, Optional, Type, Union
5
5
 
6
6
  # Now import the flag from the canonical source - this import should always work
7
- from .haystack_utils import HAS_HAYSTACK_EXTRAS
8
7
 
9
8
  DEFAULT_SEARCH_COLLECTION_NAME = "default_collection"
10
9
 
@@ -108,7 +107,6 @@ class SearchableMixin(ABC):
108
107
  logger.info(
109
108
  f"Attaching provided SearchService instance (Collection: '{getattr(service, 'collection_name', '<Unknown>')}')."
110
109
  )
111
- # TODO: Add stricter type check? isinstance(service, SearchServiceProtocol) requires runtime_checkable
112
110
  self._search_service = service
113
111
  else:
114
112
  # Create new service
@@ -125,28 +123,17 @@ class SearchableMixin(ABC):
125
123
  logger.info(
126
124
  f"Creating new SearchService: name='{effective_collection_name}', persist={effective_persist}, model={embedding_model or 'default'}"
127
125
  )
128
- try:
129
- service_args = {
130
- "collection_name": effective_collection_name,
131
- "persist": effective_persist,
132
- **kwargs,
133
- }
134
- if embedding_model:
135
- service_args["embedding_model"] = embedding_model
136
- self._search_service = get_search_service(**service_args)
137
- except ImportError as ie: # Catch the specific ImportError first
138
- logger.error(f"Failed to create SearchService due to missing dependency: {ie}")
139
- raise ie # Re-raise the original ImportError
140
- except Exception as e:
141
- logger.error(
142
- f"Failed to create SearchService due to unexpected error: {e}", exc_info=True
143
- )
144
- # Keep the RuntimeError for other unexpected creation errors
145
- raise RuntimeError(
146
- "Could not create SearchService instance due to an unexpected error."
147
- ) from e
126
+
127
+ # Direct creation without try/except
128
+ service_args = {
129
+ "collection_name": effective_collection_name,
130
+ "persist": effective_persist,
131
+ **kwargs,
132
+ }
133
+ if embedding_model:
134
+ service_args["embedding_model"] = embedding_model
135
+ self._search_service = get_search_service(**service_args)
148
136
 
149
- # --- Optional Immediate Indexing (with safety check for persistent) ---
150
137
  if index:
151
138
  if not self._search_service: # Should not happen if logic above is correct
152
139
  raise RuntimeError(
@@ -176,8 +163,6 @@ class SearchableMixin(ABC):
176
163
  logger.warning(
177
164
  f"Proceeding with index=True and force_reindex=True for persistent index '{collection_name}'. Existing data will be deleted."
178
165
  )
179
- # else: # Not persistent, safe to proceed without existence check
180
- # logger.debug("Proceeding with index=True for non-persistent index.")
181
166
 
182
167
  # Proceed with indexing if checks passed or not applicable
183
168
  logger.info(
@@ -197,12 +182,8 @@ class SearchableMixin(ABC):
197
182
  f"Starting internal indexing process into SearchService collection '{collection_name}'..."
198
183
  )
199
184
 
200
- # Use the abstract method to get items
201
- try:
202
- indexable_items = list(self.get_indexable_items()) # Consume iterator
203
- except Exception as e:
204
- logger.error(f"Error calling get_indexable_items: {e}", exc_info=True)
205
- raise RuntimeError("Failed to retrieve indexable items for indexing.") from e
185
+ # Get indexable items without try/except
186
+ indexable_items = list(self.get_indexable_items()) # Consume iterator
206
187
 
207
188
  if not indexable_items:
208
189
  logger.warning(
@@ -211,27 +192,19 @@ class SearchableMixin(ABC):
211
192
  return
212
193
 
213
194
  logger.info(f"Prepared {len(indexable_items)} indexable items for indexing.")
214
- try:
215
- logger.debug(
216
- f"Calling index() on SearchService for collection '{collection_name}' (force_reindex={force_reindex})."
217
- )
218
- self._search_service.index(
219
- documents=indexable_items,
220
- embedder_device=embedder_device,
221
- force_reindex=force_reindex,
222
- )
223
- logger.info(
224
- f"Successfully completed indexing into SearchService collection '{collection_name}'."
225
- )
226
- except IndexConfigurationError as ice:
227
- logger.error(
228
- f"Indexing failed due to configuration error in collection '{collection_name}': {ice}",
229
- exc_info=True,
230
- )
231
- raise # Re-raise specific error
232
- except Exception as e: # Catch other indexing errors from the service
233
- logger.error(f"Indexing failed for collection '{collection_name}': {e}", exc_info=True)
234
- raise RuntimeError(f"Indexing failed for collection '{collection_name}'.") from e
195
+ logger.debug(
196
+ f"Calling index() on SearchService for collection '{collection_name}' (force_reindex={force_reindex})."
197
+ )
198
+
199
+ # Call index without try/except
200
+ self._search_service.index(
201
+ documents=indexable_items,
202
+ embedder_device=embedder_device,
203
+ force_reindex=force_reindex,
204
+ )
205
+ logger.info(
206
+ f"Successfully completed indexing into SearchService collection '{collection_name}'."
207
+ )
235
208
 
236
209
  def index_for_search(
237
210
  self,
@@ -254,14 +227,12 @@ class SearchableMixin(ABC):
254
227
  Returns:
255
228
  Self for method chaining.
256
229
  """
257
- # --- Ensure Service is Initialized (Use Default if Needed) ---
258
230
  if not self._search_service:
259
231
  logger.info(
260
232
  "Search service not initialized prior to index_for_search. Initializing default in-memory service."
261
233
  )
262
234
  self.init_search() # Call init with defaults
263
235
 
264
- # --- Perform Indexing ---
265
236
  self._perform_indexing(force_reindex=force_reindex, embedder_device=embedder_device)
266
237
  return self
267
238
 
@@ -289,7 +260,6 @@ class SearchableMixin(ABC):
289
260
  RuntimeError: If no search service is configured or provided, or if search fails.
290
261
  FileNotFoundError: If the collection managed by the service does not exist.
291
262
  """
292
- # --- Determine which Search Service to use ---
293
263
  effective_service = search_service or self._search_service
294
264
  if not effective_service:
295
265
  raise RuntimeError(
@@ -302,21 +272,9 @@ class SearchableMixin(ABC):
302
272
  f"Searching collection '{collection_name}' via {type(effective_service).__name__}..."
303
273
  )
304
274
 
305
- # --- Prepare Query and Options ---
306
275
  query_input = query
307
- # Example: Handle Region query - maybe move this logic into HaystackSearchService.search?
308
- # If we keep it here, it makes the mixin less generic.
309
- # Let's assume the SearchService handles the query type appropriately for now.
310
- # if isinstance(query, Region):
311
- # logger.debug("Query is a Region object. Extracting text.")
312
- # query_input = query.extract_text()
313
- # if not query_input or query_input.isspace():
314
- # logger.warning("Region provided for query has no extractable text.")
315
- # return []
316
-
317
276
  effective_options = options if options is not None else TextSearchOptions()
318
277
 
319
- # --- Call SearchService Search Method ---
320
278
  try:
321
279
  results = effective_service.search(
322
280
  query=query_input,
@@ -336,7 +294,6 @@ class SearchableMixin(ABC):
336
294
  # Consider wrapping in a SearchError?
337
295
  raise RuntimeError(f"Search failed in collection '{collection_name}'.") from e
338
296
 
339
- # --- NEW Sync Method ---
340
297
  def sync_index(
341
298
  self,
342
299
  strategy: str = "full", # 'full' (add/update/delete) or 'upsert_only'
@@ -378,7 +335,6 @@ class SearchableMixin(ABC):
378
335
  )
379
336
  summary = {"added": 0, "updated": 0, "deleted": 0, "skipped": 0}
380
337
 
381
- # --- Check Service Capabilities for 'full' sync ---
382
338
  if strategy == "full":
383
339
  required_methods = ["list_documents", "delete_documents"]
384
340
  missing_methods = [m for m in required_methods if not hasattr(self._search_service, m)]
@@ -388,7 +344,6 @@ class SearchableMixin(ABC):
388
344
  f"is missing required methods for 'full' sync strategy: {', '.join(missing_methods)}"
389
345
  )
390
346
 
391
- # --- 1. Get Desired State (from current collection) ---
392
347
  desired_state: Dict[str, Indexable] = {} # {id: item}
393
348
  desired_hashes: Dict[str, Optional[str]] = {} # {id: hash or None}
394
349
  try:
@@ -426,7 +381,6 @@ class SearchableMixin(ABC):
426
381
 
427
382
  logger.info(f"Desired state contains {len(desired_state)} indexable items.")
428
383
 
429
- # --- 2. Handle Different Strategies ---
430
384
  if strategy == "upsert_only":
431
385
  # Simple case: just index everything, let the service handle upserts
432
386
  items_to_index = list(desired_state.values())
@@ -71,6 +71,91 @@ def safe_parse_color(value_str: str) -> tuple:
71
71
  return (0, 0, 0)
72
72
 
73
73
 
74
+ def _split_top_level_or(selector: str) -> List[str]:
75
+ """
76
+ Split a selector string on top-level OR operators (| or ,) only.
77
+
78
+ Respects parsing contexts and does not split when | or , appear inside:
79
+ - Quoted strings (both single and double quotes)
80
+ - Parentheses (for pseudo-class arguments like :not(...))
81
+ - Square brackets (for attribute selectors like [attr="value"])
82
+
83
+ Args:
84
+ selector: The selector string to split
85
+
86
+ Returns:
87
+ List of selector parts. If no top-level OR operators found, returns [selector].
88
+
89
+ Examples:
90
+ >>> _split_top_level_or('text:contains("a|b")|text:bold')
91
+ ['text:contains("a|b")', 'text:bold']
92
+
93
+ >>> _split_top_level_or('text:contains("hello,world")')
94
+ ['text:contains("hello,world")']
95
+ """
96
+ if not selector or not isinstance(selector, str):
97
+ return [selector] if selector else []
98
+
99
+ parts = []
100
+ current_part = ""
101
+ i = 0
102
+
103
+ # Parsing state
104
+ in_double_quotes = False
105
+ in_single_quotes = False
106
+ paren_depth = 0
107
+ bracket_depth = 0
108
+
109
+ while i < len(selector):
110
+ char = selector[i]
111
+
112
+ # Handle escape sequences in quotes
113
+ if i > 0 and selector[i-1] == '\\':
114
+ current_part += char
115
+ i += 1
116
+ continue
117
+
118
+ # Handle quote state changes
119
+ if char == '"' and not in_single_quotes:
120
+ in_double_quotes = not in_double_quotes
121
+ elif char == "'" and not in_double_quotes:
122
+ in_single_quotes = not in_single_quotes
123
+
124
+ # Handle parentheses and brackets only when not in quotes
125
+ elif not in_double_quotes and not in_single_quotes:
126
+ if char == '(':
127
+ paren_depth += 1
128
+ elif char == ')':
129
+ paren_depth -= 1
130
+ elif char == '[':
131
+ bracket_depth += 1
132
+ elif char == ']':
133
+ bracket_depth -= 1
134
+
135
+ # Check for top-level OR operators
136
+ elif (char == '|' or char == ',') and paren_depth == 0 and bracket_depth == 0:
137
+ # Found a top-level OR operator
138
+ part = current_part.strip()
139
+ if part: # Only add non-empty parts
140
+ parts.append(part)
141
+ current_part = ""
142
+ i += 1
143
+ continue
144
+
145
+ # Add character to current part
146
+ current_part += char
147
+ i += 1
148
+
149
+ # Add the final part
150
+ final_part = current_part.strip()
151
+ if final_part:
152
+ parts.append(final_part)
153
+
154
+ # If we only found one part, return it as a single-element list
155
+ # If we found multiple parts, those are the OR-separated parts
156
+ return parts if parts else [selector]
157
+
158
+
74
159
  def parse_selector(selector: str) -> Dict[str, Any]:
75
160
  """
76
161
  Parse a CSS-like selector string into a structured selector object.
@@ -80,12 +165,28 @@ def parse_selector(selector: str) -> Dict[str, Any]:
80
165
  - Attribute presence (e.g., '[data-id]')
81
166
  - Attribute value checks with various operators (e.g., '[count=5]', '[name*="bold"]'')
82
167
  - Pseudo-classes (e.g., ':contains("Total")', ':empty', ':not(...)')
168
+ - OR operators (e.g., 'text:contains("A")|text:bold', 'sel1,sel2')
83
169
 
84
170
  Args:
85
171
  selector: CSS-like selector string
86
172
 
87
173
  Returns:
88
- Dict representing the parsed selector
174
+ Dict representing the parsed selector, or compound selector with OR logic
175
+
176
+ Examples:
177
+ >>> parse_selector('text:contains("hello")') # Single selector
178
+ {'type': 'text', 'pseudo_classes': [{'name': 'contains', 'args': 'hello'}], ...}
179
+
180
+ >>> parse_selector('text:contains("A")|text:bold') # OR with pipe
181
+ {'type': 'or', 'selectors': [...]}
182
+
183
+ >>> parse_selector('text:contains("A"),line[width>5]') # OR with comma
184
+ {'type': 'or', 'selectors': [...]}
185
+
186
+ Note:
187
+ OR operators work with all selector types except spatial pseudo-classes
188
+ (:above, :below, :near, :left-of, :right-of) which require page context.
189
+ Spatial relationships within OR selectors are not currently supported.
89
190
  """
90
191
  result = {
91
192
  "type": "any",
@@ -100,6 +201,36 @@ def parse_selector(selector: str) -> Dict[str, Any]:
100
201
 
101
202
  selector = selector.strip()
102
203
 
204
+ # --- Handle OR operators first (| or ,) ---
205
+ # Check if selector contains OR operators at the top level only
206
+ # (not inside quotes, parentheses, or brackets)
207
+ or_parts = _split_top_level_or(selector)
208
+
209
+ # If we found OR parts, parse each one recursively and return compound selector
210
+ if len(or_parts) > 1:
211
+ parsed_selectors = []
212
+ for part in or_parts:
213
+ try:
214
+ parsed_selectors.append(parse_selector(part))
215
+ except (ValueError, TypeError) as e:
216
+ logger.warning(f"Skipping invalid OR selector part '{part}': {e}")
217
+ continue
218
+
219
+ if len(parsed_selectors) > 1:
220
+ return {
221
+ "type": "or",
222
+ "selectors": parsed_selectors
223
+ }
224
+ elif len(parsed_selectors) == 1:
225
+ # Only one valid part, return it directly
226
+ return parsed_selectors[0]
227
+ else:
228
+ # No valid parts, return default
229
+ logger.warning(f"No valid parts found in OR selector '{original_selector_for_error}', returning default selector")
230
+ return result
231
+
232
+ # --- Continue with single selector parsing (existing logic) ---
233
+
103
234
  # --- Handle wildcard selector explicitly ---
104
235
  if selector == "*":
105
236
  # Wildcard matches any type, already the default.
@@ -109,12 +240,6 @@ def parse_selector(selector: str) -> Dict[str, Any]:
109
240
 
110
241
  # 1. Extract type (optional, at the beginning)
111
242
  # Only run if selector wasn't '*'
112
- if selector:
113
- type_match = re.match(r"^([a-zA-Z_\-]+)", selector)
114
- if type_match:
115
- result["type"] = type_match.group(1).lower()
116
- selector = selector[len(type_match.group(0)) :].strip()
117
- # Only run if selector wasn't '*'
118
243
  if selector:
119
244
  type_match = re.match(r"^([a-zA-Z_\-]+)", selector)
120
245
  if type_match:
@@ -597,12 +722,42 @@ def selector_to_filter_func(selector: Dict[str, Any], **kwargs) -> Callable[[Any
597
722
  To inspect the individual filters, call `_build_filter_list` directly.
598
723
 
599
724
  Args:
600
- selector: Parsed selector dictionary
725
+ selector: Parsed selector dictionary (single or compound OR selector)
601
726
  **kwargs: Additional filter parameters (e.g., regex, case).
602
727
 
603
728
  Returns:
604
729
  Function that takes an element and returns True if it matches the selector.
605
730
  """
731
+ # Handle compound OR selectors
732
+ if selector.get("type") == "or":
733
+ sub_selectors = selector.get("selectors", [])
734
+ if not sub_selectors:
735
+ # Empty OR selector, return a function that never matches
736
+ return lambda element: False
737
+
738
+ # Create filter functions for each sub-selector
739
+ sub_filter_funcs = []
740
+ for sub_selector in sub_selectors:
741
+ sub_filter_funcs.append(selector_to_filter_func(sub_selector, **kwargs))
742
+
743
+ if logger.isEnabledFor(logging.DEBUG):
744
+ logger.debug(f"Creating OR filter with {len(sub_filter_funcs)} sub-selectors")
745
+
746
+ # Return OR combination - element matches if ANY sub-selector matches
747
+ def or_filter(element):
748
+ for func in sub_filter_funcs:
749
+ try:
750
+ if func(element):
751
+ return True
752
+ except Exception as e:
753
+ logger.error(f"Error applying OR sub-filter to element: {e}", exc_info=True)
754
+ # Continue to next sub-filter on error
755
+ continue
756
+ return False
757
+
758
+ return or_filter
759
+
760
+ # Handle single selectors (existing logic)
606
761
  filter_list = _build_filter_list(selector, **kwargs)
607
762
 
608
763
  if logger.isEnabledFor(logging.DEBUG):
@@ -31,20 +31,6 @@ try:
31
31
  from PIL import Image
32
32
  from traitlets import Dict, List, Unicode, observe
33
33
 
34
- # --- Read JS code from file (only needed if widgets are defined) --- #
35
- _MODULE_DIR = os.path.dirname(__file__)
36
- _FRONTEND_JS_PATH = os.path.join(_MODULE_DIR, "frontend", "viewer.js")
37
- try:
38
- with open(_FRONTEND_JS_PATH, "r", encoding="utf-8") as f:
39
- _FRONTEND_JS_CODE = f.read()
40
- logger.debug(f"Successfully read frontend JS from: {_FRONTEND_JS_PATH}")
41
- except FileNotFoundError:
42
- logger.error(f"Frontend JS file not found at {_FRONTEND_JS_PATH}. Widget will likely fail.")
43
- _FRONTEND_JS_CODE = "console.error('Frontend JS file not found! Widget cannot load.');"
44
- except Exception as e:
45
- logger.error(f"Error reading frontend JS file {_FRONTEND_JS_PATH}: {e}")
46
- _FRONTEND_JS_CODE = f"console.error('Error reading frontend JS file: {e}');"
47
-
48
34
  # --- Define Widget Classes ONLY if ipywidgets is available ---
49
35
  class SimpleInteractiveViewerWidget(widgets.DOMWidget):
50
36
  def __init__(self, pdf_data=None, **kwargs):
@@ -631,7 +617,7 @@ try:
631
617
 
632
618
  # Filter out 'char' elements
633
619
  filtered_page_elements = [
634
- el for el in page_elements if getattr(el, "type", "").lower() != "char"
620
+ el for el in page_elements if str(getattr(el, "type", "")).lower() != "char"
635
621
  ]
636
622
  logger.debug(
637
623
  f"Filtered out char elements, keeping {len(filtered_page_elements)} elements."
@@ -659,19 +645,21 @@ try:
659
645
 
660
646
  for i, element in enumerate(filtered_page_elements):
661
647
  # Get original coordinates and calculated width/height (always present via base class)
648
+ # Assuming 'element' is always an object with these attributes now
662
649
  original_x0 = element.x0
663
650
  original_y0 = element.top
664
651
  original_x1 = element.x1
665
652
  original_y1 = element.bottom
666
653
  width = element.width
667
654
  height = element.height
655
+ current_element_type = element.type # Direct attribute access
668
656
  scale = 1.0
669
657
 
670
658
  # Base element dict with required info
671
659
  elem_dict = {
672
660
  "id": i,
673
661
  # Use the standardized .type property
674
- "type": element.type,
662
+ "type": current_element_type,
675
663
  # Scaled coordinates for positioning in HTML/SVG
676
664
  "x0": original_x0 * scale,
677
665
  "y0": original_y0 * scale,
@@ -684,21 +672,24 @@ try:
684
672
  # --- Get Default Attributes --- #
685
673
  attributes_found = set()
686
674
  for attr_name in default_attributes_to_get:
675
+ # Assuming 'element' is always an object
687
676
  if hasattr(element, attr_name):
688
677
  try:
689
- value = getattr(element, attr_name)
678
+ value_to_process = getattr(element, attr_name)
690
679
  # Convert non-JSON serializable types to string
691
- processed_value = value
680
+ processed_value = value_to_process
692
681
  if (
693
- not isinstance(value, (str, int, float, bool, list, dict, tuple))
694
- and value is not None
682
+ not isinstance(
683
+ value_to_process, (str, int, float, bool, list, dict, tuple)
684
+ )
685
+ and value_to_process is not None
695
686
  ):
696
- processed_value = str(value)
687
+ processed_value = str(value_to_process)
697
688
  elem_dict[attr_name] = processed_value
698
689
  attributes_found.add(attr_name)
699
690
  except Exception as e:
700
691
  logger.warning(
701
- f"Could not get or process default attribute '{attr_name}' for element {i} ({element.type}): {e}"
692
+ f"Could not get or process default attribute '{attr_name}' for element {i} ({current_element_type}): {e}"
702
693
  )
703
694
 
704
695
  # --- Get User-Requested Attributes (if any) --- #
@@ -707,23 +698,23 @@ try:
707
698
  # Only process if not already added and exists
708
699
  if attr_name not in attributes_found and hasattr(element, attr_name):
709
700
  try:
710
- value = getattr(element, attr_name)
711
- processed_value = value
701
+ value_to_process = getattr(element, attr_name)
702
+ processed_value = value_to_process
712
703
  if (
713
704
  not isinstance(
714
- value, (str, int, float, bool, list, dict, tuple)
705
+ value_to_process, (str, int, float, bool, list, dict, tuple)
715
706
  )
716
- and value is not None
707
+ and value_to_process is not None
717
708
  ):
718
- processed_value = str(value)
709
+ processed_value = str(value_to_process)
719
710
  elem_dict[attr_name] = processed_value
720
711
  except Exception as e:
721
712
  logger.warning(
722
- f"Could not get or process requested attribute '{attr_name}' for element {i} ({element.type}): {e}"
713
+ f"Could not get or process requested attribute '{attr_name}' for element {i} ({current_element_type}): {e}"
723
714
  )
724
- for attr_name in elem_dict:
725
- if isinstance(elem_dict[attr_name], float):
726
- elem_dict[attr_name] = round(elem_dict[attr_name], 2)
715
+ for attr_name_val in elem_dict: # Renamed to avoid conflict
716
+ if isinstance(elem_dict[attr_name_val], float):
717
+ elem_dict[attr_name_val] = round(elem_dict[attr_name_val], 2)
727
718
  elements.append(elem_dict)
728
719
 
729
720
  logger.debug(