PyPI - notionary - Versions diffs - 0.1.24__py3-none-any.whl → 0.1.26__py3-none-any.whl - Mend

notionary 0.1.24__py3-none-any.whl → 0.1.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

notionary/elements/audio_element.py +41 -38
notionary/elements/bookmark_element.py +36 -27
notionary/elements/bulleted_list_element.py +28 -21
notionary/elements/callout_element.py +39 -31
notionary/elements/code_block_element.py +38 -26
notionary/elements/divider_element.py +29 -18
notionary/elements/embed_element.py +37 -28
notionary/elements/heading_element.py +39 -24
notionary/elements/image_element.py +33 -24
notionary/elements/mention_element.py +40 -29
notionary/elements/notion_block_element.py +13 -31
notionary/elements/numbered_list_element.py +29 -20
notionary/elements/paragraph_element.py +37 -31
notionary/elements/prompts/element_prompt_content.py +91 -8
notionary/elements/prompts/synthax_prompt_builder.py +64 -17
notionary/elements/qoute_element.py +72 -74
notionary/elements/registry/block_element_registry.py +1 -1
notionary/elements/registry/block_element_registry_builder.py +6 -9
notionary/elements/table_element.py +49 -36
notionary/elements/text_inline_formatter.py +23 -15
notionary/elements/{todo_lists.py → todo_element.py} +34 -25
notionary/elements/toggle_element.py +184 -108
notionary/elements/toggleable_heading_element.py +269 -0
notionary/elements/video_element.py +37 -28
notionary/page/content/page_content_manager.py +5 -8
notionary/page/markdown_to_notion_converter.py +269 -274
notionary/page/notion_page.py +1 -1
notionary/page/notion_to_markdown_converter.py +20 -95
{notionary-0.1.24.dist-info → notionary-0.1.26.dist-info}/METADATA +1 -1
notionary-0.1.26.dist-info/RECORD +58 -0
{notionary-0.1.24.dist-info → notionary-0.1.26.dist-info}/WHEEL +1 -1
notionary/elements/column_element.py +0 -307
notionary-0.1.24.dist-info/RECORD +0 -58
{notionary-0.1.24.dist-info → notionary-0.1.26.dist-info}/licenses/LICENSE +0 -0
{notionary-0.1.24.dist-info → notionary-0.1.26.dist-info}/top_level.txt +0 -0

notionary/page/markdown_to_notion_converter.py CHANGED Viewed

@@ -7,180 +7,157 @@ from notionary.elements.registry.block_element_registry_builder import (
 class MarkdownToNotionConverter:
+    """Converts Markdown text to Notion API block format with support for pipe syntax for nested structures."""
     SPACER_MARKER = "<!-- spacer -->"
-    MULTILINE_CONTENT_MARKER = "<!-- REMOVED_MULTILINE_CONTENT -->"
-    TOGGLE_MARKER = "<!-- toggle_content -->"
-    TOGGLE_MARKER_PREFIX = "<!-- toggle_"
-    TOGGLE_MARKER_SUFFIX = " -->"
+    TOGGLE_ELEMENT_TYPES = ["ToggleElement", "ToggleableHeadingElement"]
+    PIPE_CONTENT_PATTERN = r"^\|\s?(.*)$"
     def __init__(self, block_registry: Optional[BlockElementRegistry] = None):
-        """
-        Initialize the MarkdownToNotionConverter.
-        Args:
-            block_registry: Optional registry of Notion block elements
-        """
+        """Initialize the converter with an optional custom block registry."""
         self._block_registry = (
             block_registry or BlockElementRegistryBuilder().create_full_registry()
         )
-        self._setup_element_callbacks()
-    def _setup_element_callbacks(self) -> None:
-        """Registriert den Converter als Callback für Elemente, die ihn benötigen."""
+    def convert(self, markdown_text: str) -> List[Dict[str, Any]]:
+        """Convert markdown text to Notion API block format."""
+        if not markdown_text:
+            return []
-        for element in self._block_registry.get_elements():
-            if hasattr(element, "set_converter_callback"):
-                element.set_converter_callback(self.convert)
+        # Collect all blocks with their positions in the text
+        all_blocks_with_positions = self._collect_all_blocks_with_positions(
+            markdown_text
+        )
-    def convert(self, markdown_text: str) -> List[Dict[str, Any]]:
-        """
-        Convert markdown text to Notion API block format.
+        # Sort all blocks by their position in the text
+        all_blocks_with_positions.sort(key=lambda x: x[0])
-        Args:
-            markdown_text: The markdown text to convert
+        # Extract just the blocks without position information
+        blocks = [block for _, _, block in all_blocks_with_positions]
-        Returns:
-            List of Notion blocks
-        """
-        if not markdown_text:
-            return []
+        # Process spacing between blocks
+        return self._process_block_spacing(blocks)
-        # We'll process all blocks in order, preserving their original positions
+    def _collect_all_blocks_with_positions(
+        self, markdown_text: str
+    ) -> List[Tuple[int, int, Dict[str, Any]]]:
+        """Collect all blocks with their positions in the text."""
         all_blocks = []
-        # First, identify all toggle blocks
-        toggle_blocks = self._identify_toggle_blocks(markdown_text)
-        # If we have toggles, process them and extract positions
-        if toggle_blocks:
-            all_blocks.extend(toggle_blocks)
+        # Process toggleable elements first (both Toggle and ToggleableHeading)
+        toggleable_blocks = self._identify_toggleable_blocks(markdown_text)
         # Process other multiline elements
-        multiline_blocks = self._identify_multiline_blocks(markdown_text, toggle_blocks)
-        if multiline_blocks:
-            all_blocks.extend(multiline_blocks)
-        # Process remaining text line by line
-        line_blocks = self._process_text_lines(
-            markdown_text, toggle_blocks + multiline_blocks
+        multiline_blocks = self._identify_multiline_blocks(
+            markdown_text, toggleable_blocks
         )
-        if line_blocks:
-            all_blocks.extend(line_blocks)
-        # Sort all blocks by their position in the text
-        all_blocks.sort(key=lambda x: x[0])
+        # Process remaining text line by line
+        processed_blocks = toggleable_blocks + multiline_blocks
+        line_blocks = self._process_text_lines(markdown_text, processed_blocks)
-        # Extract just the blocks without position information
-        blocks = [block for _, _, block in all_blocks]
+        # Combine all blocks
+        all_blocks.extend(toggleable_blocks)
+        all_blocks.extend(multiline_blocks)
+        all_blocks.extend(line_blocks)
-        # Process spacing between blocks
-        return self._process_block_spacing(blocks)
+        return all_blocks
-    def _identify_toggle_blocks(
+    def _identify_toggleable_blocks(
         self, text: str
     ) -> List[Tuple[int, int, Dict[str, Any]]]:
-        """
-        Identify all toggle blocks in the text without replacing them.
+        """Identify all toggleable blocks (Toggle and ToggleableHeading) in the text."""
+        toggleable_blocks = []
+        # Find all toggleable elements
+        toggleable_elements = self._get_toggleable_elements()
-        Args:
-            text: The text to process
+        if not toggleable_elements:
+            return []
+        # Process each toggleable element type
+        for element in toggleable_elements:
+            if hasattr(element, "find_matches"):
+                # Find matches with context awareness
+                matches = element.find_matches(text, self.convert, context_aware=True)
+                if matches:
+                    toggleable_blocks.extend(matches)
-        Returns:
-            List of (start_pos, end_pos, block) tuples
-        """
-        # Find toggle element in registry
-        toggle_element = None
+        return toggleable_blocks
+    def _get_toggleable_elements(self):
+        """Return all toggleable elements from the registry."""
+        toggleable_elements = []
         for element in self._block_registry.get_elements():
             if (
                 element.is_multiline()
                 and hasattr(element, "match_markdown")
-                and element.__name__ == "ToggleElement"
+                and element.__name__ in self.TOGGLE_ELEMENT_TYPES
             ):
-                toggle_element = element
-                break
-        if not toggle_element:
-            return []
-        # Use the find_matches method with context awareness
-        # Pass the converter's convert method as a callback to process nested content
-        toggle_blocks = toggle_element.find_matches(
-            text, self.convert, context_aware=True
-        )
-        return toggle_blocks
+                toggleable_elements.append(element)
+        return toggleable_elements
     def _identify_multiline_blocks(
         self, text: str, exclude_blocks: List[Tuple[int, int, Dict[str, Any]]]
     ) -> List[Tuple[int, int, Dict[str, Any]]]:
-        """
-        Identify all multiline blocks (except toggle blocks) without altering the text.
-        Args:
-            text: The text to process
-            exclude_blocks: Blocks to exclude (e.g., already identified toggle blocks)
-        Returns:
-            List of (start_pos, end_pos, block) tuples
-        """
-        # Get all multiline elements except ToggleElement
-        multiline_elements = [
-            element
-            for element in self._block_registry.get_multiline_elements()
-            if element.__name__ != "ToggleElement"
-        ]
+        """Identify all multiline blocks (except toggleable blocks)."""
+        # Get all multiline elements except toggleable ones
+        multiline_elements = self._get_non_toggleable_multiline_elements()
         if not multiline_elements:
             return []
-        # Create a set of ranges to exclude
-        exclude_ranges = set()
-        for start, end, _ in exclude_blocks:
-            exclude_ranges.update(range(start, end + 1))
+        # Create set of positions to exclude
+        excluded_ranges = self._create_excluded_position_set(exclude_blocks)
         multiline_blocks = []
         for element in multiline_elements:
             if not hasattr(element, "find_matches"):
                 continue
-            # Find all matches for this element
-            if hasattr(element, "set_converter_callback"):
-                matches = element.find_matches(text, self.convert)
-            else:
-                matches = element.find_matches(text)
+            matches = element.find_matches(text)
             if not matches:
                 continue
-            # Add only blocks that don't overlap with excluded ranges
-            for start, end, block in matches:
-                # Check if this block overlaps with any excluded range
-                if any(start <= i <= end for i in exclude_ranges):
+            # Add blocks that don't overlap with excluded positions
+            for start_pos, end_pos, block in matches:
+                if self._overlaps_with_excluded_positions(
+                    start_pos, end_pos, excluded_ranges
+                ):
                     continue
-                multiline_blocks.append((start, end, block))
+                multiline_blocks.append((start_pos, end_pos, block))
         return multiline_blocks
+    def _get_non_toggleable_multiline_elements(self):
+        """Get multiline elements that are not toggleable elements."""
+        return [
+            element
+            for element in self._block_registry.get_multiline_elements()
+            if element.__name__ not in self.TOGGLE_ELEMENT_TYPES
+        ]
+    def _create_excluded_position_set(self, exclude_blocks):
+        """Create a set of positions to exclude based on block ranges."""
+        excluded_positions = set()
+        for start_pos, end_pos, _ in exclude_blocks:
+            excluded_positions.update(range(start_pos, end_pos + 1))
+        return excluded_positions
+    def _overlaps_with_excluded_positions(self, start_pos, end_pos, excluded_positions):
+        """Check if a range overlaps with any excluded positions."""
+        return any(pos in excluded_positions for pos in range(start_pos, end_pos + 1))
     def _process_text_lines(
         self, text: str, exclude_blocks: List[Tuple[int, int, Dict[str, Any]]]
     ) -> List[Tuple[int, int, Dict[str, Any]]]:
-        """
-        Process text line by line, excluding ranges already processed.
-        Args:
-            text: The text to process
-            exclude_blocks: Blocks to exclude (e.g., already identified toggle and multiline blocks)
-        Returns:
-            List of (start_pos, end_pos, block) tuples
-        """
+        """Process text line by line, excluding already processed ranges and handling pipe syntax lines."""
         if not text:
             return []
-        # Create a set of excluded positions
-        exclude_positions = set()
-        for start, end, _ in exclude_blocks:
-            exclude_positions.update(range(start, end + 1))
+        # Create set of excluded positions
+        excluded_positions = self._create_excluded_position_set(exclude_blocks)
         line_blocks = []
         lines = text.split("\n")
@@ -194,210 +171,245 @@ class MarkdownToNotionConverter:
             line_length = len(line) + 1  # +1 for newline
             line_end = current_pos + line_length - 1
-            # Skip lines that are part of excluded blocks
-            if any(current_pos <= pos <= line_end for pos in exclude_positions):
+            # Skip excluded lines and pipe syntax lines (they're part of toggleable content)
+            if self._overlaps_with_excluded_positions(
+                current_pos, line_end, excluded_positions
+            ) or self._is_pipe_syntax_line(line):
                 current_pos += line_length
                 continue
-            # Check for spacer marker
-            if self._is_spacer_marker(line):
-                line_blocks.append(
-                    (
-                        current_pos,
-                        current_pos + line_length - 1,
-                        self._create_empty_paragraph(),
-                    )
-                )
-                current_pos += line_length
-                continue
+            processed = self._process_line(
+                line,
+                current_pos,
+                line_end,
+                line_blocks,
+                current_paragraph,
+                paragraph_start,
+                in_todo_sequence,
+            )
-            # Process todos first to keep them grouped
-            todo_block = self._extract_todo_item(line)
-            if todo_block:
-                self._handle_todo_item(
-                    todo_block,
-                    line_length,
-                    current_pos,
-                    current_paragraph,
-                    paragraph_start,
-                    line_blocks,
-                    in_todo_sequence,
-                )
-                in_todo_sequence = True
-                current_pos += line_length
-                continue
+            current_pos = processed["current_pos"]
+            current_paragraph = processed["current_paragraph"]
+            paragraph_start = processed["paragraph_start"]
+            in_todo_sequence = processed["in_todo_sequence"]
-            if in_todo_sequence:
-                in_todo_sequence = False
+        # Process remaining paragraph
+        self._process_paragraph(
+            current_paragraph, paragraph_start, current_pos, line_blocks
+        )
-            if not line.strip():
-                self._process_paragraph_if_present(
-                    current_paragraph, paragraph_start, current_pos, line_blocks
-                )
-                current_paragraph = []
-                current_pos += line_length
-                continue
+        return line_blocks
-            special_block = self._extract_special_block(line)
-            if special_block:
-                self._process_paragraph_if_present(
-                    current_paragraph, paragraph_start, current_pos, line_blocks
-                )
-                line_blocks.append(
-                    (current_pos, current_pos + line_length - 1, special_block)
-                )
-                current_paragraph = []
-                current_pos += line_length
-                continue
+    def _is_pipe_syntax_line(self, line: str) -> bool:
+        """Check if a line uses pipe syntax (for nested content)."""
+        import re
-            # Handle as part of paragraph
-            if not current_paragraph:
-                paragraph_start = current_pos
-            current_paragraph.append(line)
-            current_pos += line_length
+        return bool(re.match(self.PIPE_CONTENT_PATTERN, line))
-        # Process any remaining paragraph content
-        self._process_paragraph_if_present(
-            current_paragraph, paragraph_start, current_pos, line_blocks
-        )
+    def _process_line(
+        self,
+        line: str,
+        current_pos: int,
+        line_end: int,
+        line_blocks: List[Tuple[int, int, Dict[str, Any]]],
+        current_paragraph: List[str],
+        paragraph_start: int,
+        in_todo_sequence: bool,
+    ) -> Dict[str, Any]:
+        """Process a single line of text."""
+        line_length = len(line) + 1  # +1 for newline
+        # Check for spacer
+        if self._is_spacer_line(line):
+            line_blocks.append((current_pos, line_end, self._create_empty_paragraph()))
+            return self._update_line_state(
+                current_pos + line_length,
+                current_paragraph,
+                paragraph_start,
+                in_todo_sequence,
+            )
-        return line_blocks
+        # Handle todo items
+        todo_block = self._extract_todo_item(line)
+        if todo_block:
+            return self._process_todo_line(
+                todo_block,
+                current_pos,
+                line_end,
+                line_blocks,
+                current_paragraph,
+                paragraph_start,
+                in_todo_sequence,
+                line_length,
+            )
-    def _is_spacer_marker(self, line: str) -> bool:
-        """Check if a line is a spacer marker."""
-        return line.strip() == self.SPACER_MARKER
+        if in_todo_sequence:
+            in_todo_sequence = False
-    def _extract_todo_item(self, line: str) -> Optional[Dict[str, Any]]:
-        """
-        Try to extract a todo item from a line.
+        # Handle empty lines
+        if not line.strip():
+            self._process_paragraph(
+                current_paragraph, paragraph_start, current_pos, line_blocks
+            )
+            return self._update_line_state(
+                current_pos + line_length, [], paragraph_start, False
+            )
-        Returns:
-            Todo block if line is a todo item, None otherwise
-        """
-        for element in self._block_registry.get_elements():
-            if (
-                not element.is_multiline()
-                and hasattr(element, "match_markdown")
-                and element.__name__ == "TodoElement"
-                and element.match_markdown(line)
-            ):
-                return element.markdown_to_notion(line)
-        return None
+        # Handle special blocks
+        special_block = self._extract_special_block(line)
+        if special_block:
+            self._process_paragraph(
+                current_paragraph, paragraph_start, current_pos, line_blocks
+            )
+            line_blocks.append((current_pos, line_end, special_block))
+            return self._update_line_state(
+                current_pos + line_length, [], paragraph_start, False
+            )
+        # Handle as paragraph
+        if not current_paragraph:
+            paragraph_start = current_pos
+        current_paragraph.append(line)
-    def _handle_todo_item(
+        return self._update_line_state(
+            current_pos + line_length,
+            current_paragraph,
+            paragraph_start,
+            in_todo_sequence,
+        )
+    def _is_spacer_line(self, line: str) -> bool:
+        """Check if a line is a spacer marker."""
+        return line.strip() == self.SPACER_MARKER
+    def _process_todo_line(
         self,
         todo_block: Dict[str, Any],
-        line_length: int,
         current_pos: int,
+        line_end: int,
+        line_blocks: List[Tuple[int, int, Dict[str, Any]]],
         current_paragraph: List[str],
         paragraph_start: int,
-        line_blocks: List[Tuple[int, int, Dict[str, Any]]],
         in_todo_sequence: bool,
-    ) -> None:
-        """Handle a todo item line."""
-        # If we were building a paragraph, finish it before starting todos
+        line_length: int,
+    ) -> Dict[str, Any]:
+        """Process a line that contains a todo item."""
+        # Finish paragraph if needed
         if not in_todo_sequence and current_paragraph:
-            self._process_paragraph_if_present(
+            self._process_paragraph(
                 current_paragraph, paragraph_start, current_pos, line_blocks
             )
-            current_paragraph.clear()
-        line_blocks.append((current_pos, current_pos + line_length - 1, todo_block))
+        line_blocks.append((current_pos, line_end, todo_block))
+        return self._update_line_state(
+            current_pos + line_length, [], paragraph_start, True
+        )
+    def _update_line_state(
+        self,
+        current_pos: int,
+        current_paragraph: List[str],
+        paragraph_start: int,
+        in_todo_sequence: bool,
+    ) -> Dict[str, Any]:
+        """Update and return the state after processing a line."""
+        return {
+            "current_pos": current_pos,
+            "current_paragraph": current_paragraph,
+            "paragraph_start": paragraph_start,
+            "in_todo_sequence": in_todo_sequence,
+        }
+    def _extract_todo_item(self, line: str) -> Optional[Dict[str, Any]]:
+        """Extract a todo item from a line if possible."""
+        todo_elements = [
+            element
+            for element in self._block_registry.get_elements()
+            if not element.is_multiline() and element.__name__ == "TodoElement"
+        ]
+        for element in todo_elements:
+            if element.match_markdown(line):
+                return element.markdown_to_notion(line)
+        return None
     def _extract_special_block(self, line: str) -> Optional[Dict[str, Any]]:
-        """
-        Try to extract a special block (not paragraph) from a line.
+        """Extract a special block (not paragraph) from a line if possible."""
+        non_multiline_elements = [
+            element
+            for element in self._block_registry.get_elements()
+            if not element.is_multiline()
+        ]
-        Returns:
-            Block if line is a special block, None otherwise
-        """
-        for element in self._block_registry.get_elements():
-            if (
-                not element.is_multiline()
-                and hasattr(element, "match_markdown")
-                and element.match_markdown(line)
-            ):
+        for element in non_multiline_elements:
+            if element.match_markdown(line):
                 block = element.markdown_to_notion(line)
                 if block and block.get("type") != "paragraph":
                     return block
         return None
-    def _process_paragraph_if_present(
+    def _process_paragraph(
         self,
         paragraph_lines: List[str],
         start_pos: int,
         end_pos: int,
         blocks: List[Tuple[int, int, Dict[str, Any]]],
     ) -> None:
-        """
-        Process a paragraph and add it to the blocks list if valid.
-        Args:
-            paragraph_lines: Lines that make up the paragraph
-            start_pos: Starting position of the paragraph
-            end_pos: Ending position of the paragraph
-            blocks: List to add the processed paragraph block to
-        """
+        """Process a paragraph and add it to blocks if valid."""
         if not paragraph_lines:
             return
         paragraph_text = "\n".join(paragraph_lines)
         block = self._block_registry.markdown_to_notion(paragraph_text)
-        if not block:
-            return
-        blocks.append((start_pos, end_pos, block))
+        if block:
+            blocks.append((start_pos, end_pos, block))
     def _process_block_spacing(
         self, blocks: List[Dict[str, Any]]
     ) -> List[Dict[str, Any]]:
-        """
-        Process blocks and add spacing only where no explicit spacer is present.
-        Args:
-            blocks: List of Notion blocks
-        Returns:
-            List of Notion blocks with processed spacing
-        """
+        """Add spacing between blocks where needed."""
         if not blocks:
             return blocks
         final_blocks = []
-        i = 0
-        while i < len(blocks):
-            current_block = blocks[i]
+        for block_index, current_block in enumerate(blocks):
             final_blocks.append(current_block)
-            # Check if this is a multiline element that needs spacing
+            # Only add spacing after multiline blocks
             if not self._is_multiline_block_type(current_block.get("type")):
-                i += 1
                 continue
-            # Check if the next block is already a spacer
-            if i + 1 < len(blocks) and self._is_empty_paragraph(blocks[i + 1]):
-                # Next block is already a spacer, don't add another
-                pass
-            else:
-                # No explicit spacer found, add one automatically
+            # Check if we need to add a spacer
+            if self._needs_spacer_after_block(blocks, block_index):
                 final_blocks.append(self._create_empty_paragraph())
-            i += 1
         return final_blocks
-    def _is_multiline_block_type(self, block_type: str) -> bool:
-        """
-        Check if a block type corresponds to a multiline element.
+    def _needs_spacer_after_block(
+        self, blocks: List[Dict[str, Any]], block_index: int
+    ) -> bool:
+        """Determine if we need to add a spacer after the current block."""
+        # Check if this is the last block (no need for spacer)
+        if block_index + 1 >= len(blocks):
+            return False
+        # Check if next block is already a spacer
+        next_block = blocks[block_index + 1]
+        if self._is_empty_paragraph(next_block):
+            return False
-        Args:
-            block_type: The type of block to check
+        # No spacer needed
+        return True
-        Returns:
-            True if the block type is a multiline element, False otherwise
-        """
+    def _create_empty_paragraph(self):
+        """Create an empty paragraph block."""
+        return {"type": "paragraph", "paragraph": {"rich_text": []}}
+    def _is_multiline_block_type(self, block_type: str) -> bool:
+        """Check if a block type corresponds to a multiline element."""
         if not block_type:
             return False
@@ -416,26 +428,9 @@ class MarkdownToNotionConverter:
         return False
     def _is_empty_paragraph(self, block: Dict[str, Any]) -> bool:
-        """
-        Check if a block is an empty paragraph.
-        Args:
-            block: The block to check
-        Returns:
-            True if it's an empty paragraph, False otherwise
-        """
+        """Check if a block is an empty paragraph."""
         if block.get("type") != "paragraph":
             return False
         rich_text = block.get("paragraph", {}).get("rich_text", [])
         return not rich_text or len(rich_text) == 0
-    def _create_empty_paragraph(self) -> Dict[str, Any]:
-        """
-        Create an empty paragraph block.
-        Returns:
-            Empty paragraph block dictionary
-        """
-        return {"type": "paragraph", "paragraph": {"rich_text": []}}

notionary 0.1.24__py3-none-any.whl → 0.1.26__py3-none-any.whl

notionary 0.1.24__py3-none-any.whl → 0.1.26__py3-none-any.whl