notionary 0.2.18__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +8 -4
- notionary/base_notion_client.py +3 -1
- notionary/blocks/__init__.py +2 -91
- notionary/blocks/_bootstrap.py +263 -0
- notionary/blocks/audio/__init__.py +8 -2
- notionary/blocks/audio/audio_element.py +42 -104
- notionary/blocks/audio/audio_markdown_node.py +3 -1
- notionary/blocks/audio/audio_models.py +6 -55
- notionary/blocks/base_block_element.py +30 -0
- notionary/blocks/bookmark/__init__.py +9 -2
- notionary/blocks/bookmark/bookmark_element.py +46 -139
- notionary/blocks/bookmark/bookmark_markdown_node.py +3 -1
- notionary/blocks/bookmark/bookmark_models.py +15 -0
- notionary/blocks/breadcrumbs/__init__.py +17 -0
- notionary/blocks/breadcrumbs/breadcrumb_element.py +39 -0
- notionary/blocks/breadcrumbs/breadcrumb_markdown_node.py +32 -0
- notionary/blocks/breadcrumbs/breadcrumb_models.py +12 -0
- notionary/blocks/bulleted_list/__init__.py +12 -2
- notionary/blocks/bulleted_list/bulleted_list_element.py +40 -55
- notionary/blocks/bulleted_list/bulleted_list_markdown_node.py +2 -1
- notionary/blocks/bulleted_list/bulleted_list_models.py +18 -0
- notionary/blocks/callout/__init__.py +9 -2
- notionary/blocks/callout/callout_element.py +40 -89
- notionary/blocks/callout/callout_markdown_node.py +3 -1
- notionary/blocks/callout/callout_models.py +33 -0
- notionary/blocks/child_database/__init__.py +7 -0
- notionary/blocks/child_database/child_database_models.py +19 -0
- notionary/blocks/child_page/__init__.py +9 -0
- notionary/blocks/child_page/child_page_models.py +12 -0
- notionary/blocks/{shared/block_client.py → client.py} +55 -54
- notionary/blocks/code/__init__.py +6 -2
- notionary/blocks/code/code_element.py +53 -187
- notionary/blocks/code/code_markdown_node.py +13 -13
- notionary/blocks/code/code_models.py +94 -0
- notionary/blocks/column/__init__.py +25 -1
- notionary/blocks/column/column_element.py +40 -314
- notionary/blocks/column/column_list_element.py +37 -0
- notionary/blocks/column/column_list_markdown_node.py +50 -0
- notionary/blocks/column/column_markdown_node.py +59 -0
- notionary/blocks/column/column_models.py +26 -0
- notionary/blocks/divider/__init__.py +9 -2
- notionary/blocks/divider/divider_element.py +26 -49
- notionary/blocks/divider/divider_markdown_node.py +2 -1
- notionary/blocks/divider/divider_models.py +12 -0
- notionary/blocks/embed/__init__.py +9 -2
- notionary/blocks/embed/embed_element.py +47 -114
- notionary/blocks/embed/embed_markdown_node.py +3 -1
- notionary/blocks/embed/embed_models.py +14 -0
- notionary/blocks/equation/__init__.py +14 -0
- notionary/blocks/equation/equation_element.py +80 -0
- notionary/blocks/equation/equation_element_markdown_node.py +36 -0
- notionary/blocks/equation/equation_models.py +11 -0
- notionary/blocks/file/__init__.py +25 -0
- notionary/blocks/file/file_element.py +93 -0
- notionary/blocks/file/file_element_markdown_node.py +35 -0
- notionary/blocks/file/file_element_models.py +39 -0
- notionary/blocks/heading/__init__.py +16 -2
- notionary/blocks/heading/heading_element.py +67 -72
- notionary/blocks/heading/heading_markdown_node.py +2 -1
- notionary/blocks/heading/heading_models.py +29 -0
- notionary/blocks/image_block/__init__.py +13 -0
- notionary/blocks/image_block/image_element.py +84 -0
- notionary/blocks/{image → image_block}/image_markdown_node.py +3 -1
- notionary/blocks/image_block/image_models.py +10 -0
- notionary/blocks/models.py +172 -0
- notionary/blocks/numbered_list/__init__.py +12 -2
- notionary/blocks/numbered_list/numbered_list_element.py +33 -58
- notionary/blocks/numbered_list/numbered_list_markdown_node.py +3 -1
- notionary/blocks/numbered_list/numbered_list_models.py +17 -0
- notionary/blocks/paragraph/__init__.py +12 -2
- notionary/blocks/paragraph/paragraph_element.py +27 -69
- notionary/blocks/paragraph/paragraph_markdown_node.py +2 -1
- notionary/blocks/paragraph/paragraph_models.py +16 -0
- notionary/blocks/pdf/__init__.py +13 -0
- notionary/blocks/pdf/pdf_element.py +91 -0
- notionary/blocks/pdf/pdf_markdown_node.py +35 -0
- notionary/blocks/pdf/pdf_models.py +11 -0
- notionary/blocks/quote/__init__.py +11 -2
- notionary/blocks/quote/quote_element.py +31 -65
- notionary/blocks/quote/quote_markdown_node.py +4 -1
- notionary/blocks/quote/quote_models.py +18 -0
- notionary/blocks/registry/__init__.py +4 -0
- notionary/blocks/registry/block_registry.py +75 -91
- notionary/blocks/registry/block_registry_builder.py +107 -59
- notionary/blocks/rich_text/__init__.py +33 -0
- notionary/blocks/rich_text/rich_text_models.py +188 -0
- notionary/blocks/rich_text/text_inline_formatter.py +125 -0
- notionary/blocks/table/__init__.py +16 -2
- notionary/blocks/table/table_element.py +48 -241
- notionary/blocks/table/table_markdown_node.py +2 -1
- notionary/blocks/table/table_models.py +28 -0
- notionary/blocks/table_of_contents/__init__.py +19 -0
- notionary/blocks/table_of_contents/table_of_contents_element.py +51 -0
- notionary/blocks/table_of_contents/table_of_contents_markdown_node.py +35 -0
- notionary/blocks/table_of_contents/table_of_contents_models.py +18 -0
- notionary/blocks/todo/__init__.py +9 -2
- notionary/blocks/todo/todo_element.py +38 -95
- notionary/blocks/todo/todo_markdown_node.py +2 -1
- notionary/blocks/todo/todo_models.py +19 -0
- notionary/blocks/toggle/__init__.py +13 -3
- notionary/blocks/toggle/toggle_element.py +57 -264
- notionary/blocks/toggle/toggle_markdown_node.py +24 -14
- notionary/blocks/toggle/toggle_models.py +17 -0
- notionary/blocks/toggleable_heading/__init__.py +6 -2
- notionary/blocks/toggleable_heading/toggleable_heading_element.py +74 -244
- notionary/blocks/toggleable_heading/toggleable_heading_markdown_node.py +26 -18
- notionary/blocks/types.py +61 -0
- notionary/blocks/video/__init__.py +8 -2
- notionary/blocks/video/video_element.py +67 -143
- notionary/blocks/video/video_element_models.py +10 -0
- notionary/blocks/video/video_markdown_node.py +3 -1
- notionary/database/client.py +3 -8
- notionary/database/database.py +13 -14
- notionary/database/database_filter_builder.py +2 -2
- notionary/database/database_provider.py +5 -4
- notionary/database/models.py +337 -0
- notionary/database/notion_database.py +6 -7
- notionary/file_upload/client.py +5 -7
- notionary/file_upload/models.py +2 -1
- notionary/file_upload/notion_file_upload.py +2 -3
- notionary/markdown/markdown_builder.py +722 -0
- notionary/markdown/markdown_document_model.py +228 -0
- notionary/{blocks → markdown}/markdown_node.py +1 -0
- notionary/models/notion_database_response.py +0 -338
- notionary/page/client.py +9 -10
- notionary/page/models.py +327 -0
- notionary/page/notion_page.py +99 -52
- notionary/page/notion_text_length_utils.py +119 -0
- notionary/page/{content/page_content_writer.py → page_content_writer.py} +88 -38
- notionary/page/reader/handler/__init__.py +17 -0
- notionary/page/reader/handler/base_block_renderer.py +44 -0
- notionary/page/reader/handler/block_processing_context.py +35 -0
- notionary/page/reader/handler/block_rendering_context.py +43 -0
- notionary/page/reader/handler/column_list_renderer.py +51 -0
- notionary/page/reader/handler/column_renderer.py +60 -0
- notionary/page/reader/handler/line_renderer.py +60 -0
- notionary/page/reader/handler/toggle_renderer.py +69 -0
- notionary/page/reader/handler/toggleable_heading_renderer.py +89 -0
- notionary/page/reader/page_content_retriever.py +69 -0
- notionary/page/search_filter_builder.py +2 -1
- notionary/page/writer/handler/__init__.py +22 -0
- notionary/page/writer/handler/code_handler.py +100 -0
- notionary/page/writer/handler/column_handler.py +141 -0
- notionary/page/writer/handler/column_list_handler.py +139 -0
- notionary/page/writer/handler/line_handler.py +35 -0
- notionary/page/writer/handler/line_processing_context.py +54 -0
- notionary/page/writer/handler/regular_line_handler.py +92 -0
- notionary/page/writer/handler/table_handler.py +130 -0
- notionary/page/writer/handler/toggle_handler.py +153 -0
- notionary/page/writer/handler/toggleable_heading_handler.py +167 -0
- notionary/page/writer/markdown_to_notion_converter.py +76 -0
- notionary/telemetry/__init__.py +2 -2
- notionary/telemetry/service.py +4 -3
- notionary/user/__init__.py +2 -2
- notionary/user/base_notion_user.py +2 -1
- notionary/user/client.py +2 -3
- notionary/user/models.py +1 -0
- notionary/user/notion_bot_user.py +4 -5
- notionary/user/notion_user.py +3 -4
- notionary/user/notion_user_manager.py +3 -2
- notionary/user/notion_user_provider.py +1 -1
- notionary/util/__init__.py +3 -2
- notionary/util/fuzzy.py +2 -1
- notionary/util/logging_mixin.py +2 -2
- notionary/util/singleton_metaclass.py +1 -1
- notionary/workspace.py +3 -2
- {notionary-0.2.18.dist-info → notionary-0.2.21.dist-info}/METADATA +12 -8
- notionary-0.2.21.dist-info/RECORD +185 -0
- notionary/blocks/document/__init__.py +0 -7
- notionary/blocks/document/document_element.py +0 -102
- notionary/blocks/document/document_markdown_node.py +0 -31
- notionary/blocks/image/__init__.py +0 -7
- notionary/blocks/image/image_element.py +0 -151
- notionary/blocks/markdown_builder.py +0 -356
- notionary/blocks/mention/__init__.py +0 -7
- notionary/blocks/mention/mention_element.py +0 -229
- notionary/blocks/mention/mention_markdown_node.py +0 -38
- notionary/blocks/prompts/element_prompt_builder.py +0 -83
- notionary/blocks/prompts/element_prompt_content.py +0 -41
- notionary/blocks/shared/__init__.py +0 -0
- notionary/blocks/shared/models.py +0 -710
- notionary/blocks/shared/notion_block_element.py +0 -37
- notionary/blocks/shared/text_inline_formatter.py +0 -262
- notionary/blocks/shared/text_inline_formatter_new.py +0 -139
- notionary/blocks/toggleable_heading/toggleable_heading_models.py +0 -0
- notionary/database/models/page_result.py +0 -10
- notionary/models/notion_block_response.py +0 -264
- notionary/models/notion_page_response.py +0 -78
- notionary/models/search_response.py +0 -0
- notionary/page/__init__.py +0 -0
- notionary/page/content/notion_text_length_utils.py +0 -87
- notionary/page/content/page_content_retriever.py +0 -52
- notionary/page/formatting/line_processor.py +0 -153
- notionary/page/formatting/markdown_to_notion_converter.py +0 -153
- notionary/page/markdown_syntax_prompt_generator.py +0 -114
- notionary/page/notion_to_markdown_converter.py +0 -179
- notionary/page/properites/property_value_extractor.py +0 -0
- notionary-0.2.18.dist-info/RECORD +0 -149
- /notionary/{blocks/document/document_models.py → markdown/___init__.py} +0 -0
- /notionary/{blocks/image/image_models.py → markdown/makdown_document_model.py} +0 -0
- /notionary/page/{content/markdown_whitespace_processor.py → markdown_whitespace_processor.py} +0 -0
- /notionary/{blocks/mention/mention_models.py → page/reader/handler/context.py} +0 -0
- {notionary-0.2.18.dist-info → notionary-0.2.21.dist-info}/LICENSE +0 -0
- {notionary-0.2.18.dist-info → notionary-0.2.21.dist-info}/WHEEL +0 -0
@@ -0,0 +1,119 @@
|
|
1
|
+
"""
|
2
|
+
Utility functions for handling Notion API text length limitations.
|
3
|
+
|
4
|
+
This module provides functions to fix text content that exceeds Notion's
|
5
|
+
rich_text character limit of 2000 characters per element.
|
6
|
+
|
7
|
+
Resolves API errors like:
|
8
|
+
"validation_error - body.children[79].toggle.children[2].paragraph.rich_text[0].text.content.length
|
9
|
+
should be ≤ 2000, instead was 2162."
|
10
|
+
"""
|
11
|
+
|
12
|
+
import logging
|
13
|
+
import re
|
14
|
+
|
15
|
+
from notionary.blocks.models import BlockCreateRequest
|
16
|
+
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
|
20
|
+
def fix_blocks_content_length(
    blocks: list[BlockCreateRequest], max_text_length: int = 1900
) -> list[BlockCreateRequest]:
    """Return length-limited copies of the given blocks.

    Args:
        blocks: Blocks to check; nested lists are flattened first via
            ``_flatten_blocks``.
        max_text_length: Maximum number of characters kept per rich text
            content string.

    Returns:
        A flat list with one fixed copy (produced by
        ``_fix_single_block_content``) per input block, in input order.
    """
    return [
        _fix_single_block_content(candidate, max_text_length)
        for candidate in _flatten_blocks(blocks)
    ]
|
32
|
+
|
33
|
+
|
34
|
+
def _fix_single_block_content(
    block: BlockCreateRequest, max_text_length: int
) -> BlockCreateRequest:
    """Return a deep copy of ``block`` with over-long rich text shortened.

    The copy (and, recursively, its children) is fixed in place by
    ``_fix_block_rich_text_direct``; the ``block`` argument itself is not
    modified because all changes are applied to the copy.
    """
    duplicate = block.model_copy(deep=True)
    _fix_block_rich_text_direct(duplicate, max_text_length)
    return duplicate
|
42
|
+
|
43
|
+
|
44
|
+
def _fix_block_rich_text_direct(
    block: BlockCreateRequest, max_text_length: int
) -> None:
    """Shorten over-long rich text on ``block`` and its children, in place.

    The content object is located with ``_get_block_content``. Its
    ``rich_text`` entries (if any) are fixed first, then every entry of its
    ``children`` (if any) is processed recursively.
    """
    payload = _get_block_content(block)
    if not payload:
        return

    rich_text = getattr(payload, "rich_text", None)
    if rich_text:
        _fix_rich_text_objects_direct(rich_text, max_text_length)

    nested_blocks = getattr(payload, "children", None)
    if nested_blocks:
        for nested in nested_blocks:
            _fix_block_rich_text_direct(nested, max_text_length)
|
56
|
+
|
57
|
+
|
58
|
+
def _get_block_content(block: BlockCreateRequest):
|
59
|
+
"""Get the actual content object from a create block dynamically."""
|
60
|
+
# Get all attributes that don't start with underscore and aren't methods
|
61
|
+
for attr_name in dir(block):
|
62
|
+
if attr_name.startswith("_") or attr_name in [
|
63
|
+
"model_copy",
|
64
|
+
"model_dump",
|
65
|
+
"model_validate",
|
66
|
+
]:
|
67
|
+
continue
|
68
|
+
|
69
|
+
attr_value = getattr(block, attr_name, None)
|
70
|
+
|
71
|
+
# Skip None values, strings (like 'type'), and callable methods
|
72
|
+
if attr_value is None or isinstance(attr_value, str) or callable(attr_value):
|
73
|
+
continue
|
74
|
+
|
75
|
+
# If it's an object with rich_text or children, it's likely our content
|
76
|
+
if hasattr(attr_value, "rich_text") or hasattr(attr_value, "children"):
|
77
|
+
return attr_value
|
78
|
+
|
79
|
+
return None
|
80
|
+
|
81
|
+
|
82
|
+
def _fix_rich_text_objects_direct(rich_text_list: list, max_text_length: int) -> None:
|
83
|
+
"""Fix rich text objects directly without dict conversion."""
|
84
|
+
if not rich_text_list:
|
85
|
+
return
|
86
|
+
|
87
|
+
for rich_text_item in rich_text_list:
|
88
|
+
if not rich_text_item:
|
89
|
+
continue
|
90
|
+
|
91
|
+
# Check if this is a text type rich text object
|
92
|
+
if (
|
93
|
+
hasattr(rich_text_item, "text")
|
94
|
+
and rich_text_item.text
|
95
|
+
and hasattr(rich_text_item.text, "content")
|
96
|
+
):
|
97
|
+
|
98
|
+
content = rich_text_item.text.content
|
99
|
+
if content and len(content) > max_text_length:
|
100
|
+
logger.warning(
|
101
|
+
"Truncating text content from %d to %d chars",
|
102
|
+
len(content),
|
103
|
+
max_text_length,
|
104
|
+
)
|
105
|
+
# Direct assignment - no parsing needed!
|
106
|
+
rich_text_item.text.content = content[:max_text_length]
|
107
|
+
|
108
|
+
|
109
|
+
def _flatten_blocks(blocks: list) -> list[BlockCreateRequest]:
|
110
|
+
"""Flatten nested block lists."""
|
111
|
+
flattened = []
|
112
|
+
for item in blocks:
|
113
|
+
if isinstance(item, list):
|
114
|
+
# Rekursiv flatten für nested lists
|
115
|
+
flattened.extend(_flatten_blocks(item))
|
116
|
+
else:
|
117
|
+
# Normal block
|
118
|
+
flattened.append(item)
|
119
|
+
return flattened
|
@@ -1,16 +1,14 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
|
-
from notionary.blocks import
|
4
|
-
from notionary.blocks.
|
5
|
-
from notionary.models
|
6
|
-
from notionary.
|
7
|
-
|
8
|
-
|
9
|
-
from notionary.page.
|
10
|
-
from notionary.page.
|
11
|
-
|
12
|
-
)
|
13
|
-
|
1
|
+
from typing import Callable, Optional, Union
|
2
|
+
|
3
|
+
from notionary.blocks.client import NotionBlockClient
|
4
|
+
from notionary.blocks.divider import DividerElement
|
5
|
+
from notionary.blocks.models import Block
|
6
|
+
from notionary.blocks.registry.block_registry import BlockRegistry
|
7
|
+
from notionary.blocks.table_of_contents import TableOfContentsElement
|
8
|
+
from notionary.markdown.markdown_builder import MarkdownBuilder
|
9
|
+
from notionary.page.markdown_whitespace_processor import MarkdownWhitespaceProcessor
|
10
|
+
from notionary.page.reader.page_content_retriever import PageContentRetriever
|
11
|
+
from notionary.page.writer.markdown_to_notion_converter import MarkdownToNotionConverter
|
14
12
|
from notionary.util import LoggingMixin
|
15
13
|
|
16
14
|
|
@@ -24,53 +22,97 @@ class PageContentWriter(LoggingMixin):
|
|
24
22
|
block_registry=block_registry
|
25
23
|
)
|
26
24
|
|
27
|
-
|
28
|
-
|
25
|
+
self._content_retriever = PageContentRetriever(block_registry=block_registry)
|
26
|
+
|
27
|
+
async def append_markdown(
|
28
|
+
self,
|
29
|
+
content: Union[str, Callable[[MarkdownBuilder], MarkdownBuilder]],
|
30
|
+
*,
|
31
|
+
append_divider: bool = True,
|
32
|
+
prepend_table_of_contents: bool = False,
|
33
|
+
) -> Optional[str]:
|
34
|
+
"""
|
35
|
+
Append markdown content to a Notion page using either text or builder callback.
|
36
|
+
|
37
|
+
Args:
|
38
|
+
content: Either raw markdown text OR a callback function that receives a MarkdownBuilder
|
39
|
+
append_divider: Whether to append a divider
|
40
|
+
prepend_table_of_contents: Whether to prepend table of contents
|
41
|
+
|
42
|
+
Returns:
|
43
|
+
str: The processed markdown content that was appended (None if failed)
|
44
|
+
"""
|
45
|
+
|
46
|
+
if isinstance(content, str):
|
47
|
+
final_markdown = content
|
48
|
+
elif callable(content):
|
49
|
+
builder = MarkdownBuilder()
|
50
|
+
content(builder)
|
51
|
+
final_markdown = builder.build()
|
52
|
+
else:
|
53
|
+
raise ValueError(
|
54
|
+
"content must be either a string or a callable that takes a MarkdownBuilder"
|
55
|
+
)
|
56
|
+
|
57
|
+
# Add optional components
|
58
|
+
if prepend_table_of_contents:
|
59
|
+
self._ensure_table_of_contents_exists_in_registry()
|
60
|
+
final_markdown = "[toc]\n\n" + final_markdown
|
61
|
+
|
29
62
|
if append_divider:
|
30
|
-
|
63
|
+
self._ensure_divider_exists_in_registry()
|
64
|
+
final_markdown = final_markdown + "\n\n---\n"
|
31
65
|
|
32
|
-
|
66
|
+
processed_markdown = self._process_markdown_whitespace(final_markdown)
|
33
67
|
|
34
68
|
try:
|
35
|
-
blocks = self._markdown_to_notion_converter.convert(
|
36
|
-
|
37
|
-
fixed_blocks = fix_blocks_content_length(blocks)
|
69
|
+
blocks = self._markdown_to_notion_converter.convert(processed_markdown)
|
38
70
|
|
39
71
|
result = await self._block_client.append_block_children(
|
40
|
-
block_id=self.page_id, children=
|
72
|
+
block_id=self.page_id, children=blocks
|
41
73
|
)
|
42
|
-
self.logger.debug("Append block children result: %r", result)
|
43
|
-
return bool(result)
|
44
|
-
except Exception as e:
|
45
|
-
import traceback
|
46
74
|
|
47
|
-
|
48
|
-
"
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
75
|
+
if result:
|
76
|
+
self.logger.debug("Successfully appended %d blocks", len(blocks))
|
77
|
+
return processed_markdown
|
78
|
+
else:
|
79
|
+
self.logger.error("Failed to append blocks")
|
80
|
+
return None
|
53
81
|
|
54
|
-
|
55
|
-
|
82
|
+
except Exception as e:
|
83
|
+
self.logger.error("Error appending markdown: %s", str(e), exc_info=True)
|
84
|
+
return None
|
85
|
+
|
86
|
+
async def clear_page_content(self) -> Optional[str]:
|
87
|
+
"""Clear all content of the page and return deleted content as markdown."""
|
56
88
|
try:
|
57
89
|
children_response = await self._block_client.get_block_children(
|
58
90
|
block_id=self.page_id
|
59
91
|
)
|
60
92
|
|
61
93
|
if not children_response or not children_response.results:
|
62
|
-
return
|
94
|
+
return None
|
63
95
|
|
96
|
+
# Use PageContentRetriever for sophisticated markdown conversion
|
97
|
+
deleted_content = self._content_retriever._convert_blocks_to_markdown(
|
98
|
+
children_response.results, indent_level=0
|
99
|
+
)
|
100
|
+
|
101
|
+
# Delete blocks
|
64
102
|
success = True
|
65
103
|
for block in children_response.results:
|
66
104
|
block_success = await self._delete_block_with_children(block)
|
67
105
|
if not block_success:
|
68
106
|
success = False
|
69
107
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
return
|
108
|
+
if not success:
|
109
|
+
self.logger.warning("Some blocks could not be deleted")
|
110
|
+
|
111
|
+
return deleted_content if deleted_content else None
|
112
|
+
|
113
|
+
except Exception:
|
114
|
+
self.logger.error("Error clearing page content", exc_info=True)
|
115
|
+
return None
|
74
116
|
|
75
117
|
async def _delete_block_with_children(self, block: Block) -> bool:
|
76
118
|
"""Delete a block and all its children recursively."""
|
@@ -149,3 +191,11 @@ class PageContentWriter(LoggingMixin):
|
|
149
191
|
|
150
192
|
processor = MarkdownWhitespaceProcessor()
|
151
193
|
return processor.process_lines(lines)
|
194
|
+
|
195
|
+
def _ensure_table_of_contents_exists_in_registry(self) -> None:
|
196
|
+
"""Register TableOfContentsElement on self.block_registry (called before append_markdown prepends "[toc]")."""
|
197
|
+
self.block_registry.register(TableOfContentsElement)
|
198
|
+
|
199
|
+
def _ensure_divider_exists_in_registry(self) -> None:
|
200
|
+
"""Register DividerElement on self.block_registry (called before append_markdown appends the "---" divider)."""
|
201
|
+
self.block_registry.register(DividerElement)
|
@@ -0,0 +1,17 @@
|
|
1
|
+
from .base_block_renderer import BlockHandler
|
2
|
+
from .block_rendering_context import BlockRenderingContext
|
3
|
+
from .column_list_renderer import ColumnListRenderer
|
4
|
+
from .column_renderer import ColumnRenderer
|
5
|
+
from .line_renderer import LineRenderer
|
6
|
+
from .toggle_renderer import ToggleRenderer
|
7
|
+
from .toggleable_heading_renderer import ToggleableHeadingRenderer
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
"BlockHandler",
|
11
|
+
"BlockRenderingContext",
|
12
|
+
"ColumnListRenderer",
|
13
|
+
"ColumnRenderer",
|
14
|
+
"LineRenderer",
|
15
|
+
"ToggleRenderer",
|
16
|
+
"ToggleableHeadingRenderer",
|
17
|
+
]
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from abc import ABC, abstractmethod
|
4
|
+
from typing import Optional
|
5
|
+
|
6
|
+
from notionary.page.reader.handler.block_rendering_context import BlockRenderingContext
|
7
|
+
|
8
|
+
|
9
|
+
class BlockHandler(ABC):
    """Base class for handlers that are linked into a chain.

    Each handler either processes a rendering context itself or forwards
    it to the next handler, if one has been set.
    """

    def __init__(self):
        self._next_handler: Optional[BlockHandler] = None

    def set_next(self, handler: BlockHandler) -> BlockHandler:
        """Link ``handler`` after this one and return it so calls can be chained."""
        self._next_handler = handler
        return handler

    def handle(self, context: BlockRenderingContext) -> None:
        """Process ``context`` here if possible, otherwise delegate down the chain."""
        if self._can_handle(context):
            self._process(context)
            return

        successor = self._next_handler
        if successor:
            successor.handle(context)

    @abstractmethod
    def _can_handle(self, context: BlockRenderingContext) -> bool:
        """Return whether this handler is responsible for the context's block."""

    @abstractmethod
    def _process(self, context: BlockRenderingContext) -> None:
        """Render the block and store the outcome on ``context``."""

    def _indent_text(self, text: str, spaces: int = 4) -> str:
        """Prefix every non-blank line of ``text`` with ``spaces`` spaces.

        Blank or whitespace-only lines are left unchanged, and falsy
        ``text`` is returned as-is.
        """
        if not text:
            return text

        prefix = " " * spaces
        indented = []
        for line in text.split("\n"):
            indented.append(prefix + line if line.strip() else line)
        return "\n".join(indented)
|
@@ -0,0 +1,35 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from dataclasses import dataclass
|
4
|
+
from typing import Optional
|
5
|
+
|
6
|
+
from notionary.blocks.models import Block
|
7
|
+
from notionary.blocks.registry.block_registry import BlockRegistry
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
class BlockProcessingContext:
    """Context for processing blocks during markdown conversion."""

    # Block being processed.
    block: Block
    # Indentation level associated with the block.
    indent_level: int
    # Registry available to whoever processes the block.
    block_registry: BlockRegistry

    # Result fields; they start empty.
    markdown_result: Optional[str] = None
    children_result: Optional[str] = None
    was_processed: bool = False

    def has_children(self) -> bool:
        """Return True only if the block is flagged as having children and
        its ``children`` list is present and non-empty.

        Always returns a real ``bool``: a falsy ``block.has_children`` such
        as ``None`` yields ``False`` instead of leaking through unchanged.
        """
        if not self.block.has_children:
            return False
        children = self.block.children
        return children is not None and len(children) > 0

    def get_children_blocks(self) -> list[Block]:
        """Return the block's children, or an empty list when there are none."""
        if self.has_children():
            return self.block.children
        return []
|
@@ -0,0 +1,43 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from dataclasses import dataclass
|
4
|
+
from typing import Callable, Optional
|
5
|
+
|
6
|
+
from notionary.blocks.models import Block
|
7
|
+
from notionary.blocks.registry.block_registry import BlockRegistry
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
class BlockRenderingContext:
    """Context for processing blocks during markdown conversion."""

    # Block being rendered.
    block: Block
    # Indentation level associated with the block.
    indent_level: int
    # Registry available to whoever renders the block.
    block_registry: BlockRegistry
    # Optional callback taking (children, indent_level) and returning markdown.
    convert_children_callback: Optional[Callable[[list[Block], int], str]] = None

    # Result fields; they start empty.
    markdown_result: Optional[str] = None
    children_result: Optional[str] = None
    was_processed: bool = False

    def has_children(self) -> bool:
        """Return True only if the block is flagged as having children and
        its ``children`` list is present and non-empty.

        Always returns a real ``bool``: a falsy ``block.has_children`` such
        as ``None`` yields ``False`` instead of leaking through unchanged.
        """
        if not self.block.has_children:
            return False
        children = self.block.children
        return children is not None and len(children) > 0

    def get_children_blocks(self) -> list[Block]:
        """Return the block's children, or an empty list when there are none."""
        if self.has_children():
            return self.block.children
        return []

    def convert_children_to_markdown(self, indent_level: int = 0) -> str:
        """Convert the children to markdown through the callback.

        Returns an empty string when the block has no children or no
        callback was supplied.
        """
        if not self.has_children() or not self.convert_children_callback:
            return ""

        return self.convert_children_callback(self.get_children_blocks(), indent_level)
|
@@ -0,0 +1,51 @@
|
|
1
|
+
from notionary.blocks.column.column_list_element import ColumnListElement
|
2
|
+
from notionary.page.reader.handler import BlockHandler, BlockRenderingContext
|
3
|
+
|
4
|
+
|
5
|
+
class ColumnListRenderer(BlockHandler):
    """Renders column list blocks together with their column children."""

    def _can_handle(self, context: BlockRenderingContext) -> bool:
        return ColumnListElement.match_notion(context.block)

    def _process(self, context: BlockRenderingContext) -> None:
        """Wrap the rendered children between '::: columns' and ':::' lines."""
        opening = "::: columns"
        closing = ":::"

        # Only the fence lines follow the block's own indentation level.
        if context.indent_level > 0:
            pad = context.indent_level * 4
            opening = self._indent_text(opening, spaces=pad)
            closing = self._indent_text(closing, spaces=pad)

        body = ""
        if context.has_children():
            # Imported lazily to avoid a circular dependency.
            from notionary.page.reader.page_content_retriever import (
                PageContentRetriever,
            )

            # A temporary retriever renders the children without indentation.
            body = PageContentRetriever(
                context.block_registry
            )._convert_blocks_to_markdown(
                context.get_children_blocks(),
                indent_level=0,
            )

        if body:
            context.markdown_result = f"{opening}\n{body}\n{closing}"
        else:
            context.markdown_result = f"{opening}\n{closing}"

        context.was_processed = True
|
@@ -0,0 +1,60 @@
|
|
1
|
+
from notionary.blocks.column.column_element import ColumnElement
|
2
|
+
from notionary.page.reader.handler import BlockHandler, BlockRenderingContext
|
3
|
+
|
4
|
+
|
5
|
+
class ColumnRenderer(BlockHandler):
    """Renders a single column block together with its children content."""

    def _can_handle(self, context: BlockRenderingContext) -> bool:
        return ColumnElement.match_notion(context.block)

    def _process(self, context: BlockRenderingContext) -> None:
        """Wrap the rendered children between a '::: column' line and ':::'."""
        opening = self._extract_column_start(context.block)
        closing = ":::"

        # Only the fence lines follow the block's own indentation level.
        if context.indent_level > 0:
            pad = context.indent_level * 4
            opening = self._indent_text(opening, spaces=pad)
            closing = self._indent_text(closing, spaces=pad)

        body = ""
        if context.has_children():
            # Imported lazily to avoid a circular dependency.
            from notionary.page.reader.page_content_retriever import (
                PageContentRetriever,
            )

            # A temporary retriever renders the children without indentation.
            body = PageContentRetriever(
                context.block_registry
            )._convert_blocks_to_markdown(
                context.get_children_blocks(),
                indent_level=0,
            )

        if body:
            context.markdown_result = f"{opening}\n{body}\n{closing}"
        else:
            context.markdown_result = f"{opening}\n{closing}"

        context.was_processed = True

    def _extract_column_start(self, block) -> str:
        """Build the opening line, appending the width ratio when one is set."""
        column = block.column
        if column and column.width_ratio:
            return f"::: column {column.width_ratio}"
        return "::: column"
|
@@ -0,0 +1,60 @@
|
|
1
|
+
from notionary.page.reader.handler import BlockHandler, BlockRenderingContext
|
2
|
+
|
3
|
+
|
4
|
+
class LineRenderer(BlockHandler):
    """Fallback renderer for blocks without special parent/children handling."""

    def _can_handle(self, context: BlockRenderingContext) -> bool:
        # Accepts every block, so it acts as the fallback handler.
        return True

    def _process(self, context: BlockRenderingContext) -> None:
        """Render the block via the registry and add its children one level deeper."""
        rendered = context.block_registry.notion_to_markdown(context.block)

        # Indentation only applies when the block itself produced markdown.
        if rendered and context.indent_level > 0:
            rendered = self._indent_text(rendered, spaces=context.indent_level * 4)

        nested = ""
        if context.has_children():
            # Imported lazily to avoid a circular dependency.
            from notionary.page.reader.page_content_retriever import (
                PageContentRetriever,
            )

            nested = PageContentRetriever(
                context.block_registry
            )._convert_blocks_to_markdown(
                context.get_children_blocks(), indent_level=context.indent_level + 1
            )

        if not rendered:
            # No markdown of its own: the result is just the children (or "").
            context.markdown_result = nested
        elif nested:
            context.markdown_result = f"{rendered}\n{nested}"
        else:
            context.markdown_result = rendered

        context.was_processed = True
|
@@ -0,0 +1,69 @@
|
|
1
|
+
from notionary.blocks.toggle.toggle_element import ToggleElement
|
2
|
+
from notionary.page.reader.handler import BlockHandler, BlockRenderingContext
|
3
|
+
|
4
|
+
|
5
|
+
class ToggleRenderer(BlockHandler):
    """Renders toggle blocks as a ``+++ <title>`` / ``+++`` pair around their children."""

    def _can_handle(self, context: BlockRenderingContext) -> bool:
        """Accept only blocks that ``ToggleElement.match_notion`` recognises."""
        return ToggleElement.match_notion(context.block)

    def _process(self, context: BlockRenderingContext) -> None:
        """Write the toggle's markdown into ``context.markdown_result``.

        When no title can be extracted the method returns early and leaves
        ``markdown_result`` and ``was_processed`` untouched. Otherwise the
        opening and closing lines are indented by four spaces per
        ``context.indent_level``, the children are converted with
        ``indent_level=0``, and ``was_processed`` is set to ``True``.
        """
        title = self._extract_toggle_title(context.block)
        if not title:
            return

        needs_indent = context.indent_level > 0

        # Opening line of the toggle.
        opening = f"+++ {title}"
        if needs_indent:
            opening = self._indent_text(opening, spaces=context.indent_level * 4)

        # Children content, if the block has any.
        body = ""
        if context.has_children():
            # Import here to avoid circular dependency
            from notionary.page.reader.page_content_retriever import (
                PageContentRetriever,
            )

            # A throwaway retriever converts the child blocks.
            child_retriever = PageContentRetriever(context.block_registry)
            body = child_retriever._convert_blocks_to_markdown(
                context.get_children_blocks(),
                indent_level=0,  # No indentation for content inside toggles
            )

        # Closing line of the toggle.
        closing = "+++"
        if needs_indent:
            closing = self._indent_text(closing, spaces=context.indent_level * 4)

        # An empty body collapses to just the opening and closing lines.
        context.markdown_result = (
            f"{opening}\n{body}\n{closing}" if body else f"{opening}\n{closing}"
        )
        context.was_processed = True

    def _extract_toggle_title(self, block) -> str:
        """Return the toggle's title text, stripped of surrounding whitespace.

        Each rich-text item contributes its ``plain_text`` when it has that
        attribute, otherwise its ``text.content`` when available; ``None``
        or empty values contribute nothing. An empty string is returned
        when the block has no toggle data or no rich text.
        """
        toggle = block.toggle
        if not (toggle and toggle.rich_text):
            return ""

        fragments = []
        for item in toggle.rich_text:
            if hasattr(item, "plain_text"):
                fragments.append(item.plain_text or "")
            elif hasattr(item, "text") and hasattr(item.text, "content"):
                fragments.append(item.text.content or "")

        return "".join(fragments).strip()
|