notionary 0.2.21__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/blocks/_bootstrap.py +9 -1
- notionary/blocks/audio/audio_element.py +53 -28
- notionary/blocks/audio/audio_markdown_node.py +10 -4
- notionary/blocks/base_block_element.py +15 -3
- notionary/blocks/bookmark/bookmark_element.py +39 -36
- notionary/blocks/bookmark/bookmark_markdown_node.py +16 -17
- notionary/blocks/breadcrumbs/breadcrumb_element.py +2 -2
- notionary/blocks/bulleted_list/bulleted_list_element.py +21 -4
- notionary/blocks/callout/callout_element.py +20 -4
- notionary/blocks/child_database/__init__.py +11 -4
- notionary/blocks/child_database/child_database_element.py +61 -0
- notionary/blocks/child_database/child_database_models.py +7 -14
- notionary/blocks/child_page/child_page_element.py +94 -0
- notionary/blocks/client.py +0 -1
- notionary/blocks/code/code_element.py +51 -2
- notionary/blocks/code/code_markdown_node.py +52 -1
- notionary/blocks/column/column_element.py +9 -3
- notionary/blocks/column/column_list_element.py +18 -3
- notionary/blocks/divider/divider_element.py +3 -11
- notionary/blocks/embed/embed_element.py +27 -6
- notionary/blocks/equation/equation_element.py +94 -41
- notionary/blocks/equation/equation_element_markdown_node.py +8 -9
- notionary/blocks/file/file_element.py +56 -37
- notionary/blocks/file/file_element_markdown_node.py +9 -7
- notionary/blocks/guards.py +22 -0
- notionary/blocks/heading/heading_element.py +23 -4
- notionary/blocks/image_block/image_element.py +43 -38
- notionary/blocks/image_block/image_markdown_node.py +10 -5
- notionary/blocks/mixins/captions/__init__.py +4 -0
- notionary/blocks/mixins/captions/caption_markdown_node_mixin.py +31 -0
- notionary/blocks/mixins/captions/caption_mixin.py +92 -0
- notionary/blocks/models.py +3 -1
- notionary/blocks/numbered_list/numbered_list_element.py +21 -4
- notionary/blocks/paragraph/paragraph_element.py +21 -5
- notionary/blocks/pdf/pdf_element.py +47 -41
- notionary/blocks/pdf/pdf_markdown_node.py +9 -7
- notionary/blocks/quote/quote_element.py +26 -9
- notionary/blocks/quote/quote_markdown_node.py +2 -2
- notionary/blocks/registry/block_registry.py +1 -46
- notionary/blocks/registry/block_registry_builder.py +8 -0
- notionary/blocks/rich_text/name_to_id_resolver.py +205 -0
- notionary/blocks/rich_text/rich_text_models.py +62 -29
- notionary/blocks/rich_text/text_inline_formatter.py +432 -101
- notionary/blocks/syntax_prompt_builder.py +137 -0
- notionary/blocks/table/table_element.py +110 -9
- notionary/blocks/table_of_contents/table_of_contents_element.py +19 -2
- notionary/blocks/todo/todo_element.py +21 -4
- notionary/blocks/toggle/toggle_element.py +19 -3
- notionary/blocks/toggle/toggle_markdown_node.py +1 -1
- notionary/blocks/toggleable_heading/toggleable_heading_element.py +19 -4
- notionary/blocks/types.py +69 -0
- notionary/blocks/video/video_element.py +44 -39
- notionary/blocks/video/video_markdown_node.py +10 -5
- notionary/database/client.py +23 -0
- notionary/file_upload/models.py +2 -2
- notionary/markdown/markdown_builder.py +34 -27
- notionary/page/client.py +26 -6
- notionary/page/notion_page.py +37 -6
- notionary/page/page_content_deleting_service.py +117 -0
- notionary/page/page_content_writer.py +89 -113
- notionary/page/page_context.py +65 -0
- notionary/page/reader/handler/__init__.py +2 -0
- notionary/page/reader/handler/base_block_renderer.py +4 -4
- notionary/page/reader/handler/block_rendering_context.py +5 -0
- notionary/page/reader/handler/line_renderer.py +16 -3
- notionary/page/reader/handler/numbered_list_renderer.py +85 -0
- notionary/page/reader/page_content_retriever.py +17 -5
- notionary/page/writer/handler/__init__.py +2 -0
- notionary/page/writer/handler/code_handler.py +12 -40
- notionary/page/writer/handler/column_handler.py +12 -12
- notionary/page/writer/handler/column_list_handler.py +13 -13
- notionary/page/writer/handler/equation_handler.py +74 -0
- notionary/page/writer/handler/line_handler.py +4 -4
- notionary/page/writer/handler/regular_line_handler.py +31 -37
- notionary/page/writer/handler/table_handler.py +8 -72
- notionary/page/writer/handler/toggle_handler.py +14 -12
- notionary/page/writer/handler/toggleable_heading_handler.py +22 -16
- notionary/page/writer/markdown_to_notion_converter.py +28 -9
- notionary/page/writer/markdown_to_notion_converter_context.py +30 -0
- notionary/page/writer/markdown_to_notion_formatting_post_processor.py +73 -0
- notionary/page/writer/markdown_to_notion_post_processor.py +0 -0
- notionary/page/writer/markdown_to_notion_text_length_post_processor.py +0 -0
- notionary/page/writer/notion_text_length_processor.py +150 -0
- notionary/telemetry/service.py +0 -1
- notionary/user/notion_user_manager.py +22 -95
- notionary/util/concurrency_limiter.py +0 -0
- notionary/workspace.py +4 -4
- notionary-0.2.22.dist-info/METADATA +237 -0
- {notionary-0.2.21.dist-info → notionary-0.2.22.dist-info}/RECORD +92 -77
- notionary/page/markdown_whitespace_processor.py +0 -80
- notionary/page/notion_text_length_utils.py +0 -119
- notionary/user/notion_user_provider.py +0 -1
- notionary-0.2.21.dist-info/METADATA +0 -229
- /notionary/page/reader/handler/{context.py → equation_renderer.py} +0 -0
- {notionary-0.2.21.dist-info → notionary-0.2.22.dist-info}/LICENSE +0 -0
- {notionary-0.2.21.dist-info → notionary-0.2.22.dist-info}/WHEEL +0 -0
@@ -2,12 +2,9 @@ from typing import Callable, Optional, Union
|
|
2
2
|
|
3
3
|
from notionary.blocks.client import NotionBlockClient
|
4
4
|
from notionary.blocks.divider import DividerElement
|
5
|
-
from notionary.blocks.models import Block
|
6
5
|
from notionary.blocks.registry.block_registry import BlockRegistry
|
7
6
|
from notionary.blocks.table_of_contents import TableOfContentsElement
|
8
7
|
from notionary.markdown.markdown_builder import MarkdownBuilder
|
9
|
-
from notionary.page.markdown_whitespace_processor import MarkdownWhitespaceProcessor
|
10
|
-
from notionary.page.reader.page_content_retriever import PageContentRetriever
|
11
8
|
from notionary.page.writer.markdown_to_notion_converter import MarkdownToNotionConverter
|
12
9
|
from notionary.util import LoggingMixin
|
13
10
|
|
@@ -22,8 +19,6 @@ class PageContentWriter(LoggingMixin):
|
|
22
19
|
block_registry=block_registry
|
23
20
|
)
|
24
21
|
|
25
|
-
self._content_retriever = PageContentRetriever(block_registry=block_registry)
|
26
|
-
|
27
22
|
async def append_markdown(
|
28
23
|
self,
|
29
24
|
content: Union[str, Callable[[MarkdownBuilder], MarkdownBuilder]],
|
@@ -33,14 +28,6 @@ class PageContentWriter(LoggingMixin):
|
|
33
28
|
) -> Optional[str]:
|
34
29
|
"""
|
35
30
|
Append markdown content to a Notion page using either text or builder callback.
|
36
|
-
|
37
|
-
Args:
|
38
|
-
content: Either raw markdown text OR a callback function that receives a MarkdownBuilder
|
39
|
-
append_divider: Whether to append a divider
|
40
|
-
prepend_table_of_contents: Whether to prepend table of contents
|
41
|
-
|
42
|
-
Returns:
|
43
|
-
str: The processed markdown content that was appended (None if failed)
|
44
31
|
"""
|
45
32
|
|
46
33
|
if isinstance(content, str):
|
@@ -66,7 +53,9 @@ class PageContentWriter(LoggingMixin):
|
|
66
53
|
processed_markdown = self._process_markdown_whitespace(final_markdown)
|
67
54
|
|
68
55
|
try:
|
69
|
-
blocks = self._markdown_to_notion_converter.convert(
|
56
|
+
blocks = await self._markdown_to_notion_converter.convert(
|
57
|
+
processed_markdown
|
58
|
+
)
|
70
59
|
|
71
60
|
result = await self._block_client.append_block_children(
|
72
61
|
block_id=self.page_id, children=blocks
|
@@ -83,114 +72,101 @@ class PageContentWriter(LoggingMixin):
|
|
83
72
|
self.logger.error("Error appending markdown: %s", str(e), exc_info=True)
|
84
73
|
return None
|
85
74
|
|
86
|
-
|
87
|
-
"""
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
)
|
92
|
-
|
93
|
-
if not children_response or not children_response.results:
|
94
|
-
return None
|
95
|
-
|
96
|
-
# Use PageContentRetriever for sophisticated markdown conversion
|
97
|
-
deleted_content = self._content_retriever._convert_blocks_to_markdown(
|
98
|
-
children_response.results, indent_level=0
|
99
|
-
)
|
100
|
-
|
101
|
-
# Delete blocks
|
102
|
-
success = True
|
103
|
-
for block in children_response.results:
|
104
|
-
block_success = await self._delete_block_with_children(block)
|
105
|
-
if not block_success:
|
106
|
-
success = False
|
107
|
-
|
108
|
-
if not success:
|
109
|
-
self.logger.warning("Some blocks could not be deleted")
|
110
|
-
|
111
|
-
return deleted_content if deleted_content else None
|
112
|
-
|
113
|
-
except Exception:
|
114
|
-
self.logger.error("Error clearing page content", exc_info=True)
|
115
|
-
return None
|
116
|
-
|
117
|
-
async def _delete_block_with_children(self, block: Block) -> bool:
|
118
|
-
"""Delete a block and all its children recursively."""
|
119
|
-
if not block.id:
|
120
|
-
self.logger.error("Block has no valid ID")
|
121
|
-
return False
|
122
|
-
|
123
|
-
self.logger.debug("Deleting block: %s (type: %s)", block.id, block.type)
|
124
|
-
|
125
|
-
try:
|
126
|
-
if block.has_children and not await self._delete_block_children(block):
|
127
|
-
return False
|
128
|
-
|
129
|
-
return await self._delete_single_block(block)
|
130
|
-
|
131
|
-
except Exception as e:
|
132
|
-
self.logger.error("Failed to delete block %s: %s", block.id, str(e))
|
133
|
-
return False
|
134
|
-
|
135
|
-
async def _delete_block_children(self, block: Block) -> bool:
|
136
|
-
"""Delete all children of a block."""
|
137
|
-
self.logger.debug("Block %s has children, deleting children first", block.id)
|
75
|
+
def _process_markdown_whitespace(self, markdown_text: str) -> str:
|
76
|
+
"""Process markdown text to normalize whitespace while preserving code blocks."""
|
77
|
+
lines = markdown_text.split("\n")
|
78
|
+
if not lines:
|
79
|
+
return ""
|
138
80
|
|
139
|
-
|
140
|
-
children_blocks = await self._block_client.get_all_block_children(block.id)
|
81
|
+
return self._process_whitespace_lines(lines)
|
141
82
|
|
142
|
-
|
143
|
-
|
144
|
-
|
83
|
+
def _process_whitespace_lines(self, lines: list[str]) -> str:
|
84
|
+
"""Process all lines and return the processed markdown."""
|
85
|
+
processed_lines = []
|
86
|
+
in_code_block = False
|
87
|
+
current_code_block = []
|
145
88
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
89
|
+
for line in lines:
|
90
|
+
processed_lines, in_code_block, current_code_block = (
|
91
|
+
self._process_single_line(
|
92
|
+
line, processed_lines, in_code_block, current_code_block
|
93
|
+
)
|
150
94
|
)
|
151
95
|
|
152
|
-
|
153
|
-
for child_block in children_blocks:
|
154
|
-
if not await self._delete_block_with_children(child_block):
|
155
|
-
self.logger.error(
|
156
|
-
"Failed to delete child block: %s", child_block.id
|
157
|
-
)
|
158
|
-
return False
|
159
|
-
|
160
|
-
self.logger.debug(
|
161
|
-
"Successfully deleted all children of block: %s", block.id
|
162
|
-
)
|
163
|
-
return True
|
96
|
+
return "\n".join(processed_lines)
|
164
97
|
|
165
|
-
|
166
|
-
|
167
|
-
|
98
|
+
def _process_single_line(
|
99
|
+
self,
|
100
|
+
line: str,
|
101
|
+
processed_lines: list[str],
|
102
|
+
in_code_block: bool,
|
103
|
+
current_code_block: list[str],
|
104
|
+
) -> tuple[list[str], bool, list[str]]:
|
105
|
+
"""Process a single line and return updated state."""
|
106
|
+
if self._is_code_block_marker(line):
|
107
|
+
return self._handle_code_block_marker(
|
108
|
+
line, processed_lines, in_code_block, current_code_block
|
168
109
|
)
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
"""Delete a single block."""
|
173
|
-
deleted_block: Optional[Block] = await self._block_client.delete_block(block.id)
|
174
|
-
|
175
|
-
if deleted_block is None:
|
176
|
-
self.logger.error("Failed to delete block: %s", block.id)
|
177
|
-
return False
|
178
|
-
|
179
|
-
if deleted_block.archived or deleted_block.in_trash:
|
180
|
-
self.logger.debug("Successfully deleted/archived block: %s", block.id)
|
181
|
-
return True
|
110
|
+
if in_code_block:
|
111
|
+
current_code_block.append(line)
|
112
|
+
return processed_lines, in_code_block, current_code_block
|
182
113
|
else:
|
183
|
-
|
184
|
-
return
|
114
|
+
processed_lines.append(line.lstrip())
|
115
|
+
return processed_lines, in_code_block, current_code_block
|
185
116
|
|
186
|
-
def
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
117
|
+
def _handle_code_block_marker(
|
118
|
+
self,
|
119
|
+
line: str,
|
120
|
+
processed_lines: list[str],
|
121
|
+
in_code_block: bool,
|
122
|
+
current_code_block: list[str],
|
123
|
+
) -> tuple[list[str], bool, list[str]]:
|
124
|
+
"""Handle code block start/end markers."""
|
125
|
+
if not in_code_block:
|
126
|
+
return self._start_code_block(line, processed_lines)
|
127
|
+
else:
|
128
|
+
return self._end_code_block(processed_lines, current_code_block)
|
129
|
+
|
130
|
+
def _start_code_block(
|
131
|
+
self, line: str, processed_lines: list[str]
|
132
|
+
) -> tuple[list[str], bool, list[str]]:
|
133
|
+
"""Start a new code block."""
|
134
|
+
processed_lines.append(self._normalize_code_block_start(line))
|
135
|
+
return processed_lines, True, []
|
136
|
+
|
137
|
+
def _end_code_block(
|
138
|
+
self, processed_lines: list[str], current_code_block: list[str]
|
139
|
+
) -> tuple[list[str], bool, list[str]]:
|
140
|
+
"""End the current code block."""
|
141
|
+
processed_lines.extend(self._normalize_code_block_content(current_code_block))
|
142
|
+
processed_lines.append("```")
|
143
|
+
return processed_lines, False, []
|
144
|
+
|
145
|
+
def _is_code_block_marker(self, line: str) -> bool:
|
146
|
+
"""Check if line is a code block marker."""
|
147
|
+
return line.lstrip().startswith("```")
|
148
|
+
|
149
|
+
def _normalize_code_block_start(self, line: str) -> str:
|
150
|
+
"""Normalize code block opening marker."""
|
151
|
+
language = line.lstrip().replace("```", "", 1).strip()
|
152
|
+
return "```" + language
|
153
|
+
|
154
|
+
def _normalize_code_block_content(self, code_lines: list[str]) -> list[str]:
|
155
|
+
"""Normalize code block indentation."""
|
156
|
+
if not code_lines:
|
157
|
+
return []
|
158
|
+
|
159
|
+
# Find minimum indentation from non-empty lines
|
160
|
+
non_empty_lines = [line for line in code_lines if line.strip()]
|
161
|
+
if not non_empty_lines:
|
162
|
+
return [""] * len(code_lines)
|
163
|
+
|
164
|
+
min_indent = min(len(line) - len(line.lstrip()) for line in non_empty_lines)
|
165
|
+
if min_indent == 0:
|
166
|
+
return code_lines
|
167
|
+
|
168
|
+
# Remove common indentation
|
169
|
+
return ["" if not line.strip() else line[min_indent:] for line in code_lines]
|
194
170
|
|
195
171
|
def _ensure_table_of_contents_exists_in_registry(self) -> None:
|
196
172
|
"""Ensure TableOfContents is registered in the block registry."""
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# notionary/blocks/context/page_context.py
|
2
|
+
from __future__ import annotations
|
3
|
+
|
4
|
+
from typing import TYPE_CHECKING, Optional
|
5
|
+
from dataclasses import dataclass
|
6
|
+
from contextvars import ContextVar
|
7
|
+
|
8
|
+
if TYPE_CHECKING:
|
9
|
+
from notionary.database.client import NotionDatabaseClient
|
10
|
+
from notionary.file_upload import NotionFileUploadClient
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass(frozen=True)
|
14
|
+
class PageContextProvider:
|
15
|
+
"""Context object that provides dependencies for block conversion operations."""
|
16
|
+
|
17
|
+
page_id: str
|
18
|
+
database_client: NotionDatabaseClient
|
19
|
+
file_upload_client: NotionFileUploadClient
|
20
|
+
|
21
|
+
|
22
|
+
# Context variable
|
23
|
+
_page_context: ContextVar[Optional[PageContextProvider]] = ContextVar(
|
24
|
+
"page_context", default=None
|
25
|
+
)
|
26
|
+
|
27
|
+
|
28
|
+
def get_page_context() -> PageContextProvider:
|
29
|
+
"""Get current page context or raise if not available."""
|
30
|
+
context = _page_context.get()
|
31
|
+
if context is None:
|
32
|
+
raise RuntimeError(
|
33
|
+
"No page context available. Use 'async with page_context(...)'"
|
34
|
+
)
|
35
|
+
return context
|
36
|
+
|
37
|
+
|
38
|
+
def get_page_context_optional() -> Optional[PageContextProvider]:
|
39
|
+
"""Get current page context or None if not available."""
|
40
|
+
return _page_context.get()
|
41
|
+
|
42
|
+
|
43
|
+
class page_context:
|
44
|
+
"""Async-only context manager for page operations."""
|
45
|
+
|
46
|
+
def __init__(self, provider: PageContextProvider):
|
47
|
+
self.provider = provider
|
48
|
+
self._token = None
|
49
|
+
|
50
|
+
def _set_context(self) -> PageContextProvider:
|
51
|
+
"""Helper to set context and return provider."""
|
52
|
+
self._token = _page_context.set(self.provider)
|
53
|
+
return self.provider
|
54
|
+
|
55
|
+
def _reset_context(self) -> None:
|
56
|
+
"""Helper to reset context."""
|
57
|
+
if self._token is not None:
|
58
|
+
_page_context.reset(self._token)
|
59
|
+
|
60
|
+
async def __aenter__(self) -> PageContextProvider:
|
61
|
+
return self._set_context()
|
62
|
+
|
63
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
64
|
+
self._reset_context()
|
65
|
+
return False
|
@@ -3,6 +3,7 @@ from .block_rendering_context import BlockRenderingContext
|
|
3
3
|
from .column_list_renderer import ColumnListRenderer
|
4
4
|
from .column_renderer import ColumnRenderer
|
5
5
|
from .line_renderer import LineRenderer
|
6
|
+
from .numbered_list_renderer import NumberedListRenderer
|
6
7
|
from .toggle_renderer import ToggleRenderer
|
7
8
|
from .toggleable_heading_renderer import ToggleableHeadingRenderer
|
8
9
|
|
@@ -12,6 +13,7 @@ __all__ = [
|
|
12
13
|
"ColumnListRenderer",
|
13
14
|
"ColumnRenderer",
|
14
15
|
"LineRenderer",
|
16
|
+
"NumberedListRenderer",
|
15
17
|
"ToggleRenderer",
|
16
18
|
"ToggleableHeadingRenderer",
|
17
19
|
]
|
@@ -17,12 +17,12 @@ class BlockHandler(ABC):
|
|
17
17
|
self._next_handler = handler
|
18
18
|
return handler
|
19
19
|
|
20
|
-
def handle(self, context: BlockRenderingContext) -> None:
|
20
|
+
async def handle(self, context: BlockRenderingContext) -> None:
|
21
21
|
"""Handle the block or pass to next handler."""
|
22
22
|
if self._can_handle(context):
|
23
|
-
self._process(context)
|
23
|
+
await self._process(context)
|
24
24
|
elif self._next_handler:
|
25
|
-
self._next_handler.handle(context)
|
25
|
+
await self._next_handler.handle(context)
|
26
26
|
|
27
27
|
@abstractmethod
|
28
28
|
def _can_handle(self, context: BlockRenderingContext) -> bool:
|
@@ -30,7 +30,7 @@ class BlockHandler(ABC):
|
|
30
30
|
pass
|
31
31
|
|
32
32
|
@abstractmethod
|
33
|
-
def _process(self, context: BlockRenderingContext) -> None:
|
33
|
+
async def _process(self, context: BlockRenderingContext) -> None:
|
34
34
|
"""Process the block and update context."""
|
35
35
|
pass
|
36
36
|
|
@@ -16,6 +16,11 @@ class BlockRenderingContext:
|
|
16
16
|
block_registry: BlockRegistry
|
17
17
|
convert_children_callback: Optional[Callable[[list[Block], int], str]] = None
|
18
18
|
|
19
|
+
# For batch processing
|
20
|
+
all_blocks: Optional[list[Block]] = None
|
21
|
+
current_block_index: Optional[int] = None
|
22
|
+
blocks_consumed: int = 0
|
23
|
+
|
19
24
|
# Result
|
20
25
|
markdown_result: Optional[str] = None
|
21
26
|
children_result: Optional[str] = None
|
@@ -1,3 +1,7 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import Optional
|
4
|
+
|
1
5
|
from notionary.page.reader.handler import BlockHandler, BlockRenderingContext
|
2
6
|
|
3
7
|
|
@@ -8,9 +12,9 @@ class LineRenderer(BlockHandler):
|
|
8
12
|
# Always can handle - this is the fallback handler
|
9
13
|
return True
|
10
14
|
|
11
|
-
def _process(self, context: BlockRenderingContext) -> None:
|
12
|
-
# Convert the block itself
|
13
|
-
block_markdown =
|
15
|
+
async def _process(self, context: BlockRenderingContext) -> None:
|
16
|
+
# Convert the block itself using direct element iteration
|
17
|
+
block_markdown = await self._convert_block_to_markdown(context)
|
14
18
|
|
15
19
|
# If block has no direct markdown, either return empty or process children
|
16
20
|
if not block_markdown:
|
@@ -58,3 +62,12 @@ class LineRenderer(BlockHandler):
|
|
58
62
|
else block_markdown
|
59
63
|
)
|
60
64
|
context.was_processed = True
|
65
|
+
|
66
|
+
async def _convert_block_to_markdown(
|
67
|
+
self, context: BlockRenderingContext
|
68
|
+
) -> Optional[str]:
|
69
|
+
"""Convert a Notion block to markdown using registered elements."""
|
70
|
+
for element in context.block_registry.get_elements():
|
71
|
+
if element.match_notion(context.block):
|
72
|
+
return await element.notion_to_markdown(context.block)
|
73
|
+
return None
|
@@ -0,0 +1,85 @@
|
|
1
|
+
from notionary.blocks.models import Block, BlockType
|
2
|
+
from notionary.blocks.registry.block_registry import BlockRegistry
|
3
|
+
from notionary.page.reader.handler.base_block_renderer import BlockHandler
|
4
|
+
from notionary.page.reader.handler.block_rendering_context import BlockRenderingContext
|
5
|
+
|
6
|
+
|
7
|
+
class NumberedListRenderer(BlockHandler):
|
8
|
+
"""Handles numbered list items with sequential numbering."""
|
9
|
+
|
10
|
+
def _can_handle(self, context: BlockRenderingContext) -> bool:
|
11
|
+
"""Check if this is a numbered list item."""
|
12
|
+
return (
|
13
|
+
context.block.type == BlockType.NUMBERED_LIST_ITEM
|
14
|
+
and context.block.numbered_list_item is not None
|
15
|
+
)
|
16
|
+
|
17
|
+
async def _process(self, context: BlockRenderingContext) -> None:
|
18
|
+
"""Process numbered list item with sequential numbering."""
|
19
|
+
if context.all_blocks is None or context.current_block_index is None:
|
20
|
+
await self._process_single_item(context, 1)
|
21
|
+
return
|
22
|
+
|
23
|
+
items, blocks_to_skip = self._collect_numbered_list_items(context)
|
24
|
+
|
25
|
+
markdown_parts = []
|
26
|
+
for i, item_context in enumerate(items, 1):
|
27
|
+
item_markdown = await self._process_single_item(item_context, i)
|
28
|
+
if item_markdown:
|
29
|
+
markdown_parts.append(item_markdown)
|
30
|
+
|
31
|
+
# Set result and mark how many blocks to skip
|
32
|
+
if markdown_parts:
|
33
|
+
context.markdown_result = "\n".join(markdown_parts)
|
34
|
+
context.was_processed = True
|
35
|
+
context.blocks_consumed = blocks_to_skip
|
36
|
+
|
37
|
+
def _collect_numbered_list_items(
|
38
|
+
self, context: BlockRenderingContext
|
39
|
+
) -> tuple[list[BlockRenderingContext], int]:
|
40
|
+
"""Collect all consecutive numbered list items starting from current position."""
|
41
|
+
items = []
|
42
|
+
current_index = context.current_block_index
|
43
|
+
all_blocks = context.all_blocks
|
44
|
+
|
45
|
+
# Start with current block
|
46
|
+
items.append(context)
|
47
|
+
blocks_processed = 1
|
48
|
+
|
49
|
+
# Look ahead for more numbered list items
|
50
|
+
for i in range(current_index + 1, len(all_blocks)):
|
51
|
+
block = all_blocks[i]
|
52
|
+
|
53
|
+
# Check if it's a numbered list item
|
54
|
+
if (
|
55
|
+
block.type == BlockType.NUMBERED_LIST_ITEM
|
56
|
+
and block.numbered_list_item is not None
|
57
|
+
):
|
58
|
+
|
59
|
+
# Create context for this item
|
60
|
+
item_context = BlockRenderingContext(
|
61
|
+
block=block,
|
62
|
+
indent_level=context.indent_level,
|
63
|
+
block_registry=context.block_registry,
|
64
|
+
convert_children_callback=context.convert_children_callback,
|
65
|
+
)
|
66
|
+
items.append(item_context)
|
67
|
+
blocks_processed += 1
|
68
|
+
else:
|
69
|
+
# Not a numbered list item - stop collecting
|
70
|
+
break
|
71
|
+
|
72
|
+
return items, blocks_processed
|
73
|
+
|
74
|
+
async def _process_single_item(
|
75
|
+
self, context: BlockRenderingContext, number: int
|
76
|
+
) -> str:
|
77
|
+
"""Process a single numbered list item with the given number."""
|
78
|
+
from notionary.blocks.rich_text.text_inline_formatter import TextInlineFormatter
|
79
|
+
|
80
|
+
rich_text = context.block.numbered_list_item.rich_text
|
81
|
+
content = await TextInlineFormatter.extract_text_with_formatting(rich_text)
|
82
|
+
|
83
|
+
# Apply indentation
|
84
|
+
indent = " " * context.indent_level
|
85
|
+
return f"{indent}{number}. {content}"
|
@@ -5,6 +5,7 @@ from notionary.page.reader.handler import (
|
|
5
5
|
ColumnListRenderer,
|
6
6
|
ColumnRenderer,
|
7
7
|
LineRenderer,
|
8
|
+
NumberedListRenderer,
|
8
9
|
ToggleableHeadingRenderer,
|
9
10
|
ToggleRenderer,
|
10
11
|
)
|
@@ -27,7 +28,7 @@ class PageContentRetriever(LoggingMixin):
|
|
27
28
|
Retrieve page content and convert it to Markdown.
|
28
29
|
Uses the chain of responsibility pattern for scalable block processing.
|
29
30
|
"""
|
30
|
-
return self._convert_blocks_to_markdown(blocks, indent_level=0)
|
31
|
+
return await self._convert_blocks_to_markdown(blocks, indent_level=0)
|
31
32
|
|
32
33
|
def _setup_handler_chain(self) -> None:
|
33
34
|
"""Setup the chain of handlers in priority order."""
|
@@ -35,16 +36,19 @@ class PageContentRetriever(LoggingMixin):
|
|
35
36
|
toggleable_heading_handler = ToggleableHeadingRenderer()
|
36
37
|
column_list_handler = ColumnListRenderer()
|
37
38
|
column_handler = ColumnRenderer()
|
39
|
+
numbered_list_handler = NumberedListRenderer()
|
38
40
|
regular_handler = LineRenderer()
|
39
41
|
|
40
42
|
# Chain handlers - most specific first
|
41
43
|
toggle_handler.set_next(toggleable_heading_handler).set_next(
|
42
44
|
column_list_handler
|
43
|
-
).set_next(column_handler).set_next(
|
45
|
+
).set_next(column_handler).set_next(numbered_list_handler).set_next(
|
46
|
+
regular_handler
|
47
|
+
)
|
44
48
|
|
45
49
|
self._handler_chain = toggle_handler
|
46
50
|
|
47
|
-
def _convert_blocks_to_markdown(
|
51
|
+
async def _convert_blocks_to_markdown(
|
48
52
|
self, blocks: list[Block], indent_level: int = 0
|
49
53
|
) -> str:
|
50
54
|
"""Convert blocks to Markdown using the handler chain."""
|
@@ -52,18 +56,26 @@ class PageContentRetriever(LoggingMixin):
|
|
52
56
|
return ""
|
53
57
|
|
54
58
|
markdown_parts = []
|
59
|
+
i = 0
|
55
60
|
|
56
|
-
|
61
|
+
while i < len(blocks):
|
62
|
+
block = blocks[i]
|
57
63
|
context = BlockRenderingContext(
|
58
64
|
block=block,
|
59
65
|
indent_level=indent_level,
|
60
66
|
block_registry=self._block_registry,
|
67
|
+
all_blocks=blocks,
|
68
|
+
current_block_index=i,
|
69
|
+
convert_children_callback=self._convert_blocks_to_markdown,
|
61
70
|
)
|
62
71
|
|
63
|
-
self._handler_chain.handle(context)
|
72
|
+
await self._handler_chain.handle(context)
|
64
73
|
|
65
74
|
if context.was_processed and context.markdown_result:
|
66
75
|
markdown_parts.append(context.markdown_result)
|
67
76
|
|
77
|
+
# Skip additional blocks if they were consumed by batch processing
|
78
|
+
i += max(1, context.blocks_consumed)
|
79
|
+
|
68
80
|
separator = "\n\n" if indent_level == 0 else "\n"
|
69
81
|
return separator.join(markdown_parts)
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from .code_handler import CodeHandler
|
2
2
|
from .column_handler import ColumnHandler
|
3
3
|
from .column_list_handler import ColumnListHandler
|
4
|
+
from .equation_handler import EquationHandler
|
4
5
|
from .line_handler import LineHandler
|
5
6
|
from .line_processing_context import LineProcessingContext, ParentBlockContext
|
6
7
|
from .regular_line_handler import RegularLineHandler
|
@@ -19,4 +20,5 @@ __all__ = [
|
|
19
20
|
"TableHandler",
|
20
21
|
"RegularLineHandler",
|
21
22
|
"CodeHandler",
|
23
|
+
"EquationHandler",
|
22
24
|
]
|
@@ -1,7 +1,6 @@
|
|
1
1
|
import re
|
2
2
|
|
3
3
|
from notionary.blocks.code.code_element import CodeElement
|
4
|
-
from notionary.blocks.rich_text.rich_text_models import RichTextObject
|
5
4
|
from notionary.page.writer.handler.line_handler import (
|
6
5
|
LineHandler,
|
7
6
|
LineProcessingContext,
|
@@ -27,9 +26,9 @@ class CodeHandler(LineHandler):
|
|
27
26
|
return False
|
28
27
|
return self._is_code_start(context)
|
29
28
|
|
30
|
-
def _process(self, context: LineProcessingContext) -> None:
|
29
|
+
async def _process(self, context: LineProcessingContext) -> None:
|
31
30
|
if self._is_code_start(context):
|
32
|
-
self._process_complete_code_block(context)
|
31
|
+
await self._process_complete_code_block(context)
|
33
32
|
self._mark_processed(context)
|
34
33
|
|
35
34
|
def _is_code_start(self, context: LineProcessingContext) -> bool:
|
@@ -40,33 +39,19 @@ class CodeHandler(LineHandler):
|
|
40
39
|
"""Check if we're currently inside any parent context (toggle, heading, etc.)."""
|
41
40
|
return len(context.parent_stack) > 0
|
42
41
|
|
43
|
-
def _process_complete_code_block(
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
# Create base code block
|
49
|
-
result = CodeElement.markdown_to_notion(f"```{language}")
|
50
|
-
if not result:
|
51
|
-
return
|
52
|
-
|
53
|
-
block = result[0] if isinstance(result, list) else result
|
54
|
-
|
42
|
+
async def _process_complete_code_block(
|
43
|
+
self, context: LineProcessingContext
|
44
|
+
) -> None:
|
45
|
+
"""Process the entire code block in one go using CodeElement."""
|
55
46
|
code_lines, lines_to_consume = self._collect_code_lines(context)
|
56
47
|
|
57
|
-
|
48
|
+
block = CodeElement.create_from_markdown_block(
|
49
|
+
opening_line=context.line, code_lines=code_lines
|
50
|
+
)
|
58
51
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
context.result_blocks.append(block)
|
63
|
-
|
64
|
-
def _extract_fence_info(self, line: str) -> tuple[str, str]:
|
65
|
-
"""Extract the language and optional caption from a code fence."""
|
66
|
-
match = self._code_start_pattern.match(line.strip())
|
67
|
-
lang = match.group(1) if match and match.group(1) else ""
|
68
|
-
cap = match.group(2) if match and match.group(2) else ""
|
69
|
-
return lang, cap
|
52
|
+
if block:
|
53
|
+
context.lines_consumed = lines_to_consume
|
54
|
+
context.result_blocks.append(block)
|
70
55
|
|
71
56
|
def _collect_code_lines(
|
72
57
|
self, context: LineProcessingContext
|
@@ -85,16 +70,3 @@ class CodeHandler(LineHandler):
|
|
85
70
|
"""Mark context as processed and continue."""
|
86
71
|
context.was_processed = True
|
87
72
|
context.should_continue = True
|
88
|
-
|
89
|
-
def _set_block_content(self, block, code_lines: list[str]) -> None:
|
90
|
-
"""Set the code rich_text content on the block."""
|
91
|
-
if not code_lines:
|
92
|
-
return
|
93
|
-
content = "\n".join(code_lines)
|
94
|
-
block.code.rich_text = [RichTextObject.for_code_block(content)]
|
95
|
-
|
96
|
-
def _set_block_caption(self, block, caption: str) -> None:
|
97
|
-
"""Append caption to the code block if provided."""
|
98
|
-
if not caption:
|
99
|
-
return
|
100
|
-
block.code.caption.append(RichTextObject.for_code_block(caption))
|