notionary 0.2.19__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +8 -4
- notionary/base_notion_client.py +3 -1
- notionary/blocks/__init__.py +2 -91
- notionary/blocks/_bootstrap.py +271 -0
- notionary/blocks/audio/__init__.py +8 -2
- notionary/blocks/audio/audio_element.py +69 -106
- notionary/blocks/audio/audio_markdown_node.py +13 -5
- notionary/blocks/audio/audio_models.py +6 -55
- notionary/blocks/base_block_element.py +42 -0
- notionary/blocks/bookmark/__init__.py +9 -2
- notionary/blocks/bookmark/bookmark_element.py +49 -139
- notionary/blocks/bookmark/bookmark_markdown_node.py +19 -18
- notionary/blocks/bookmark/bookmark_models.py +15 -0
- notionary/blocks/breadcrumbs/__init__.py +17 -0
- notionary/blocks/breadcrumbs/breadcrumb_element.py +39 -0
- notionary/blocks/breadcrumbs/breadcrumb_markdown_node.py +32 -0
- notionary/blocks/breadcrumbs/breadcrumb_models.py +12 -0
- notionary/blocks/bulleted_list/__init__.py +12 -2
- notionary/blocks/bulleted_list/bulleted_list_element.py +55 -53
- notionary/blocks/bulleted_list/bulleted_list_markdown_node.py +2 -1
- notionary/blocks/bulleted_list/bulleted_list_models.py +18 -0
- notionary/blocks/callout/__init__.py +9 -2
- notionary/blocks/callout/callout_element.py +53 -86
- notionary/blocks/callout/callout_markdown_node.py +3 -1
- notionary/blocks/callout/callout_models.py +33 -0
- notionary/blocks/child_database/__init__.py +14 -0
- notionary/blocks/child_database/child_database_element.py +61 -0
- notionary/blocks/child_database/child_database_models.py +12 -0
- notionary/blocks/child_page/__init__.py +9 -0
- notionary/blocks/child_page/child_page_element.py +94 -0
- notionary/blocks/child_page/child_page_models.py +12 -0
- notionary/blocks/{shared/block_client.py → client.py} +54 -54
- notionary/blocks/code/__init__.py +6 -2
- notionary/blocks/code/code_element.py +96 -181
- notionary/blocks/code/code_markdown_node.py +64 -13
- notionary/blocks/code/code_models.py +94 -0
- notionary/blocks/column/__init__.py +25 -1
- notionary/blocks/column/column_element.py +44 -312
- notionary/blocks/column/column_list_element.py +52 -0
- notionary/blocks/column/column_list_markdown_node.py +50 -0
- notionary/blocks/column/column_markdown_node.py +59 -0
- notionary/blocks/column/column_models.py +26 -0
- notionary/blocks/divider/__init__.py +9 -2
- notionary/blocks/divider/divider_element.py +18 -49
- notionary/blocks/divider/divider_markdown_node.py +2 -1
- notionary/blocks/divider/divider_models.py +12 -0
- notionary/blocks/embed/__init__.py +9 -2
- notionary/blocks/embed/embed_element.py +65 -111
- notionary/blocks/embed/embed_markdown_node.py +3 -1
- notionary/blocks/embed/embed_models.py +14 -0
- notionary/blocks/equation/__init__.py +14 -0
- notionary/blocks/equation/equation_element.py +133 -0
- notionary/blocks/equation/equation_element_markdown_node.py +35 -0
- notionary/blocks/equation/equation_models.py +11 -0
- notionary/blocks/file/__init__.py +25 -0
- notionary/blocks/file/file_element.py +112 -0
- notionary/blocks/file/file_element_markdown_node.py +37 -0
- notionary/blocks/file/file_element_models.py +39 -0
- notionary/blocks/guards.py +22 -0
- notionary/blocks/heading/__init__.py +16 -2
- notionary/blocks/heading/heading_element.py +83 -69
- notionary/blocks/heading/heading_markdown_node.py +2 -1
- notionary/blocks/heading/heading_models.py +29 -0
- notionary/blocks/image_block/__init__.py +13 -0
- notionary/blocks/image_block/image_element.py +89 -0
- notionary/blocks/{image → image_block}/image_markdown_node.py +13 -6
- notionary/blocks/image_block/image_models.py +10 -0
- notionary/blocks/mixins/captions/__init__.py +4 -0
- notionary/blocks/mixins/captions/caption_markdown_node_mixin.py +31 -0
- notionary/blocks/mixins/captions/caption_mixin.py +92 -0
- notionary/blocks/models.py +174 -0
- notionary/blocks/numbered_list/__init__.py +12 -2
- notionary/blocks/numbered_list/numbered_list_element.py +48 -56
- notionary/blocks/numbered_list/numbered_list_markdown_node.py +3 -1
- notionary/blocks/numbered_list/numbered_list_models.py +17 -0
- notionary/blocks/paragraph/__init__.py +12 -2
- notionary/blocks/paragraph/paragraph_element.py +40 -66
- notionary/blocks/paragraph/paragraph_markdown_node.py +2 -1
- notionary/blocks/paragraph/paragraph_models.py +16 -0
- notionary/blocks/pdf/__init__.py +13 -0
- notionary/blocks/pdf/pdf_element.py +97 -0
- notionary/blocks/pdf/pdf_markdown_node.py +37 -0
- notionary/blocks/pdf/pdf_models.py +11 -0
- notionary/blocks/quote/__init__.py +11 -2
- notionary/blocks/quote/quote_element.py +45 -62
- notionary/blocks/quote/quote_markdown_node.py +6 -3
- notionary/blocks/quote/quote_models.py +18 -0
- notionary/blocks/registry/__init__.py +4 -0
- notionary/blocks/registry/block_registry.py +60 -121
- notionary/blocks/registry/block_registry_builder.py +115 -59
- notionary/blocks/rich_text/__init__.py +33 -0
- notionary/blocks/rich_text/name_to_id_resolver.py +205 -0
- notionary/blocks/rich_text/rich_text_models.py +221 -0
- notionary/blocks/rich_text/text_inline_formatter.py +456 -0
- notionary/blocks/syntax_prompt_builder.py +137 -0
- notionary/blocks/table/__init__.py +16 -2
- notionary/blocks/table/table_element.py +136 -228
- notionary/blocks/table/table_markdown_node.py +2 -1
- notionary/blocks/table/table_models.py +28 -0
- notionary/blocks/table_of_contents/__init__.py +19 -0
- notionary/blocks/table_of_contents/table_of_contents_element.py +68 -0
- notionary/blocks/table_of_contents/table_of_contents_markdown_node.py +35 -0
- notionary/blocks/table_of_contents/table_of_contents_models.py +18 -0
- notionary/blocks/todo/__init__.py +9 -2
- notionary/blocks/todo/todo_element.py +52 -92
- notionary/blocks/todo/todo_markdown_node.py +2 -1
- notionary/blocks/todo/todo_models.py +19 -0
- notionary/blocks/toggle/__init__.py +13 -3
- notionary/blocks/toggle/toggle_element.py +69 -260
- notionary/blocks/toggle/toggle_markdown_node.py +25 -15
- notionary/blocks/toggle/toggle_models.py +17 -0
- notionary/blocks/toggleable_heading/__init__.py +6 -2
- notionary/blocks/toggleable_heading/toggleable_heading_element.py +86 -241
- notionary/blocks/toggleable_heading/toggleable_heading_markdown_node.py +26 -18
- notionary/blocks/types.py +130 -0
- notionary/blocks/video/__init__.py +8 -2
- notionary/blocks/video/video_element.py +70 -141
- notionary/blocks/video/video_element_models.py +10 -0
- notionary/blocks/video/video_markdown_node.py +13 -6
- notionary/database/client.py +26 -8
- notionary/database/database.py +13 -14
- notionary/database/database_filter_builder.py +2 -2
- notionary/database/database_provider.py +5 -4
- notionary/database/models.py +337 -0
- notionary/database/notion_database.py +6 -7
- notionary/file_upload/client.py +5 -7
- notionary/file_upload/models.py +3 -2
- notionary/file_upload/notion_file_upload.py +2 -3
- notionary/markdown/markdown_builder.py +729 -0
- notionary/markdown/markdown_document_model.py +228 -0
- notionary/{blocks → markdown}/markdown_node.py +1 -0
- notionary/models/notion_database_response.py +0 -338
- notionary/page/client.py +34 -15
- notionary/page/models.py +327 -0
- notionary/page/notion_page.py +136 -58
- notionary/page/{content/page_content_writer.py → page_content_deleting_service.py} +25 -59
- notionary/page/page_content_writer.py +177 -0
- notionary/page/page_context.py +65 -0
- notionary/page/reader/handler/__init__.py +19 -0
- notionary/page/reader/handler/base_block_renderer.py +44 -0
- notionary/page/reader/handler/block_processing_context.py +35 -0
- notionary/page/reader/handler/block_rendering_context.py +48 -0
- notionary/page/reader/handler/column_list_renderer.py +51 -0
- notionary/page/reader/handler/column_renderer.py +60 -0
- notionary/page/reader/handler/line_renderer.py +73 -0
- notionary/page/reader/handler/numbered_list_renderer.py +85 -0
- notionary/page/reader/handler/toggle_renderer.py +69 -0
- notionary/page/reader/handler/toggleable_heading_renderer.py +89 -0
- notionary/page/reader/page_content_retriever.py +81 -0
- notionary/page/search_filter_builder.py +2 -1
- notionary/page/writer/handler/__init__.py +24 -0
- notionary/page/writer/handler/code_handler.py +72 -0
- notionary/page/writer/handler/column_handler.py +141 -0
- notionary/page/writer/handler/column_list_handler.py +139 -0
- notionary/page/writer/handler/equation_handler.py +74 -0
- notionary/page/writer/handler/line_handler.py +35 -0
- notionary/page/writer/handler/line_processing_context.py +54 -0
- notionary/page/writer/handler/regular_line_handler.py +86 -0
- notionary/page/writer/handler/table_handler.py +66 -0
- notionary/page/writer/handler/toggle_handler.py +155 -0
- notionary/page/writer/handler/toggleable_heading_handler.py +173 -0
- notionary/page/writer/markdown_to_notion_converter.py +95 -0
- notionary/page/writer/markdown_to_notion_converter_context.py +30 -0
- notionary/page/writer/markdown_to_notion_formatting_post_processor.py +73 -0
- notionary/page/writer/notion_text_length_processor.py +150 -0
- notionary/telemetry/__init__.py +2 -2
- notionary/telemetry/service.py +3 -3
- notionary/user/__init__.py +2 -2
- notionary/user/base_notion_user.py +2 -1
- notionary/user/client.py +2 -3
- notionary/user/models.py +1 -0
- notionary/user/notion_bot_user.py +4 -5
- notionary/user/notion_user.py +3 -4
- notionary/user/notion_user_manager.py +23 -95
- notionary/util/__init__.py +3 -2
- notionary/util/fuzzy.py +2 -1
- notionary/util/logging_mixin.py +2 -2
- notionary/util/singleton_metaclass.py +1 -1
- notionary/workspace.py +6 -5
- notionary-0.2.22.dist-info/METADATA +237 -0
- notionary-0.2.22.dist-info/RECORD +200 -0
- notionary/blocks/document/__init__.py +0 -7
- notionary/blocks/document/document_element.py +0 -102
- notionary/blocks/document/document_markdown_node.py +0 -31
- notionary/blocks/image/__init__.py +0 -7
- notionary/blocks/image/image_element.py +0 -151
- notionary/blocks/markdown_builder.py +0 -356
- notionary/blocks/mention/__init__.py +0 -7
- notionary/blocks/mention/mention_element.py +0 -229
- notionary/blocks/mention/mention_markdown_node.py +0 -38
- notionary/blocks/prompts/element_prompt_builder.py +0 -83
- notionary/blocks/prompts/element_prompt_content.py +0 -41
- notionary/blocks/shared/models.py +0 -713
- notionary/blocks/shared/notion_block_element.py +0 -37
- notionary/blocks/shared/text_inline_formatter.py +0 -262
- notionary/blocks/shared/text_inline_formatter_new.py +0 -139
- notionary/database/models/page_result.py +0 -10
- notionary/models/notion_block_response.py +0 -264
- notionary/models/notion_page_response.py +0 -78
- notionary/models/search_response.py +0 -0
- notionary/page/__init__.py +0 -0
- notionary/page/content/markdown_whitespace_processor.py +0 -80
- notionary/page/content/notion_text_length_utils.py +0 -87
- notionary/page/content/page_content_retriever.py +0 -60
- notionary/page/formatting/line_processor.py +0 -153
- notionary/page/formatting/markdown_to_notion_converter.py +0 -153
- notionary/page/markdown_syntax_prompt_generator.py +0 -114
- notionary/page/notion_to_markdown_converter.py +0 -179
- notionary/page/properites/property_value_extractor.py +0 -0
- notionary/user/notion_user_provider.py +0 -1
- notionary-0.2.19.dist-info/METADATA +0 -225
- notionary-0.2.19.dist-info/RECORD +0 -150
- /notionary/{blocks/document/document_models.py → markdown/___init__.py} +0 -0
- /notionary/{blocks/image/image_models.py → markdown/makdown_document_model.py} +0 -0
- /notionary/{blocks/mention/mention_models.py → page/reader/handler/equation_renderer.py} +0 -0
- /notionary/{blocks/shared/__init__.py → page/writer/markdown_to_notion_post_processor.py} +0 -0
- /notionary/{blocks/toggleable_heading/toggleable_heading_models.py → page/writer/markdown_to_notion_text_length_post_processor.py} +0 -0
- /notionary/{elements/__init__.py → util/concurrency_limiter.py} +0 -0
- {notionary-0.2.19.dist-info → notionary-0.2.22.dist-info}/LICENSE +0 -0
- {notionary-0.2.19.dist-info → notionary-0.2.22.dist-info}/WHEEL +0 -0
@@ -1,78 +0,0 @@
|
|
1
|
-
from typing import Literal, Optional, Dict, Any, Union
|
2
|
-
|
3
|
-
from pydantic import BaseModel
|
4
|
-
|
5
|
-
|
6
|
-
class User(BaseModel):
    """Represents a Notion user object."""

    # Object-type discriminator, kept as a free-form string (no Literal restriction).
    object: str
    # Identifier of the user.
    id: str
|
11
|
-
|
12
|
-
|
13
|
-
class ExternalFile(BaseModel):
    """Represents an external file, e.g., for cover images."""

    # Location of the file; stored as a plain string, so no URL validation happens here.
    url: str
|
17
|
-
|
18
|
-
|
19
|
-
class Cover(BaseModel):
    """Cover image for a Notion page."""

    # Kind of cover; a free-form string rather than a Literal.
    type: str
    # Required external-file payload.
    # NOTE(review): a cover payload without an "external" key fails validation —
    # confirm whether the API can return other cover kinds (e.g. uploaded files).
    external: ExternalFile
|
24
|
-
|
25
|
-
|
26
|
-
class EmojiIcon(BaseModel):
    """Icon variant whose ``type`` discriminator is the literal ``"emoji"``."""

    type: Literal["emoji"]
    # The emoji itself; presumably a single emoji character, but only `str` is enforced.
    emoji: str
|
29
|
-
|
30
|
-
|
31
|
-
class ExternalIcon(BaseModel):
    """Icon variant whose ``type`` discriminator is the literal ``"external"``."""

    type: Literal["external"]
    # The externally hosted file used as the icon.
    external: ExternalFile
|
34
|
-
|
35
|
-
|
36
|
-
# A page icon is either an emoji or an externally hosted file.
# NOTE(review): any other icon kind would fail validation — confirm these two
# variants cover everything the API returns.
Icon = Union[EmojiIcon, ExternalIcon]
|
37
|
-
|
38
|
-
|
39
|
-
class DatabaseParent(BaseModel):
    """Parent reference whose ``type`` discriminator is ``"database_id"``."""

    type: Literal["database_id"]
    # Identifier of the parent database.
    database_id: str
|
42
|
-
|
43
|
-
|
44
|
-
class PageParent(BaseModel):
    """Parent reference whose ``type`` discriminator is ``"page_id"``."""

    type: Literal["page_id"]
    # Identifier of the parent page.
    page_id: str
|
47
|
-
|
48
|
-
|
49
|
-
class WorkspaceParent(BaseModel):
    """Parent reference whose ``type`` discriminator is ``"workspace"``."""

    type: Literal["workspace"]
    # Defaults to True when the key is absent from the payload.
    workspace: bool = True
|
52
|
-
|
53
|
-
|
54
|
-
# A page's parent is a database, another page, or the workspace itself.
# NOTE(review): any other parent kind would fail validation — confirm these
# three variants are exhaustive for the payloads this model receives.
Parent = Union[DatabaseParent, PageParent, WorkspaceParent]
|
55
|
-
|
56
|
-
|
57
|
-
class NotionPageResponse(BaseModel):
    """
    Represents a full Notion page object as returned by the Notion API.

    This structure is flexible and designed to work with different database schemas.
    """

    # Object-type discriminator, kept as a free-form string.
    object: str
    id: str
    # Timestamps are kept as raw strings; this model does no datetime parsing.
    created_time: str
    last_edited_time: str
    created_by: User
    last_edited_by: User
    # Both may be None (page without cover / icon).
    cover: Optional[Cover]
    icon: Optional[Icon]
    parent: Parent
    archived: bool
    in_trash: bool
    # Property payloads are left untyped (name -> raw value) so that any
    # database schema can be represented.
    properties: Dict[str, Any]
    url: str
    # May be None.
    public_url: Optional[str]
    # NOTE(review): required field — confirm it is present in every payload
    # this model is validated against.
    request_id: str
|
File without changes
|
notionary/page/__init__.py
DELETED
File without changes
|
@@ -1,80 +0,0 @@
|
|
1
|
-
class MarkdownWhitespaceProcessor:
    """Normalize indentation in markdown text, one line at a time.

    Lines outside fenced code blocks lose their leading whitespace. Lines
    inside a fenced block are buffered and, once the block ends, have their
    common indentation removed so that relative indentation survives.
    """

    def __init__(self):
        self.processed_lines = []
        self.in_code_block = False
        self.current_code_block = []

    def process_lines(self, lines: list[str]) -> str:
        """Return the normalized markdown for ``lines``, joined with newlines.

        State left over from a previous call is discarded first.
        """
        self.processed_lines = []
        self.in_code_block = False
        self.current_code_block = []

        for raw_line in lines:
            self._process_single_line(raw_line)

        # A fence that was opened but never closed is closed implicitly,
        # provided it collected at least one line.
        unterminated = self.in_code_block and self.current_code_block
        if unterminated:
            self._finish_code_block()

        return "\n".join(self.processed_lines)

    def _process_single_line(self, line: str) -> None:
        """Route one line: fence marker, code-block content, or regular text."""
        if self._is_code_block_marker(line):
            self._handle_code_block_marker(line)
        elif self.in_code_block:
            # Buffered untouched; indentation is normalized when the block ends.
            self.current_code_block.append(line)
        else:
            self.processed_lines.append(line.lstrip())

    def _handle_code_block_marker(self, line: str) -> None:
        """Open a new code block, or close the one currently open."""
        if self.in_code_block:
            self._finish_code_block()
            return

        self.in_code_block = True
        self.processed_lines.append(self._normalize_code_block_start(line))
        self.current_code_block = []

    def _finish_code_block(self) -> None:
        """Emit the buffered code lines followed by a closing fence."""
        dedented = self._normalize_code_block_content(self.current_code_block)
        self.processed_lines.extend(dedented)
        self.processed_lines.append("```")
        self.in_code_block = False

    def _is_code_block_marker(self, line: str) -> bool:
        """Return True when the line, ignoring leading whitespace, starts with a fence."""
        without_indent = line.lstrip()
        return without_indent.startswith("```")

    def _normalize_code_block_start(self, line: str) -> str:
        """Return the opening fence with no indentation and a trimmed language tag."""
        after_fence = line.lstrip().replace("```", "", 1)
        return "```" + after_fence.strip()

    def _normalize_code_block_content(self, code_lines: list[str]) -> list[str]:
        """Strip the indentation shared by all non-blank lines of a code block.

        Blank lines become empty strings whenever any stripping takes place.
        """
        if not code_lines:
            return []

        indent_widths = [
            len(entry) - len(entry.lstrip()) for entry in code_lines if entry.strip()
        ]
        if not indent_widths:
            # Only blank lines: keep the line count, drop the whitespace.
            return [""] * len(code_lines)

        shared_indent = min(indent_widths)
        if shared_indent == 0:
            return code_lines

        return [entry[shared_indent:] if entry.strip() else "" for entry in code_lines]
|
@@ -1,87 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Utility functions for handling Notion API text length limitations.
|
3
|
-
|
4
|
-
This module provides functions to fix text content that exceeds Notion's
|
5
|
-
rich_text character limit of 2000 characters per element.
|
6
|
-
|
7
|
-
Resolves API errors like:
|
8
|
-
"validation_error - body.children[79].toggle.children[2].paragraph.rich_text[0].text.content.length
|
9
|
-
should be ≤ 2000, instead was 2162."
|
10
|
-
"""
|
11
|
-
|
12
|
-
import re
|
13
|
-
import logging
|
14
|
-
from typing import Any
|
15
|
-
|
16
|
-
logger = logging.getLogger(__name__)


def fix_blocks_content_length(
    blocks: list[dict[str, Any]], max_text_length: int = 1900
) -> list[dict[str, Any]]:
    """Return copies of ``blocks`` whose text content fits within ``max_text_length``.

    Over-long ``rich_text[i].text.content`` strings are truncated (a warning is
    logged for each). The input blocks are never modified.

    Args:
        blocks: Block dicts, each with a ``"type"`` key naming its payload key.
        max_text_length: Maximum number of characters kept per text item.

    Returns:
        A new list with one fixed block per input block.
    """
    return [_fix_single_block_content(block, max_text_length) for block in blocks]


def _fix_single_block_content(
    block: dict[str, Any], max_text_length: int
) -> dict[str, Any]:
    """Fix content length in a single block and, recursively, its children.

    Works copy-on-write: every dict or list that gets rewritten is copied
    first, so ``block`` and everything nested inside it stay untouched.
    Parts that need no change are shared with the input rather than copied.
    """
    block_copy = block.copy()

    block_type = block.get("type")
    if not block_type:
        return block_copy

    content = block.get(block_type)
    if not content or not isinstance(content, dict):
        return block_copy

    # Detach the payload dict before writing into it; a shallow copy of the
    # block alone would still share it with the caller's data.
    block_copy[block_type] = content.copy()

    if "rich_text" in content:
        _fix_rich_text_content(block_copy, block_type, content, max_text_length)

    if content.get("children"):
        block_copy[block_type]["children"] = [
            _fix_single_block_content(child, max_text_length)
            for child in content["children"]
        ]

    return block_copy


def _fix_rich_text_content(
    block_copy: dict[str, Any],
    block_type: str,
    content: dict[str, Any],
    max_text_length: int,
) -> None:
    """Store a length-limited ``rich_text`` list in ``block_copy[block_type]``.

    Items are read from ``content["rich_text"]``. Items that need truncating
    are rebuilt as new dicts; all other items are reused as they are.
    """
    rich_text = content["rich_text"]
    if not isinstance(rich_text, list):
        return

    fixed_items = []
    for text_item in rich_text:
        text_part = text_item.get("text") if isinstance(text_item, dict) else None
        text_content = text_part.get("content") if isinstance(text_part, dict) else None

        # Items without string content, or already short enough, pass through.
        if not isinstance(text_content, str) or len(text_content) <= max_text_length:
            fixed_items.append(text_item)
            continue

        logger.warning(
            "Truncating text content from %d to %d chars",
            len(text_content),
            max_text_length,
        )
        fixed_items.append(
            {
                **text_item,
                "text": {**text_part, "content": text_content[:max_text_length]},
            }
        )

    block_copy[block_type]["rich_text"] = fixed_items
|
76
|
-
|
77
|
-
|
78
|
-
def split_to_paragraphs(markdown_text: str) -> list[str]:
    """Split markdown on blank-line boundaries, dropping whitespace-only chunks."""
    kept = []
    for chunk in re.split(r"\n\s*\n", markdown_text):
        if chunk.strip():
            kept.append(chunk)
    return kept
|
82
|
-
|
83
|
-
|
84
|
-
def split_to_sentences(paragraph: str) -> list[str]:
    """Split a paragraph after ``.``, ``!`` or ``?`` followed by whitespace.

    The punctuation stays attached to its sentence; whitespace-only pieces
    are dropped.
    """
    pieces = re.split(r"(?<=[.!?])\s+", paragraph)
    return list(filter(lambda piece: piece.strip(), pieces))
|
@@ -1,60 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
from typing import Any, Dict, Optional
|
3
|
-
|
4
|
-
from notionary.blocks.registry.block_registry import BlockRegistry
|
5
|
-
|
6
|
-
from notionary.blocks import NotionBlockClient
|
7
|
-
from notionary.blocks.shared.models import Block
|
8
|
-
from notionary.page.notion_to_markdown_converter import (
|
9
|
-
NotionToMarkdownConverter,
|
10
|
-
)
|
11
|
-
from notionary.util import LoggingMixin
|
12
|
-
|
13
|
-
|
14
|
-
class PageContentRetriever(LoggingMixin):
    """Fetch a page's blocks (including nested children) and render them as markdown."""

    def __init__(
        self,
        page_id: str,
        block_registry: BlockRegistry,
    ):
        self.page_id = page_id
        self._notion_to_markdown_converter = NotionToMarkdownConverter(
            block_registry=block_registry
        )
        self.client = NotionBlockClient()

    async def get_page_content(self) -> str:
        """Return the page content converted to markdown."""
        block_tree = await self._get_page_blocks_with_children()

        # TODO: Replace this quick fix - the recursive Block objects are
        # converted to plain dicts before being handed to the converter.
        plain_blocks = []
        for block in block_tree:
            plain_blocks.append(block.model_dump(mode="python", exclude_unset=True))

        return self._notion_to_markdown_converter.convert(plain_blocks)

    async def _get_page_blocks_with_children(
        self, parent_id: Optional[str] = None
    ) -> list[Block]:
        """Return the children of ``parent_id`` (or of the page itself when None).

        Each returned block that reports children has them fetched recursively
        and attached as ``block.children``.
        """
        if parent_id is None:
            response = await self.client.get_block_children(block_id=self.page_id)
        else:
            response = await self.client.get_block_children(parent_id)

        if not response or not response.results:
            return []

        blocks = response.results

        for block in blocks:
            # Only blocks that report children and carry an id are descended into.
            nested_parent_id = block.id if block.has_children else None
            if not nested_parent_id:
                continue

            nested = await self._get_page_blocks_with_children(nested_parent_id)
            if nested:
                block.children = nested

        return blocks
|
@@ -1,153 +0,0 @@
|
|
1
|
-
import re
|
2
|
-
from notionary.blocks.shared.notion_block_element import NotionBlock
|
3
|
-
from notionary.blocks.registry.block_registry import BlockRegistry
|
4
|
-
|
5
|
-
|
6
|
-
class LineProcessingState:
    """Accumulates the lines of the paragraph currently being collected."""

    def __init__(self):
        self.paragraph_lines: list[str] = []
        self.paragraph_start: int = 0

    def add_to_paragraph(self, line: str, current_pos: int):
        """Append ``line``; the first line of a paragraph also records its start position."""
        starting_new_paragraph = not self.paragraph_lines
        if starting_new_paragraph:
            self.paragraph_start = current_pos
        self.paragraph_lines.append(line)

    def reset_paragraph(self):
        """Forget the collected lines and the recorded start position."""
        self.paragraph_start = 0
        self.paragraph_lines = []

    def has_paragraph(self) -> bool:
        """Return True when at least one line has been collected."""
        return bool(self.paragraph_lines)
|
27
|
-
|
28
|
-
|
29
|
-
class LineProcessor:
    """Handles line-by-line processing of markdown text.

    Lines are classified one at a time: lines that fall into an excluded
    character range or match the pipe pattern are skipped, blank lines end
    the current paragraph, lines recognised by a single-line registry element
    become that element's block(s), and everything else is collected into a
    paragraph that is converted through the registry once it ends.
    """

    def __init__(
        self,
        block_registry: "BlockRegistry",
        excluded_ranges: set[int],
        pipe_pattern: str,
    ):
        """Store the collaborators used during processing.

        Args:
            block_registry: Source of block elements and of the fallback
                ``markdown_to_notion`` conversion used for paragraphs.
            excluded_ranges: Character positions that were already consumed
                elsewhere; lines touching any of them are skipped.
            pipe_pattern: Regex (applied with ``match``) identifying
                pipe-prefixed lines, which are skipped here.
        """
        self._block_registry = block_registry
        self._excluded_ranges = excluded_ranges
        self._pipe_pattern = pipe_pattern
        # Compiled once; the pattern is tested against every line.
        self._pipe_regex = re.compile(pipe_pattern)

    @staticmethod
    def _normalize_to_list(result) -> list[dict[str, object]]:
        """Normalize ``None`` / a single block / a list of blocks to a list."""
        if result is None:
            return []
        return result if isinstance(result, list) else [result]

    def process_lines(self, text: str) -> list[tuple[int, int, dict[str, object]]]:
        """Process all lines of ``text`` and return ``(start, end, block)`` tuples.

        Positions are character offsets into ``text``; every line is counted
        with one extra character for its newline.
        """
        line_blocks: list[tuple[int, int, dict[str, object]]] = []
        state = LineProcessingState()
        current_pos = 0

        for line in text.split("\n"):
            line_length = len(line) + 1  # +1 for the newline
            line_end = current_pos + line_length - 1

            if not self._should_skip_line(line, current_pos, line_end):
                self._process_single_line(
                    line, current_pos, line_end, line_blocks, state
                )
            current_pos += line_length

        # Flush a paragraph that runs up to the end of the text.
        self._finalize_paragraph(state, current_pos, line_blocks)

        return line_blocks

    def _should_skip_line(self, line: str, current_pos: int, line_end: int) -> bool:
        """Return True for lines in an excluded range or using pipe syntax."""
        return self._overlaps_with_excluded(
            current_pos, line_end
        ) or self._is_pipe_syntax_line(line)

    def _overlaps_with_excluded(self, start_pos: int, end_pos: int) -> bool:
        """Return True when any position in ``[start_pos, end_pos]`` is excluded."""
        return any(
            pos in self._excluded_ranges for pos in range(start_pos, end_pos + 1)
        )

    def _is_pipe_syntax_line(self, line: str) -> bool:
        """Return True when the line matches the pipe pattern at its start."""
        return bool(self._pipe_regex.match(line))

    def _process_single_line(
        self,
        line: str,
        current_pos: int,
        line_end: int,
        line_blocks: list[tuple[int, int, dict[str, object]]],
        state: "LineProcessingState",
    ):
        """Classify one line and update ``line_blocks`` / ``state`` accordingly."""
        # A blank line terminates the paragraph collected so far.
        if not line.strip():
            self._finalize_paragraph(state, current_pos, line_blocks)
            state.reset_paragraph()
            return

        # Special blocks (anything other than plain paragraphs) also end the
        # current paragraph before being emitted themselves.
        special_blocks = self._extract_special_block(line)
        if special_blocks:
            self._finalize_paragraph(state, current_pos, line_blocks)
            # One line may yield several blocks; all share the line's range.
            line_blocks.extend(
                (current_pos, line_end, block) for block in special_blocks
            )
            state.reset_paragraph()
            return

        state.add_to_paragraph(line, current_pos)

    def _extract_special_block(self, line: str) -> list[dict[str, object]]:
        """Return the blocks of the first single-line element producing a non-paragraph.

        Elements that only produce paragraph blocks for ``line`` are passed
        over; an empty list means the line is ordinary paragraph text.
        """
        for element in self._block_registry.get_elements():
            if element.is_multiline() or not element.match_markdown(line):
                continue

            blocks = self._normalize_to_list(element.markdown_to_notion(line))

            # Only counts when at least one non-paragraph block is present.
            if any(block.get("type") != "paragraph" for block in blocks):
                return blocks

        return []

    def _finalize_paragraph(
        self,
        state: "LineProcessingState",
        end_pos: int,
        line_blocks: list[tuple[int, int, dict[str, object]]],
    ):
        """Convert the collected paragraph lines to block(s) and append them.

        Does nothing when no lines are collected. The state is not reset here;
        callers do that themselves.
        """
        if not state.has_paragraph():
            return

        paragraph_text = "\n".join(state.paragraph_lines)
        result = self._block_registry.markdown_to_notion(paragraph_text)

        line_blocks.extend(
            (state.paragraph_start, end_pos, block)
            for block in self._normalize_to_list(result)
        )
|
@@ -1,153 +0,0 @@
|
|
1
|
-
from notionary.blocks import ColumnElement, BlockRegistry
|
2
|
-
from notionary.page.formatting.line_processor import LineProcessor
|
3
|
-
|
4
|
-
# TODO: Use a recursive tree parser here!
|
5
|
-
class MarkdownToNotionConverter:
    """Clean converter focused on block identification and conversion.

    Conversion runs in three passes over the same text: toggleable elements
    first, then the remaining multiline elements, then everything left over
    line by line. Each pass yields ``(start, end, block)`` tuples; the final
    result is those blocks ordered by start position.
    """

    def __init__(self, block_registry: "BlockRegistry"):
        """Store the registry and, if columns are registered, wire up their callback."""
        self._block_registry = block_registry
        self._pipe_content_pattern = r"^\|\s?(.*)$"
        self._toggle_element_types = ["ToggleElement", "ToggleableHeadingElement"]

        # Column content is converted by calling back into this converter.
        if self._block_registry.contains(ColumnElement):
            ColumnElement.set_converter_callback(self.convert)

    def convert(self, markdown_text: str) -> list[dict[str, object]]:
        """Convert markdown text to Notion API block format.

        Returns an empty list for empty input.
        """
        if not markdown_text:
            return []

        blocks_with_positions = self._identify_all_blocks(markdown_text)
        # list.sort is stable, so blocks sharing a start position keep the
        # order in which they were identified.
        blocks_with_positions.sort(key=lambda entry: entry[0])

        # Flatten: some elements return lists of blocks.
        result = []
        for _, _, block in blocks_with_positions:
            if isinstance(block, list):
                result.extend(block)
            else:
                result.append(block)
        return result

    def _identify_all_blocks(
        self, markdown_text: str
    ) -> list[tuple[int, int, dict[str, object]]]:
        """Run the three identification passes and return all positioned blocks."""
        # 1. Complex multiline blocks first (toggles, toggleable headings).
        toggleable_blocks = self._find_toggleable_blocks(markdown_text)

        # 2. Other multiline blocks, skipping ranges the toggles already cover.
        multiline_blocks = self._find_multiline_blocks(markdown_text, toggleable_blocks)

        # 3. Remaining text line by line, skipping everything found so far.
        processed_blocks = toggleable_blocks + multiline_blocks
        line_blocks = self._process_remaining_lines(markdown_text, processed_blocks)

        return [*toggleable_blocks, *multiline_blocks, *line_blocks]

    def _find_toggleable_blocks(
        self, text: str
    ) -> list[tuple[int, int, dict[str, object]]]:
        """Find all toggleable blocks (Toggle and ToggleableHeading)."""
        toggleable_elements = self._get_elements_by_type(
            self._toggle_element_types, multiline_only=True
        )

        blocks = []
        for element in toggleable_elements:
            matches = element.find_matches(text, self.convert, context_aware=True)
            if matches:
                blocks.extend(matches)

        return blocks

    def _find_multiline_blocks(
        self, text: str, exclude_blocks: list[tuple[int, int, dict[str, object]]]
    ) -> list[tuple[int, int, dict[str, object]]]:
        """Find all multiline blocks except toggleable ones.

        Matches overlapping any range in ``exclude_blocks`` are dropped. When
        one match yields several blocks, each is emitted with the match's own
        ``(start, end)`` range.
        """
        multiline_elements = [
            element
            for element in self._block_registry.get_multiline_elements()
            if element.__name__ not in self._toggle_element_types
        ]

        excluded_ranges = self._create_excluded_ranges(exclude_blocks)

        blocks = []
        for element in multiline_elements:
            for start_pos, end_pos, block in element.find_matches(text):
                if self._overlaps_with_ranges(start_pos, end_pos, excluded_ranges):
                    continue

                # Every sub-block keeps the real start position. Giving later
                # sub-blocks synthetic positions past end_pos would let the
                # position sort in convert() place them behind content that
                # follows the match; the stable sort already keeps same-start
                # blocks in emission order.
                blocks.extend(
                    (start_pos, end_pos, single_block)
                    for single_block in self._normalize_to_list(block)
                )

        return blocks

    def _process_remaining_lines(
        self, text: str, exclude_blocks: list[tuple[int, int, dict[str, object]]]
    ) -> list[tuple[int, int, dict[str, object]]]:
        """Process text line by line, excluding already processed ranges."""
        if not text:
            return []

        processor = LineProcessor(
            block_registry=self._block_registry,
            excluded_ranges=self._create_excluded_ranges(exclude_blocks),
            pipe_pattern=self._pipe_content_pattern,
        )

        return processor.process_lines(text)

    def _get_elements_by_type(
        self, type_names: list[str], multiline_only: bool = False
    ) -> list[type]:
        """Return registry elements whose class name is in ``type_names``.

        Only elements exposing ``match_markdown`` are returned. With
        ``multiline_only`` the lookup is limited to multiline elements.
        """
        elements = (
            self._block_registry.get_multiline_elements()
            if multiline_only
            else self._block_registry.get_elements()
        )

        return [
            element
            for element in elements
            if element.__name__ in type_names and hasattr(element, "match_markdown")
        ]

    def _create_excluded_ranges(
        self, exclude_blocks: list[tuple[int, int, dict[str, object]]]
    ) -> set[int]:
        """Return every position covered by the inclusive ranges of ``exclude_blocks``."""
        excluded_positions: set[int] = set()
        for start_pos, end_pos, _ in exclude_blocks:
            excluded_positions.update(range(start_pos, end_pos + 1))
        return excluded_positions

    def _overlaps_with_ranges(
        self, start_pos: int, end_pos: int, excluded_ranges: set[int]
    ) -> bool:
        """Return True when ``[start_pos, end_pos]`` touches an excluded position."""
        return any(pos in excluded_ranges for pos in range(start_pos, end_pos + 1))

    @staticmethod
    def _normalize_to_list(result) -> list[dict[str, object]]:
        """Normalize ``None`` / a single block / a list of blocks to a list."""
        if result is None:
            return []
        return result if isinstance(result, list) else [result]
|