notionary 0.2.19__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. notionary/__init__.py +8 -4
  2. notionary/base_notion_client.py +3 -1
  3. notionary/blocks/__init__.py +2 -91
  4. notionary/blocks/_bootstrap.py +271 -0
  5. notionary/blocks/audio/__init__.py +8 -2
  6. notionary/blocks/audio/audio_element.py +69 -106
  7. notionary/blocks/audio/audio_markdown_node.py +13 -5
  8. notionary/blocks/audio/audio_models.py +6 -55
  9. notionary/blocks/base_block_element.py +42 -0
  10. notionary/blocks/bookmark/__init__.py +9 -2
  11. notionary/blocks/bookmark/bookmark_element.py +49 -139
  12. notionary/blocks/bookmark/bookmark_markdown_node.py +19 -18
  13. notionary/blocks/bookmark/bookmark_models.py +15 -0
  14. notionary/blocks/breadcrumbs/__init__.py +17 -0
  15. notionary/blocks/breadcrumbs/breadcrumb_element.py +39 -0
  16. notionary/blocks/breadcrumbs/breadcrumb_markdown_node.py +32 -0
  17. notionary/blocks/breadcrumbs/breadcrumb_models.py +12 -0
  18. notionary/blocks/bulleted_list/__init__.py +12 -2
  19. notionary/blocks/bulleted_list/bulleted_list_element.py +55 -53
  20. notionary/blocks/bulleted_list/bulleted_list_markdown_node.py +2 -1
  21. notionary/blocks/bulleted_list/bulleted_list_models.py +18 -0
  22. notionary/blocks/callout/__init__.py +9 -2
  23. notionary/blocks/callout/callout_element.py +53 -86
  24. notionary/blocks/callout/callout_markdown_node.py +3 -1
  25. notionary/blocks/callout/callout_models.py +33 -0
  26. notionary/blocks/child_database/__init__.py +14 -0
  27. notionary/blocks/child_database/child_database_element.py +61 -0
  28. notionary/blocks/child_database/child_database_models.py +12 -0
  29. notionary/blocks/child_page/__init__.py +9 -0
  30. notionary/blocks/child_page/child_page_element.py +94 -0
  31. notionary/blocks/child_page/child_page_models.py +12 -0
  32. notionary/blocks/{shared/block_client.py → client.py} +54 -54
  33. notionary/blocks/code/__init__.py +6 -2
  34. notionary/blocks/code/code_element.py +96 -181
  35. notionary/blocks/code/code_markdown_node.py +64 -13
  36. notionary/blocks/code/code_models.py +94 -0
  37. notionary/blocks/column/__init__.py +25 -1
  38. notionary/blocks/column/column_element.py +44 -312
  39. notionary/blocks/column/column_list_element.py +52 -0
  40. notionary/blocks/column/column_list_markdown_node.py +50 -0
  41. notionary/blocks/column/column_markdown_node.py +59 -0
  42. notionary/blocks/column/column_models.py +26 -0
  43. notionary/blocks/divider/__init__.py +9 -2
  44. notionary/blocks/divider/divider_element.py +18 -49
  45. notionary/blocks/divider/divider_markdown_node.py +2 -1
  46. notionary/blocks/divider/divider_models.py +12 -0
  47. notionary/blocks/embed/__init__.py +9 -2
  48. notionary/blocks/embed/embed_element.py +65 -111
  49. notionary/blocks/embed/embed_markdown_node.py +3 -1
  50. notionary/blocks/embed/embed_models.py +14 -0
  51. notionary/blocks/equation/__init__.py +14 -0
  52. notionary/blocks/equation/equation_element.py +133 -0
  53. notionary/blocks/equation/equation_element_markdown_node.py +35 -0
  54. notionary/blocks/equation/equation_models.py +11 -0
  55. notionary/blocks/file/__init__.py +25 -0
  56. notionary/blocks/file/file_element.py +112 -0
  57. notionary/blocks/file/file_element_markdown_node.py +37 -0
  58. notionary/blocks/file/file_element_models.py +39 -0
  59. notionary/blocks/guards.py +22 -0
  60. notionary/blocks/heading/__init__.py +16 -2
  61. notionary/blocks/heading/heading_element.py +83 -69
  62. notionary/blocks/heading/heading_markdown_node.py +2 -1
  63. notionary/blocks/heading/heading_models.py +29 -0
  64. notionary/blocks/image_block/__init__.py +13 -0
  65. notionary/blocks/image_block/image_element.py +89 -0
  66. notionary/blocks/{image → image_block}/image_markdown_node.py +13 -6
  67. notionary/blocks/image_block/image_models.py +10 -0
  68. notionary/blocks/mixins/captions/__init__.py +4 -0
  69. notionary/blocks/mixins/captions/caption_markdown_node_mixin.py +31 -0
  70. notionary/blocks/mixins/captions/caption_mixin.py +92 -0
  71. notionary/blocks/models.py +174 -0
  72. notionary/blocks/numbered_list/__init__.py +12 -2
  73. notionary/blocks/numbered_list/numbered_list_element.py +48 -56
  74. notionary/blocks/numbered_list/numbered_list_markdown_node.py +3 -1
  75. notionary/blocks/numbered_list/numbered_list_models.py +17 -0
  76. notionary/blocks/paragraph/__init__.py +12 -2
  77. notionary/blocks/paragraph/paragraph_element.py +40 -66
  78. notionary/blocks/paragraph/paragraph_markdown_node.py +2 -1
  79. notionary/blocks/paragraph/paragraph_models.py +16 -0
  80. notionary/blocks/pdf/__init__.py +13 -0
  81. notionary/blocks/pdf/pdf_element.py +97 -0
  82. notionary/blocks/pdf/pdf_markdown_node.py +37 -0
  83. notionary/blocks/pdf/pdf_models.py +11 -0
  84. notionary/blocks/quote/__init__.py +11 -2
  85. notionary/blocks/quote/quote_element.py +45 -62
  86. notionary/blocks/quote/quote_markdown_node.py +6 -3
  87. notionary/blocks/quote/quote_models.py +18 -0
  88. notionary/blocks/registry/__init__.py +4 -0
  89. notionary/blocks/registry/block_registry.py +60 -121
  90. notionary/blocks/registry/block_registry_builder.py +115 -59
  91. notionary/blocks/rich_text/__init__.py +33 -0
  92. notionary/blocks/rich_text/name_to_id_resolver.py +205 -0
  93. notionary/blocks/rich_text/rich_text_models.py +221 -0
  94. notionary/blocks/rich_text/text_inline_formatter.py +456 -0
  95. notionary/blocks/syntax_prompt_builder.py +137 -0
  96. notionary/blocks/table/__init__.py +16 -2
  97. notionary/blocks/table/table_element.py +136 -228
  98. notionary/blocks/table/table_markdown_node.py +2 -1
  99. notionary/blocks/table/table_models.py +28 -0
  100. notionary/blocks/table_of_contents/__init__.py +19 -0
  101. notionary/blocks/table_of_contents/table_of_contents_element.py +68 -0
  102. notionary/blocks/table_of_contents/table_of_contents_markdown_node.py +35 -0
  103. notionary/blocks/table_of_contents/table_of_contents_models.py +18 -0
  104. notionary/blocks/todo/__init__.py +9 -2
  105. notionary/blocks/todo/todo_element.py +52 -92
  106. notionary/blocks/todo/todo_markdown_node.py +2 -1
  107. notionary/blocks/todo/todo_models.py +19 -0
  108. notionary/blocks/toggle/__init__.py +13 -3
  109. notionary/blocks/toggle/toggle_element.py +69 -260
  110. notionary/blocks/toggle/toggle_markdown_node.py +25 -15
  111. notionary/blocks/toggle/toggle_models.py +17 -0
  112. notionary/blocks/toggleable_heading/__init__.py +6 -2
  113. notionary/blocks/toggleable_heading/toggleable_heading_element.py +86 -241
  114. notionary/blocks/toggleable_heading/toggleable_heading_markdown_node.py +26 -18
  115. notionary/blocks/types.py +130 -0
  116. notionary/blocks/video/__init__.py +8 -2
  117. notionary/blocks/video/video_element.py +70 -141
  118. notionary/blocks/video/video_element_models.py +10 -0
  119. notionary/blocks/video/video_markdown_node.py +13 -6
  120. notionary/database/client.py +26 -8
  121. notionary/database/database.py +13 -14
  122. notionary/database/database_filter_builder.py +2 -2
  123. notionary/database/database_provider.py +5 -4
  124. notionary/database/models.py +337 -0
  125. notionary/database/notion_database.py +6 -7
  126. notionary/file_upload/client.py +5 -7
  127. notionary/file_upload/models.py +3 -2
  128. notionary/file_upload/notion_file_upload.py +2 -3
  129. notionary/markdown/markdown_builder.py +729 -0
  130. notionary/markdown/markdown_document_model.py +228 -0
  131. notionary/{blocks → markdown}/markdown_node.py +1 -0
  132. notionary/models/notion_database_response.py +0 -338
  133. notionary/page/client.py +34 -15
  134. notionary/page/models.py +327 -0
  135. notionary/page/notion_page.py +136 -58
  136. notionary/page/{content/page_content_writer.py → page_content_deleting_service.py} +25 -59
  137. notionary/page/page_content_writer.py +177 -0
  138. notionary/page/page_context.py +65 -0
  139. notionary/page/reader/handler/__init__.py +19 -0
  140. notionary/page/reader/handler/base_block_renderer.py +44 -0
  141. notionary/page/reader/handler/block_processing_context.py +35 -0
  142. notionary/page/reader/handler/block_rendering_context.py +48 -0
  143. notionary/page/reader/handler/column_list_renderer.py +51 -0
  144. notionary/page/reader/handler/column_renderer.py +60 -0
  145. notionary/page/reader/handler/line_renderer.py +73 -0
  146. notionary/page/reader/handler/numbered_list_renderer.py +85 -0
  147. notionary/page/reader/handler/toggle_renderer.py +69 -0
  148. notionary/page/reader/handler/toggleable_heading_renderer.py +89 -0
  149. notionary/page/reader/page_content_retriever.py +81 -0
  150. notionary/page/search_filter_builder.py +2 -1
  151. notionary/page/writer/handler/__init__.py +24 -0
  152. notionary/page/writer/handler/code_handler.py +72 -0
  153. notionary/page/writer/handler/column_handler.py +141 -0
  154. notionary/page/writer/handler/column_list_handler.py +139 -0
  155. notionary/page/writer/handler/equation_handler.py +74 -0
  156. notionary/page/writer/handler/line_handler.py +35 -0
  157. notionary/page/writer/handler/line_processing_context.py +54 -0
  158. notionary/page/writer/handler/regular_line_handler.py +86 -0
  159. notionary/page/writer/handler/table_handler.py +66 -0
  160. notionary/page/writer/handler/toggle_handler.py +155 -0
  161. notionary/page/writer/handler/toggleable_heading_handler.py +173 -0
  162. notionary/page/writer/markdown_to_notion_converter.py +95 -0
  163. notionary/page/writer/markdown_to_notion_converter_context.py +30 -0
  164. notionary/page/writer/markdown_to_notion_formatting_post_processor.py +73 -0
  165. notionary/page/writer/notion_text_length_processor.py +150 -0
  166. notionary/telemetry/__init__.py +2 -2
  167. notionary/telemetry/service.py +3 -3
  168. notionary/user/__init__.py +2 -2
  169. notionary/user/base_notion_user.py +2 -1
  170. notionary/user/client.py +2 -3
  171. notionary/user/models.py +1 -0
  172. notionary/user/notion_bot_user.py +4 -5
  173. notionary/user/notion_user.py +3 -4
  174. notionary/user/notion_user_manager.py +23 -95
  175. notionary/util/__init__.py +3 -2
  176. notionary/util/fuzzy.py +2 -1
  177. notionary/util/logging_mixin.py +2 -2
  178. notionary/util/singleton_metaclass.py +1 -1
  179. notionary/workspace.py +6 -5
  180. notionary-0.2.22.dist-info/METADATA +237 -0
  181. notionary-0.2.22.dist-info/RECORD +200 -0
  182. notionary/blocks/document/__init__.py +0 -7
  183. notionary/blocks/document/document_element.py +0 -102
  184. notionary/blocks/document/document_markdown_node.py +0 -31
  185. notionary/blocks/image/__init__.py +0 -7
  186. notionary/blocks/image/image_element.py +0 -151
  187. notionary/blocks/markdown_builder.py +0 -356
  188. notionary/blocks/mention/__init__.py +0 -7
  189. notionary/blocks/mention/mention_element.py +0 -229
  190. notionary/blocks/mention/mention_markdown_node.py +0 -38
  191. notionary/blocks/prompts/element_prompt_builder.py +0 -83
  192. notionary/blocks/prompts/element_prompt_content.py +0 -41
  193. notionary/blocks/shared/models.py +0 -713
  194. notionary/blocks/shared/notion_block_element.py +0 -37
  195. notionary/blocks/shared/text_inline_formatter.py +0 -262
  196. notionary/blocks/shared/text_inline_formatter_new.py +0 -139
  197. notionary/database/models/page_result.py +0 -10
  198. notionary/models/notion_block_response.py +0 -264
  199. notionary/models/notion_page_response.py +0 -78
  200. notionary/models/search_response.py +0 -0
  201. notionary/page/__init__.py +0 -0
  202. notionary/page/content/markdown_whitespace_processor.py +0 -80
  203. notionary/page/content/notion_text_length_utils.py +0 -87
  204. notionary/page/content/page_content_retriever.py +0 -60
  205. notionary/page/formatting/line_processor.py +0 -153
  206. notionary/page/formatting/markdown_to_notion_converter.py +0 -153
  207. notionary/page/markdown_syntax_prompt_generator.py +0 -114
  208. notionary/page/notion_to_markdown_converter.py +0 -179
  209. notionary/page/properites/property_value_extractor.py +0 -0
  210. notionary/user/notion_user_provider.py +0 -1
  211. notionary-0.2.19.dist-info/METADATA +0 -225
  212. notionary-0.2.19.dist-info/RECORD +0 -150
  213. /notionary/{blocks/document/document_models.py → markdown/___init__.py} +0 -0
  214. /notionary/{blocks/image/image_models.py → markdown/makdown_document_model.py} +0 -0
  215. /notionary/{blocks/mention/mention_models.py → page/reader/handler/equation_renderer.py} +0 -0
  216. /notionary/{blocks/shared/__init__.py → page/writer/markdown_to_notion_post_processor.py} +0 -0
  217. /notionary/{blocks/toggleable_heading/toggleable_heading_models.py → page/writer/markdown_to_notion_text_length_post_processor.py} +0 -0
  218. /notionary/{elements/__init__.py → util/concurrency_limiter.py} +0 -0
  219. {notionary-0.2.19.dist-info → notionary-0.2.22.dist-info}/LICENSE +0 -0
  220. {notionary-0.2.19.dist-info → notionary-0.2.22.dist-info}/WHEEL +0 -0
@@ -1,78 +0,0 @@
1
- from typing import Literal, Optional, Dict, Any, Union
2
-
3
- from pydantic import BaseModel
4
-
5
-
6
- class User(BaseModel):
7
- """Represents a Notion user object."""
8
-
9
- object: str
10
- id: str
11
-
12
-
13
- class ExternalFile(BaseModel):
14
- """Represents an external file, e.g., for cover images."""
15
-
16
- url: str
17
-
18
-
19
- class Cover(BaseModel):
20
- """Cover image for a Notion page."""
21
-
22
- type: str
23
- external: ExternalFile
24
-
25
-
26
- class EmojiIcon(BaseModel):
27
- type: Literal["emoji"]
28
- emoji: str
29
-
30
-
31
- class ExternalIcon(BaseModel):
32
- type: Literal["external"]
33
- external: ExternalFile
34
-
35
-
36
- Icon = Union[EmojiIcon, ExternalIcon]
37
-
38
-
39
- class DatabaseParent(BaseModel):
40
- type: Literal["database_id"]
41
- database_id: str
42
-
43
-
44
- class PageParent(BaseModel):
45
- type: Literal["page_id"]
46
- page_id: str
47
-
48
-
49
- class WorkspaceParent(BaseModel):
50
- type: Literal["workspace"]
51
- workspace: bool = True
52
-
53
-
54
- Parent = Union[DatabaseParent, PageParent, WorkspaceParent]
55
-
56
-
57
- class NotionPageResponse(BaseModel):
58
- """
59
- Represents a full Notion page object as returned by the Notion API.
60
-
61
- This structure is flexible and designed to work with different database schemas.
62
- """
63
-
64
- object: str
65
- id: str
66
- created_time: str
67
- last_edited_time: str
68
- created_by: User
69
- last_edited_by: User
70
- cover: Optional[Cover]
71
- icon: Optional[Icon]
72
- parent: Parent
73
- archived: bool
74
- in_trash: bool
75
- properties: Dict[str, Any]
76
- url: str
77
- public_url: Optional[str]
78
- request_id: str
File without changes
File without changes
@@ -1,80 +0,0 @@
1
- class MarkdownWhitespaceProcessor:
2
- """Helper class for processing markdown whitespace."""
3
-
4
- def __init__(self):
5
- self.processed_lines = []
6
- self.in_code_block = False
7
- self.current_code_block = []
8
-
9
- def process_lines(self, lines: list[str]) -> str:
10
- """Process all lines and return the processed markdown."""
11
- self.processed_lines = []
12
- self.in_code_block = False
13
- self.current_code_block = []
14
-
15
- for line in lines:
16
- self._process_single_line(line)
17
-
18
- # Handle unclosed code block
19
- if self.in_code_block and self.current_code_block:
20
- self._finish_code_block()
21
-
22
- return "\n".join(self.processed_lines)
23
-
24
- def _process_single_line(self, line: str) -> None:
25
- """Process a single line of markdown."""
26
- if self._is_code_block_marker(line):
27
- self._handle_code_block_marker(line)
28
- return
29
-
30
- if self.in_code_block:
31
- self.current_code_block.append(line)
32
- return
33
-
34
- # Regular text - remove leading whitespace
35
- self.processed_lines.append(line.lstrip())
36
-
37
- def _handle_code_block_marker(self, line: str) -> None:
38
- """Handle code block start/end markers."""
39
- if not self.in_code_block:
40
- # Starting new code block
41
- self.in_code_block = True
42
- self.processed_lines.append(self._normalize_code_block_start(line))
43
- self.current_code_block = []
44
- else:
45
- # Ending code block
46
- self._finish_code_block()
47
-
48
- def _finish_code_block(self) -> None:
49
- """Finish processing current code block."""
50
- self.processed_lines.extend(
51
- self._normalize_code_block_content(self.current_code_block)
52
- )
53
- self.processed_lines.append("```")
54
- self.in_code_block = False
55
-
56
- def _is_code_block_marker(self, line: str) -> bool:
57
- """Check if line is a code block marker."""
58
- return line.lstrip().startswith("```")
59
-
60
- def _normalize_code_block_start(self, line: str) -> str:
61
- """Normalize code block opening marker."""
62
- language = line.lstrip().replace("```", "", 1).strip()
63
- return "```" + language
64
-
65
- def _normalize_code_block_content(self, code_lines: list[str]) -> list[str]:
66
- """Normalize code block indentation."""
67
- if not code_lines:
68
- return []
69
-
70
- # Find minimum indentation from non-empty lines
71
- non_empty_lines = [line for line in code_lines if line.strip()]
72
- if not non_empty_lines:
73
- return [""] * len(code_lines)
74
-
75
- min_indent = min(len(line) - len(line.lstrip()) for line in non_empty_lines)
76
- if min_indent == 0:
77
- return code_lines
78
-
79
- # Remove common indentation
80
- return ["" if not line.strip() else line[min_indent:] for line in code_lines]
@@ -1,87 +0,0 @@
1
- """
2
- Utility functions for handling Notion API text length limitations.
3
-
4
- This module provides functions to fix text content that exceeds Notion's
5
- rich_text character limit of 2000 characters per element.
6
-
7
- Resolves API errors like:
8
- "validation_error - body.children[79].toggle.children[2].paragraph.rich_text[0].text.content.length
9
- should be ≤ 2000, instead was 2162."
10
- """
11
-
12
- import re
13
- import logging
14
- from typing import Any
15
-
16
- logger = logging.getLogger(__name__)
17
-
18
-
19
- def fix_blocks_content_length(
20
- blocks: list[dict[str, Any]], max_text_length: int = 1900
21
- ) -> list[dict[str, Any]]:
22
- """Check each block and ensure text content doesn't exceed Notion's limit."""
23
- return [_fix_single_block_content(block, max_text_length) for block in blocks]
24
-
25
-
26
- def _fix_single_block_content(
27
- block: dict[str, Any], max_text_length: int
28
- ) -> dict[str, Any]:
29
- """Fix content length in a single block and its children recursively."""
30
- block_copy = block.copy()
31
-
32
- block_type = block.get("type")
33
- if not block_type:
34
- return block_copy
35
-
36
- content = block.get(block_type)
37
- if not content:
38
- return block_copy
39
-
40
- if "rich_text" in content:
41
- _fix_rich_text_content(block_copy, block_type, content, max_text_length)
42
-
43
- if "children" in content and content["children"]:
44
- block_copy[block_type]["children"] = [
45
- _fix_single_block_content(child, max_text_length)
46
- for child in content["children"]
47
- ]
48
-
49
- return block_copy
50
-
51
-
52
- def _fix_rich_text_content(
53
- block_copy: dict[str, Any],
54
- block_type: str,
55
- content: dict[str, Any],
56
- max_text_length: int,
57
- ) -> None:
58
- """Fix rich text content that exceeds the length limit."""
59
- rich_text = content["rich_text"]
60
- for i, text_item in enumerate(rich_text):
61
- if "text" not in text_item or "content" not in text_item["text"]:
62
- continue
63
-
64
- text_content = text_item["text"]["content"]
65
- if len(text_content) <= max_text_length:
66
- continue
67
-
68
- logger.warning(
69
- "Truncating text content from %d to %d chars",
70
- len(text_content),
71
- max_text_length,
72
- )
73
- block_copy[block_type]["rich_text"][i]["text"]["content"] = text_content[
74
- :max_text_length
75
- ]
76
-
77
-
78
- def split_to_paragraphs(markdown_text: str) -> list[str]:
79
- """Split markdown into paragraphs."""
80
- paragraphs = re.split(r"\n\s*\n", markdown_text)
81
- return [p for p in paragraphs if p.strip()]
82
-
83
-
84
- def split_to_sentences(paragraph: str) -> list[str]:
85
- """Split a paragraph into sentences."""
86
- sentences = re.split(r"(?<=[.!?])\s+", paragraph)
87
- return [s for s in sentences if s.strip()]
@@ -1,60 +0,0 @@
1
- import json
2
- from typing import Any, Dict, Optional
3
-
4
- from notionary.blocks.registry.block_registry import BlockRegistry
5
-
6
- from notionary.blocks import NotionBlockClient
7
- from notionary.blocks.shared.models import Block
8
- from notionary.page.notion_to_markdown_converter import (
9
- NotionToMarkdownConverter,
10
- )
11
- from notionary.util import LoggingMixin
12
-
13
-
14
- class PageContentRetriever(LoggingMixin):
15
- def __init__(
16
- self,
17
- page_id: str,
18
- block_registry: BlockRegistry,
19
- ):
20
- self.page_id = page_id
21
- self._notion_to_markdown_converter = NotionToMarkdownConverter(
22
- block_registry=block_registry
23
- )
24
- self.client = NotionBlockClient()
25
-
26
- async def get_page_content(self) -> str:
27
- blocks = await self._get_page_blocks_with_children()
28
-
29
- # TODO: Fix this quick fix🧯 Quick-Fix: Konvertiere rekursive Block-Objekte in plain dicts
30
- blocks_as_dicts = [block.model_dump(mode="python", exclude_unset=True) for block in blocks]
31
-
32
- return self._notion_to_markdown_converter.convert(blocks_as_dicts)
33
-
34
- async def _get_page_blocks_with_children(
35
- self, parent_id: Optional[str] = None
36
- ) -> list[Block]:
37
- response = (
38
- await self.client.get_block_children(block_id=self.page_id)
39
- if parent_id is None
40
- else await self.client.get_block_children(parent_id)
41
- )
42
-
43
- if not response or not response.results:
44
- return []
45
-
46
- blocks = response.results
47
-
48
- for block in blocks:
49
- if not block.has_children:
50
- continue
51
-
52
- block_id = block.id
53
- if not block_id:
54
- continue
55
-
56
- children = await self._get_page_blocks_with_children(block_id)
57
- if children:
58
- block.children = children
59
-
60
- return blocks
@@ -1,153 +0,0 @@
1
- import re
2
- from notionary.blocks.shared.notion_block_element import NotionBlock
3
- from notionary.blocks.registry.block_registry import BlockRegistry
4
-
5
-
6
- class LineProcessingState:
7
- """Tracks state during line-by-line processing"""
8
-
9
- def __init__(self):
10
- self.paragraph_lines: list[str] = []
11
- self.paragraph_start: int = 0
12
-
13
- def add_to_paragraph(self, line: str, current_pos: int):
14
- """Add line to current paragraph"""
15
- if not self.paragraph_lines:
16
- self.paragraph_start = current_pos
17
- self.paragraph_lines.append(line)
18
-
19
- def reset_paragraph(self):
20
- """Reset paragraph state"""
21
- self.paragraph_lines = []
22
- self.paragraph_start = 0
23
-
24
- def has_paragraph(self) -> bool:
25
- """Check if there are paragraph lines to process"""
26
- return len(self.paragraph_lines) > 0
27
-
28
-
29
- class LineProcessor:
30
- """Handles line-by-line processing of markdown text"""
31
-
32
- def __init__(
33
- self,
34
- block_registry: BlockRegistry,
35
- excluded_ranges: set[int],
36
- pipe_pattern: str,
37
- ):
38
- self._block_registry = block_registry
39
- self._excluded_ranges = excluded_ranges
40
- self._pipe_pattern = pipe_pattern
41
-
42
- @staticmethod
43
- def _normalize_to_list(result) -> list[dict[str, any]]:
44
- """Normalize Union[list[dict], dict] to list[dict]"""
45
- if result is None:
46
- return []
47
- return result if isinstance(result, list) else [result]
48
-
49
- def process_lines(self, text: str) -> list[tuple[int, int, dict[str, any]]]:
50
- """Process all lines and return blocks with positions"""
51
- lines = text.split("\n")
52
- line_blocks = []
53
-
54
- state = LineProcessingState()
55
- current_pos = 0
56
-
57
- for line in lines:
58
- line_length = len(line) + 1 # +1 for newline
59
- line_end = current_pos + line_length - 1
60
-
61
- if self._should_skip_line(line, current_pos, line_end):
62
- current_pos += line_length
63
- continue
64
-
65
- self._process_single_line(line, current_pos, line_end, line_blocks, state)
66
- current_pos += line_length
67
-
68
- # Process any remaining paragraph
69
- self._finalize_paragraph(state, current_pos, line_blocks)
70
-
71
- return line_blocks
72
-
73
- def _should_skip_line(self, line: str, current_pos: int, line_end: int) -> bool:
74
- """Check if line should be skipped (excluded or pipe syntax)"""
75
- return self._overlaps_with_excluded(
76
- current_pos, line_end
77
- ) or self._is_pipe_syntax_line(line)
78
-
79
- def _overlaps_with_excluded(self, start_pos: int, end_pos: int) -> bool:
80
- """Check if position range overlaps with excluded ranges"""
81
- return any(
82
- pos in self._excluded_ranges for pos in range(start_pos, end_pos + 1)
83
- )
84
-
85
- def _is_pipe_syntax_line(self, line: str) -> bool:
86
- """Check if line uses pipe syntax for nested content"""
87
- return bool(re.match(self._pipe_pattern, line))
88
-
89
- def _process_single_line(
90
- self,
91
- line: str,
92
- current_pos: int,
93
- line_end: int,
94
- line_blocks: list[tuple[int, int, dict[str, any]]],
95
- state: LineProcessingState,
96
- ):
97
- """Process a single line of text"""
98
- # Handle empty lines
99
- if not line.strip():
100
- self._finalize_paragraph(state, current_pos, line_blocks)
101
- state.reset_paragraph()
102
- return
103
-
104
- # Handle special blocks (headings, todos, dividers, etc.)
105
- special_blocks = self._extract_special_block(line)
106
- if special_blocks:
107
- self._finalize_paragraph(state, current_pos, line_blocks)
108
- # Mehrere Blöcke hinzufügen
109
- for block in special_blocks:
110
- line_blocks.append((current_pos, line_end, block))
111
- state.reset_paragraph()
112
- return
113
-
114
- # Add to current paragraph
115
- state.add_to_paragraph(line, current_pos)
116
-
117
- def _extract_special_block(self, line: str) -> list[NotionBlock]:
118
- """Extract special block (non-paragraph) from line"""
119
- for element in (
120
- element
121
- for element in self._block_registry.get_elements()
122
- if not element.is_multiline()
123
- ):
124
- if not element.match_markdown(line):
125
- continue
126
-
127
- result = element.markdown_to_notion(line)
128
- blocks = self._normalize_to_list(result)
129
- if not blocks:
130
- continue
131
-
132
- # Gibt nur zurück, wenn mindestens ein Nicht-Paragraph-Block dabei ist
133
- if any(block.get("type") != "paragraph" for block in blocks):
134
- return blocks
135
-
136
- return []
137
-
138
- def _finalize_paragraph(
139
- self,
140
- state: LineProcessingState,
141
- end_pos: int,
142
- line_blocks: list[tuple[int, int, dict[str, any]]],
143
- ):
144
- """Convert current paragraph lines to paragraph block"""
145
- if not state.has_paragraph():
146
- return
147
-
148
- paragraph_text = "\n".join(state.paragraph_lines)
149
- result = self._block_registry.markdown_to_notion(paragraph_text)
150
- blocks = self._normalize_to_list(result)
151
-
152
- for block in blocks:
153
- line_blocks.append((state.paragraph_start, end_pos, block))
@@ -1,153 +0,0 @@
1
- from notionary.blocks import ColumnElement, BlockRegistry
2
- from notionary.page.formatting.line_processor import LineProcessor
3
-
4
- # TODO: Hier rekursiven Baum Parser verwenden!
5
- class MarkdownToNotionConverter:
6
- """Clean converter focused on block identification and conversion"""
7
-
8
- def __init__(self, block_registry: BlockRegistry):
9
- self._block_registry = block_registry
10
- self._pipe_content_pattern = r"^\|\s?(.*)$"
11
- self._toggle_element_types = ["ToggleElement", "ToggleableHeadingElement"]
12
-
13
- # Setup column element callback if available
14
- if self._block_registry.contains(ColumnElement):
15
- ColumnElement.set_converter_callback(self.convert)
16
-
17
- def convert(self, markdown_text: str) -> list[dict[str, any]]:
18
- """Convert markdown text to Notion API block format"""
19
- if not markdown_text:
20
- return []
21
-
22
- # Main conversion pipeline
23
- blocks_with_positions = self._identify_all_blocks(markdown_text)
24
- blocks_with_positions.sort(key=lambda x: x[0]) # Sort by position
25
-
26
- # Flatten blocks (some elements return lists of blocks)
27
- result = []
28
- for _, _, block in blocks_with_positions:
29
- if isinstance(block, list):
30
- result.extend(block)
31
- else:
32
- result.append(block)
33
- return result
34
-
35
- def _identify_all_blocks(
36
- self, markdown_text: str
37
- ) -> list[tuple[int, int, dict[str, any]]]:
38
- """Main block identification pipeline"""
39
- all_blocks = []
40
-
41
- # 1. Process complex multiline blocks first (toggles, etc.)
42
- toggleable_blocks = self._find_toggleable_blocks(markdown_text)
43
- all_blocks.extend(toggleable_blocks)
44
-
45
- # 2. Process other multiline blocks
46
- multiline_blocks = self._find_multiline_blocks(markdown_text, toggleable_blocks)
47
- all_blocks.extend(multiline_blocks)
48
-
49
- # 3. Process remaining text line by line
50
- processed_blocks = toggleable_blocks + multiline_blocks
51
- line_blocks = self._process_remaining_lines(markdown_text, processed_blocks)
52
- all_blocks.extend(line_blocks)
53
-
54
- return all_blocks
55
-
56
- def _find_toggleable_blocks(
57
- self, text: str
58
- ) -> list[tuple[int, int, dict[str, any]]]:
59
- """Find all toggleable blocks (Toggle and ToggleableHeading)"""
60
- toggleable_elements = self._get_elements_by_type(
61
- self._toggle_element_types, multiline_only=True
62
- )
63
-
64
- blocks = []
65
- for element in toggleable_elements:
66
- matches = element.find_matches(text, self.convert, context_aware=True)
67
- if matches:
68
- blocks.extend(matches)
69
-
70
- return blocks
71
-
72
- def _find_multiline_blocks(
73
- self, text: str, exclude_blocks: list[tuple[int, int, dict[str, any]]]
74
- ) -> list[tuple[int, int, dict[str, any]]]:
75
- """Find all multiline blocks except toggleable ones"""
76
- multiline_elements = [
77
- element
78
- for element in self._block_registry.get_multiline_elements()
79
- if element.__name__ not in self._toggle_element_types
80
- ]
81
-
82
- excluded_ranges = self._create_excluded_ranges(exclude_blocks)
83
-
84
- blocks = []
85
- for element in multiline_elements:
86
- matches = element.find_matches(text)
87
-
88
- for start_pos, end_pos, block in matches:
89
- if not self._overlaps_with_ranges(start_pos, end_pos, excluded_ranges):
90
- # Handle multiple blocks from single element
91
- element_blocks = self._normalize_to_list(block)
92
-
93
- current_pos = start_pos
94
- for i, single_block in enumerate(element_blocks):
95
- blocks.append((current_pos, end_pos, single_block))
96
- # Increment position for subsequent blocks
97
- current_pos = end_pos + i + 1
98
-
99
- return blocks
100
-
101
- def _process_remaining_lines(
102
- self, text: str, exclude_blocks: list[tuple[int, int, dict[str, any]]]
103
- ) -> list[tuple[int, int, dict[str, any]]]:
104
- """Process text line by line, excluding already processed ranges"""
105
- if not text:
106
- return []
107
-
108
- excluded_ranges = self._create_excluded_ranges(exclude_blocks)
109
- processor = LineProcessor(
110
- block_registry=self._block_registry,
111
- excluded_ranges=excluded_ranges,
112
- pipe_pattern=self._pipe_content_pattern,
113
- )
114
-
115
- return processor.process_lines(text)
116
-
117
- def _get_elements_by_type(
118
- self, type_names: list[str], multiline_only: bool = False
119
- ) -> list[any]:
120
- """Get elements from registry by type names"""
121
- elements = (
122
- self._block_registry.get_multiline_elements()
123
- if multiline_only
124
- else self._block_registry.get_elements()
125
- )
126
-
127
- return [
128
- element
129
- for element in elements
130
- if element.__name__ in type_names and hasattr(element, "match_markdown")
131
- ]
132
-
133
- def _create_excluded_ranges(
134
- self, exclude_blocks: list[tuple[int, int, dict[str, any]]]
135
- ) -> set[int]:
136
- """Create set of excluded positions from block ranges"""
137
- excluded_positions = set()
138
- for start_pos, end_pos, _ in exclude_blocks:
139
- excluded_positions.update(range(start_pos, end_pos + 1))
140
- return excluded_positions
141
-
142
- def _overlaps_with_ranges(
143
- self, start_pos: int, end_pos: int, excluded_ranges: set[int]
144
- ) -> bool:
145
- """Check if a range overlaps with excluded positions"""
146
- return any(pos in excluded_ranges for pos in range(start_pos, end_pos + 1))
147
-
148
- @staticmethod
149
- def _normalize_to_list(result) -> list[dict[str, any]]:
150
- """Normalize Union[list[dict], dict] to list[dict]"""
151
- if result is None:
152
- return []
153
- return result if isinstance(result, list) else [result]