notionary 0.2.18__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. notionary/__init__.py +8 -4
  2. notionary/base_notion_client.py +3 -1
  3. notionary/blocks/__init__.py +2 -91
  4. notionary/blocks/_bootstrap.py +263 -0
  5. notionary/blocks/audio/__init__.py +8 -2
  6. notionary/blocks/audio/audio_element.py +42 -104
  7. notionary/blocks/audio/audio_markdown_node.py +3 -1
  8. notionary/blocks/audio/audio_models.py +6 -55
  9. notionary/blocks/base_block_element.py +30 -0
  10. notionary/blocks/bookmark/__init__.py +9 -2
  11. notionary/blocks/bookmark/bookmark_element.py +46 -139
  12. notionary/blocks/bookmark/bookmark_markdown_node.py +3 -1
  13. notionary/blocks/bookmark/bookmark_models.py +15 -0
  14. notionary/blocks/breadcrumbs/__init__.py +17 -0
  15. notionary/blocks/breadcrumbs/breadcrumb_element.py +39 -0
  16. notionary/blocks/breadcrumbs/breadcrumb_markdown_node.py +32 -0
  17. notionary/blocks/breadcrumbs/breadcrumb_models.py +12 -0
  18. notionary/blocks/bulleted_list/__init__.py +12 -2
  19. notionary/blocks/bulleted_list/bulleted_list_element.py +40 -55
  20. notionary/blocks/bulleted_list/bulleted_list_markdown_node.py +2 -1
  21. notionary/blocks/bulleted_list/bulleted_list_models.py +18 -0
  22. notionary/blocks/callout/__init__.py +9 -2
  23. notionary/blocks/callout/callout_element.py +40 -89
  24. notionary/blocks/callout/callout_markdown_node.py +3 -1
  25. notionary/blocks/callout/callout_models.py +33 -0
  26. notionary/blocks/child_database/__init__.py +7 -0
  27. notionary/blocks/child_database/child_database_models.py +19 -0
  28. notionary/blocks/child_page/__init__.py +9 -0
  29. notionary/blocks/child_page/child_page_models.py +12 -0
  30. notionary/blocks/{shared/block_client.py → client.py} +55 -54
  31. notionary/blocks/code/__init__.py +6 -2
  32. notionary/blocks/code/code_element.py +53 -187
  33. notionary/blocks/code/code_markdown_node.py +13 -13
  34. notionary/blocks/code/code_models.py +94 -0
  35. notionary/blocks/column/__init__.py +25 -1
  36. notionary/blocks/column/column_element.py +40 -314
  37. notionary/blocks/column/column_list_element.py +37 -0
  38. notionary/blocks/column/column_list_markdown_node.py +50 -0
  39. notionary/blocks/column/column_markdown_node.py +59 -0
  40. notionary/blocks/column/column_models.py +26 -0
  41. notionary/blocks/divider/__init__.py +9 -2
  42. notionary/blocks/divider/divider_element.py +26 -49
  43. notionary/blocks/divider/divider_markdown_node.py +2 -1
  44. notionary/blocks/divider/divider_models.py +12 -0
  45. notionary/blocks/embed/__init__.py +9 -2
  46. notionary/blocks/embed/embed_element.py +47 -114
  47. notionary/blocks/embed/embed_markdown_node.py +3 -1
  48. notionary/blocks/embed/embed_models.py +14 -0
  49. notionary/blocks/equation/__init__.py +14 -0
  50. notionary/blocks/equation/equation_element.py +80 -0
  51. notionary/blocks/equation/equation_element_markdown_node.py +36 -0
  52. notionary/blocks/equation/equation_models.py +11 -0
  53. notionary/blocks/file/__init__.py +25 -0
  54. notionary/blocks/file/file_element.py +93 -0
  55. notionary/blocks/file/file_element_markdown_node.py +35 -0
  56. notionary/blocks/file/file_element_models.py +39 -0
  57. notionary/blocks/heading/__init__.py +16 -2
  58. notionary/blocks/heading/heading_element.py +67 -72
  59. notionary/blocks/heading/heading_markdown_node.py +2 -1
  60. notionary/blocks/heading/heading_models.py +29 -0
  61. notionary/blocks/image_block/__init__.py +13 -0
  62. notionary/blocks/image_block/image_element.py +84 -0
  63. notionary/blocks/{image → image_block}/image_markdown_node.py +3 -1
  64. notionary/blocks/image_block/image_models.py +10 -0
  65. notionary/blocks/models.py +172 -0
  66. notionary/blocks/numbered_list/__init__.py +12 -2
  67. notionary/blocks/numbered_list/numbered_list_element.py +33 -58
  68. notionary/blocks/numbered_list/numbered_list_markdown_node.py +3 -1
  69. notionary/blocks/numbered_list/numbered_list_models.py +17 -0
  70. notionary/blocks/paragraph/__init__.py +12 -2
  71. notionary/blocks/paragraph/paragraph_element.py +27 -69
  72. notionary/blocks/paragraph/paragraph_markdown_node.py +2 -1
  73. notionary/blocks/paragraph/paragraph_models.py +16 -0
  74. notionary/blocks/pdf/__init__.py +13 -0
  75. notionary/blocks/pdf/pdf_element.py +91 -0
  76. notionary/blocks/pdf/pdf_markdown_node.py +35 -0
  77. notionary/blocks/pdf/pdf_models.py +11 -0
  78. notionary/blocks/quote/__init__.py +11 -2
  79. notionary/blocks/quote/quote_element.py +31 -65
  80. notionary/blocks/quote/quote_markdown_node.py +4 -1
  81. notionary/blocks/quote/quote_models.py +18 -0
  82. notionary/blocks/registry/__init__.py +4 -0
  83. notionary/blocks/registry/block_registry.py +75 -91
  84. notionary/blocks/registry/block_registry_builder.py +107 -59
  85. notionary/blocks/rich_text/__init__.py +33 -0
  86. notionary/blocks/rich_text/rich_text_models.py +188 -0
  87. notionary/blocks/rich_text/text_inline_formatter.py +125 -0
  88. notionary/blocks/table/__init__.py +16 -2
  89. notionary/blocks/table/table_element.py +48 -241
  90. notionary/blocks/table/table_markdown_node.py +2 -1
  91. notionary/blocks/table/table_models.py +28 -0
  92. notionary/blocks/table_of_contents/__init__.py +19 -0
  93. notionary/blocks/table_of_contents/table_of_contents_element.py +51 -0
  94. notionary/blocks/table_of_contents/table_of_contents_markdown_node.py +35 -0
  95. notionary/blocks/table_of_contents/table_of_contents_models.py +18 -0
  96. notionary/blocks/todo/__init__.py +9 -2
  97. notionary/blocks/todo/todo_element.py +38 -95
  98. notionary/blocks/todo/todo_markdown_node.py +2 -1
  99. notionary/blocks/todo/todo_models.py +19 -0
  100. notionary/blocks/toggle/__init__.py +13 -3
  101. notionary/blocks/toggle/toggle_element.py +57 -264
  102. notionary/blocks/toggle/toggle_markdown_node.py +24 -14
  103. notionary/blocks/toggle/toggle_models.py +17 -0
  104. notionary/blocks/toggleable_heading/__init__.py +6 -2
  105. notionary/blocks/toggleable_heading/toggleable_heading_element.py +74 -244
  106. notionary/blocks/toggleable_heading/toggleable_heading_markdown_node.py +26 -18
  107. notionary/blocks/types.py +61 -0
  108. notionary/blocks/video/__init__.py +8 -2
  109. notionary/blocks/video/video_element.py +67 -143
  110. notionary/blocks/video/video_element_models.py +10 -0
  111. notionary/blocks/video/video_markdown_node.py +3 -1
  112. notionary/database/client.py +3 -8
  113. notionary/database/database.py +13 -14
  114. notionary/database/database_filter_builder.py +2 -2
  115. notionary/database/database_provider.py +5 -4
  116. notionary/database/models.py +337 -0
  117. notionary/database/notion_database.py +6 -7
  118. notionary/file_upload/client.py +5 -7
  119. notionary/file_upload/models.py +2 -1
  120. notionary/file_upload/notion_file_upload.py +2 -3
  121. notionary/markdown/markdown_builder.py +722 -0
  122. notionary/markdown/markdown_document_model.py +228 -0
  123. notionary/{blocks → markdown}/markdown_node.py +1 -0
  124. notionary/models/notion_database_response.py +0 -338
  125. notionary/page/client.py +9 -10
  126. notionary/page/models.py +327 -0
  127. notionary/page/notion_page.py +99 -52
  128. notionary/page/notion_text_length_utils.py +119 -0
  129. notionary/page/{content/page_content_writer.py → page_content_writer.py} +88 -38
  130. notionary/page/reader/handler/__init__.py +17 -0
  131. notionary/page/reader/handler/base_block_renderer.py +44 -0
  132. notionary/page/reader/handler/block_processing_context.py +35 -0
  133. notionary/page/reader/handler/block_rendering_context.py +43 -0
  134. notionary/page/reader/handler/column_list_renderer.py +51 -0
  135. notionary/page/reader/handler/column_renderer.py +60 -0
  136. notionary/page/reader/handler/line_renderer.py +60 -0
  137. notionary/page/reader/handler/toggle_renderer.py +69 -0
  138. notionary/page/reader/handler/toggleable_heading_renderer.py +89 -0
  139. notionary/page/reader/page_content_retriever.py +69 -0
  140. notionary/page/search_filter_builder.py +2 -1
  141. notionary/page/writer/handler/__init__.py +22 -0
  142. notionary/page/writer/handler/code_handler.py +100 -0
  143. notionary/page/writer/handler/column_handler.py +141 -0
  144. notionary/page/writer/handler/column_list_handler.py +139 -0
  145. notionary/page/writer/handler/line_handler.py +35 -0
  146. notionary/page/writer/handler/line_processing_context.py +54 -0
  147. notionary/page/writer/handler/regular_line_handler.py +92 -0
  148. notionary/page/writer/handler/table_handler.py +130 -0
  149. notionary/page/writer/handler/toggle_handler.py +153 -0
  150. notionary/page/writer/handler/toggleable_heading_handler.py +167 -0
  151. notionary/page/writer/markdown_to_notion_converter.py +76 -0
  152. notionary/telemetry/__init__.py +2 -2
  153. notionary/telemetry/service.py +4 -3
  154. notionary/user/__init__.py +2 -2
  155. notionary/user/base_notion_user.py +2 -1
  156. notionary/user/client.py +2 -3
  157. notionary/user/models.py +1 -0
  158. notionary/user/notion_bot_user.py +4 -5
  159. notionary/user/notion_user.py +3 -4
  160. notionary/user/notion_user_manager.py +3 -2
  161. notionary/user/notion_user_provider.py +1 -1
  162. notionary/util/__init__.py +3 -2
  163. notionary/util/fuzzy.py +2 -1
  164. notionary/util/logging_mixin.py +2 -2
  165. notionary/util/singleton_metaclass.py +1 -1
  166. notionary/workspace.py +3 -2
  167. {notionary-0.2.18.dist-info → notionary-0.2.21.dist-info}/METADATA +12 -8
  168. notionary-0.2.21.dist-info/RECORD +185 -0
  169. notionary/blocks/document/__init__.py +0 -7
  170. notionary/blocks/document/document_element.py +0 -102
  171. notionary/blocks/document/document_markdown_node.py +0 -31
  172. notionary/blocks/image/__init__.py +0 -7
  173. notionary/blocks/image/image_element.py +0 -151
  174. notionary/blocks/markdown_builder.py +0 -356
  175. notionary/blocks/mention/__init__.py +0 -7
  176. notionary/blocks/mention/mention_element.py +0 -229
  177. notionary/blocks/mention/mention_markdown_node.py +0 -38
  178. notionary/blocks/prompts/element_prompt_builder.py +0 -83
  179. notionary/blocks/prompts/element_prompt_content.py +0 -41
  180. notionary/blocks/shared/__init__.py +0 -0
  181. notionary/blocks/shared/models.py +0 -710
  182. notionary/blocks/shared/notion_block_element.py +0 -37
  183. notionary/blocks/shared/text_inline_formatter.py +0 -262
  184. notionary/blocks/shared/text_inline_formatter_new.py +0 -139
  185. notionary/blocks/toggleable_heading/toggleable_heading_models.py +0 -0
  186. notionary/database/models/page_result.py +0 -10
  187. notionary/models/notion_block_response.py +0 -264
  188. notionary/models/notion_page_response.py +0 -78
  189. notionary/models/search_response.py +0 -0
  190. notionary/page/__init__.py +0 -0
  191. notionary/page/content/notion_text_length_utils.py +0 -87
  192. notionary/page/content/page_content_retriever.py +0 -52
  193. notionary/page/formatting/line_processor.py +0 -153
  194. notionary/page/formatting/markdown_to_notion_converter.py +0 -153
  195. notionary/page/markdown_syntax_prompt_generator.py +0 -114
  196. notionary/page/notion_to_markdown_converter.py +0 -179
  197. notionary/page/properites/property_value_extractor.py +0 -0
  198. notionary-0.2.18.dist-info/RECORD +0 -149
  199. /notionary/{blocks/document/document_models.py → markdown/___init__.py} +0 -0
  200. /notionary/{blocks/image/image_models.py → markdown/makdown_document_model.py} +0 -0
  201. /notionary/page/{content/markdown_whitespace_processor.py → markdown_whitespace_processor.py} +0 -0
  202. /notionary/{blocks/mention/mention_models.py → page/reader/handler/context.py} +0 -0
  203. {notionary-0.2.18.dist-info → notionary-0.2.21.dist-info}/LICENSE +0 -0
  204. {notionary-0.2.18.dist-info → notionary-0.2.21.dist-info}/WHEEL +0 -0
@@ -1,264 +0,0 @@
1
- from typing import List, Optional, Union, Literal
2
- from pydantic import BaseModel
3
-
4
-
5
- # Rich Text Komponenten
6
- class TextContent(BaseModel):
7
- content: str
8
- link: Optional[dict] = None
9
-
10
-
11
- class Annotations(BaseModel):
12
- bold: bool
13
- italic: bool
14
- strikethrough: bool
15
- underline: bool
16
- code: bool
17
- color: str
18
-
19
-
20
- class RichText(BaseModel):
21
- type: Literal["text"]
22
- text: TextContent
23
- annotations: Annotations
24
- plain_text: str
25
- href: Optional[str]
26
-
27
-
28
- # Benutzerobjekt
29
- class User(BaseModel):
30
- object: str
31
- id: str
32
-
33
-
34
- # Elternobjekte
35
- class PageParent(BaseModel):
36
- type: Literal["page_id"]
37
- page_id: str
38
-
39
-
40
- class DatabaseParent(BaseModel):
41
- type: Literal["database_id"]
42
- database_id: str
43
-
44
-
45
- class WorkspaceParent(BaseModel):
46
- type: Literal["workspace"]
47
- workspace: bool = True
48
-
49
-
50
- Parent = Union[PageParent, DatabaseParent, WorkspaceParent]
51
-
52
-
53
- # Block-spezifische Inhalte
54
- class ParagraphBlock(BaseModel):
55
- rich_text: List[RichText]
56
- color: Optional[str] = "default"
57
-
58
-
59
- class Heading1Block(BaseModel):
60
- rich_text: List[RichText]
61
- color: Optional[str] = "default"
62
- is_toggleable: Optional[bool] = False
63
-
64
-
65
- class Heading2Block(BaseModel):
66
- rich_text: List[RichText]
67
- color: Optional[str] = "default"
68
- is_toggleable: Optional[bool] = False
69
-
70
-
71
- class Heading3Block(BaseModel):
72
- rich_text: List[RichText]
73
- color: Optional[str] = "default"
74
- is_toggleable: Optional[bool] = False
75
-
76
-
77
- class BulletedListItemBlock(BaseModel):
78
- rich_text: List[RichText]
79
- color: Optional[str] = "default"
80
-
81
-
82
- class NumberedListItemBlock(BaseModel):
83
- rich_text: List[RichText]
84
- color: Optional[str] = "default"
85
-
86
-
87
- class ToDoBlock(BaseModel):
88
- rich_text: List[RichText]
89
- checked: Optional[bool] = False
90
- color: Optional[str] = "default"
91
-
92
-
93
- class ToggleBlock(BaseModel):
94
- rich_text: List[RichText]
95
- color: Optional[str] = "default"
96
-
97
-
98
- class QuoteBlock(BaseModel):
99
- rich_text: List[RichText]
100
- color: Optional[str] = "default"
101
-
102
-
103
- class CalloutBlock(BaseModel):
104
- rich_text: List[RichText]
105
- icon: Optional[dict] = None
106
- color: Optional[str] = "default"
107
-
108
-
109
- class CodeBlock(BaseModel):
110
- rich_text: List[RichText]
111
- language: Optional[str] = "plain text"
112
-
113
-
114
- class EquationBlock(BaseModel):
115
- expression: str
116
-
117
-
118
- class DividerBlock(BaseModel):
119
- pass
120
-
121
-
122
- class TableOfContentsBlock(BaseModel):
123
- color: Optional[str] = "default"
124
-
125
-
126
- class BreadcrumbBlock(BaseModel):
127
- pass
128
-
129
-
130
- class ColumnListBlock(BaseModel):
131
- pass
132
-
133
-
134
- class ColumnBlock(BaseModel):
135
- pass
136
-
137
-
138
- class LinkToPageBlock(BaseModel):
139
- type: str
140
- page_id: Optional[str] = None
141
- database_id: Optional[str] = None
142
-
143
-
144
- class SyncedBlock(BaseModel):
145
- synced_from: Optional[dict] = None
146
-
147
-
148
- class TemplateBlock(BaseModel):
149
- rich_text: List[RichText]
150
-
151
-
152
- class TableBlock(BaseModel):
153
- table_width: int
154
- has_column_header: bool
155
- has_row_header: bool
156
-
157
-
158
- class TableRowBlock(BaseModel):
159
- cells: List[List[RichText]]
160
-
161
-
162
- class BookmarkBlock(BaseModel):
163
- caption: List[RichText]
164
- url: str
165
-
166
-
167
- class EmbedBlock(BaseModel):
168
- url: str
169
-
170
-
171
- class ImageBlock(BaseModel):
172
- type: str
173
- external: Optional[dict] = None
174
- file: Optional[dict] = None
175
- caption: List[RichText]
176
-
177
-
178
- class VideoBlock(BaseModel):
179
- type: str
180
- external: Optional[dict] = None
181
- file: Optional[dict] = None
182
- caption: List[RichText]
183
-
184
-
185
- class PDFBlock(BaseModel):
186
- type: str
187
- external: Optional[dict] = None
188
- file: Optional[dict] = None
189
- caption: List[RichText]
190
-
191
-
192
- class FileBlock(BaseModel):
193
- type: str
194
- external: Optional[dict] = None
195
- file: Optional[dict] = None
196
- caption: List[RichText]
197
-
198
-
199
- class AudioBlock(BaseModel):
200
- type: str
201
- external: Optional[dict] = None
202
- file: Optional[dict] = None
203
- caption: List[RichText]
204
-
205
-
206
- class LinkPreviewBlock(BaseModel):
207
- url: str
208
-
209
-
210
- class ChildPageBlock(BaseModel):
211
- title: str
212
-
213
-
214
- class ChildDatabaseBlock(BaseModel):
215
- title: str
216
-
217
-
218
- # TODO: Use the block typing here:
219
- # Test the code base.
220
- class Block(BaseModel):
221
- object: Literal["block"]
222
- id: str
223
- parent: Parent
224
- created_time: str
225
- last_edited_time: str
226
- created_by: User
227
- last_edited_by: User
228
- has_children: bool
229
- archived: bool
230
- in_trash: bool
231
- type: str
232
- paragraph: Optional[ParagraphBlock] = None
233
- heading_1: Optional[Heading1Block] = None
234
- heading_2: Optional[Heading2Block] = None
235
- heading_3: Optional[Heading3Block] = None
236
- bulleted_list_item: Optional[BulletedListItemBlock] = None
237
- numbered_list_item: Optional[NumberedListItemBlock] = None
238
- to_do: Optional[ToDoBlock] = None
239
- toggle: Optional[ToggleBlock] = None
240
- quote: Optional[QuoteBlock] = None
241
- callout: Optional[CalloutBlock] = None
242
- code: Optional[CodeBlock] = None
243
- equation: Optional[EquationBlock] = None
244
- divider: Optional[DividerBlock] = None
245
- table_of_contents: Optional[TableOfContentsBlock] = None
246
- breadcrumb: Optional[BreadcrumbBlock] = None
247
- column_list: Optional[ColumnListBlock] = None
248
- column: Optional[ColumnBlock] = None
249
- link_to_page: Optional[LinkToPageBlock] = None
250
- synced_block: Optional[SyncedBlock] = None
251
- template: Optional[TemplateBlock] = None
252
- table: Optional[TableBlock] = None
253
- table_row: Optional[TableRowBlock] = None
254
- bookmark: Optional[BookmarkBlock] = None
255
- embed: Optional[EmbedBlock] = None
256
- image: Optional[ImageBlock] = None
257
- video: Optional[VideoBlock] = None
258
- pdf: Optional[PDFBlock] = None
259
- file: Optional[FileBlock] = None
260
- audio: Optional[AudioBlock] = None
261
- link_preview: Optional[LinkPreviewBlock] = None
262
- child_page: Optional[ChildPageBlock] = None
263
- child_database: Optional[ChildDatabaseBlock] = None
264
- unsupported: Optional[dict] = None
@@ -1,78 +0,0 @@
1
- from typing import Literal, Optional, Dict, Any, Union
2
-
3
- from pydantic import BaseModel
4
-
5
-
6
- class User(BaseModel):
7
- """Represents a Notion user object."""
8
-
9
- object: str
10
- id: str
11
-
12
-
13
- class ExternalFile(BaseModel):
14
- """Represents an external file, e.g., for cover images."""
15
-
16
- url: str
17
-
18
-
19
- class Cover(BaseModel):
20
- """Cover image for a Notion page."""
21
-
22
- type: str
23
- external: ExternalFile
24
-
25
-
26
- class EmojiIcon(BaseModel):
27
- type: Literal["emoji"]
28
- emoji: str
29
-
30
-
31
- class ExternalIcon(BaseModel):
32
- type: Literal["external"]
33
- external: ExternalFile
34
-
35
-
36
- Icon = Union[EmojiIcon, ExternalIcon]
37
-
38
-
39
- class DatabaseParent(BaseModel):
40
- type: Literal["database_id"]
41
- database_id: str
42
-
43
-
44
- class PageParent(BaseModel):
45
- type: Literal["page_id"]
46
- page_id: str
47
-
48
-
49
- class WorkspaceParent(BaseModel):
50
- type: Literal["workspace"]
51
- workspace: bool = True
52
-
53
-
54
- Parent = Union[DatabaseParent, PageParent, WorkspaceParent]
55
-
56
-
57
- class NotionPageResponse(BaseModel):
58
- """
59
- Represents a full Notion page object as returned by the Notion API.
60
-
61
- This structure is flexible and designed to work with different database schemas.
62
- """
63
-
64
- object: str
65
- id: str
66
- created_time: str
67
- last_edited_time: str
68
- created_by: User
69
- last_edited_by: User
70
- cover: Optional[Cover]
71
- icon: Optional[Icon]
72
- parent: Parent
73
- archived: bool
74
- in_trash: bool
75
- properties: Dict[str, Any]
76
- url: str
77
- public_url: Optional[str]
78
- request_id: str
File without changes
File without changes
@@ -1,87 +0,0 @@
1
- """
2
- Utility functions for handling Notion API text length limitations.
3
-
4
- This module provides functions to fix text content that exceeds Notion's
5
- rich_text character limit of 2000 characters per element.
6
-
7
- Resolves API errors like:
8
- "validation_error - body.children[79].toggle.children[2].paragraph.rich_text[0].text.content.length
9
- should be ≤ 2000, instead was 2162."
10
- """
11
-
12
- import re
13
- import logging
14
- from typing import Any
15
-
16
- logger = logging.getLogger(__name__)
17
-
18
-
19
- def fix_blocks_content_length(
20
- blocks: list[dict[str, Any]], max_text_length: int = 1900
21
- ) -> list[dict[str, Any]]:
22
- """Check each block and ensure text content doesn't exceed Notion's limit."""
23
- return [_fix_single_block_content(block, max_text_length) for block in blocks]
24
-
25
-
26
- def _fix_single_block_content(
27
- block: dict[str, Any], max_text_length: int
28
- ) -> dict[str, Any]:
29
- """Fix content length in a single block and its children recursively."""
30
- block_copy = block.copy()
31
-
32
- block_type = block.get("type")
33
- if not block_type:
34
- return block_copy
35
-
36
- content = block.get(block_type)
37
- if not content:
38
- return block_copy
39
-
40
- if "rich_text" in content:
41
- _fix_rich_text_content(block_copy, block_type, content, max_text_length)
42
-
43
- if "children" in content and content["children"]:
44
- block_copy[block_type]["children"] = [
45
- _fix_single_block_content(child, max_text_length)
46
- for child in content["children"]
47
- ]
48
-
49
- return block_copy
50
-
51
-
52
- def _fix_rich_text_content(
53
- block_copy: dict[str, Any],
54
- block_type: str,
55
- content: dict[str, Any],
56
- max_text_length: int,
57
- ) -> None:
58
- """Fix rich text content that exceeds the length limit."""
59
- rich_text = content["rich_text"]
60
- for i, text_item in enumerate(rich_text):
61
- if "text" not in text_item or "content" not in text_item["text"]:
62
- continue
63
-
64
- text_content = text_item["text"]["content"]
65
- if len(text_content) <= max_text_length:
66
- continue
67
-
68
- logger.warning(
69
- "Truncating text content from %d to %d chars",
70
- len(text_content),
71
- max_text_length,
72
- )
73
- block_copy[block_type]["rich_text"][i]["text"]["content"] = text_content[
74
- :max_text_length
75
- ]
76
-
77
-
78
- def split_to_paragraphs(markdown_text: str) -> list[str]:
79
- """Split markdown into paragraphs."""
80
- paragraphs = re.split(r"\n\s*\n", markdown_text)
81
- return [p for p in paragraphs if p.strip()]
82
-
83
-
84
- def split_to_sentences(paragraph: str) -> list[str]:
85
- """Split a paragraph into sentences."""
86
- sentences = re.split(r"(?<=[.!?])\s+", paragraph)
87
- return [s for s in sentences if s.strip()]
@@ -1,52 +0,0 @@
1
- from typing import Any, Dict, Optional
2
-
3
- from notionary.blocks.registry.block_registry import BlockRegistry
4
-
5
- from notionary.blocks import NotionBlockClient
6
- from notionary.page.notion_to_markdown_converter import (
7
- NotionToMarkdownConverter,
8
- )
9
- from notionary.util import LoggingMixin
10
-
11
-
12
- class PageContentRetriever(LoggingMixin):
13
- def __init__(
14
- self,
15
- page_id: str,
16
- block_registry: BlockRegistry,
17
- ):
18
- self.page_id = page_id
19
- self._notion_to_markdown_converter = NotionToMarkdownConverter(
20
- block_registry=block_registry
21
- )
22
- self.client = NotionBlockClient()
23
-
24
- async def get_page_content(self) -> str:
25
- blocks = await self._get_page_blocks_with_children()
26
- return self._notion_to_markdown_converter.convert(blocks)
27
-
28
- async def _get_page_blocks_with_children(
29
- self, parent_id: Optional[str] = None
30
- ) -> list[Dict[str, Any]]:
31
- blocks = (
32
- await self.client.get_page_blocks(page_id=self.page_id)
33
- if parent_id is None
34
- else await self.client.get_block_children(parent_id)
35
- )
36
-
37
- if not blocks:
38
- return []
39
-
40
- for block in blocks:
41
- if not block.get("has_children"):
42
- continue
43
-
44
- block_id = block.get("id")
45
- if not block_id:
46
- continue
47
-
48
- children = await self._get_page_blocks_with_children(block_id)
49
- if children:
50
- block["children"] = children
51
-
52
- return blocks
@@ -1,153 +0,0 @@
1
- import re
2
- from notionary.blocks.shared.notion_block_element import NotionBlock
3
- from notionary.blocks.registry.block_registry import BlockRegistry
4
-
5
-
6
- class LineProcessingState:
7
- """Tracks state during line-by-line processing"""
8
-
9
- def __init__(self):
10
- self.paragraph_lines: list[str] = []
11
- self.paragraph_start: int = 0
12
-
13
- def add_to_paragraph(self, line: str, current_pos: int):
14
- """Add line to current paragraph"""
15
- if not self.paragraph_lines:
16
- self.paragraph_start = current_pos
17
- self.paragraph_lines.append(line)
18
-
19
- def reset_paragraph(self):
20
- """Reset paragraph state"""
21
- self.paragraph_lines = []
22
- self.paragraph_start = 0
23
-
24
- def has_paragraph(self) -> bool:
25
- """Check if there are paragraph lines to process"""
26
- return len(self.paragraph_lines) > 0
27
-
28
-
29
- class LineProcessor:
30
- """Handles line-by-line processing of markdown text"""
31
-
32
- def __init__(
33
- self,
34
- block_registry: BlockRegistry,
35
- excluded_ranges: set[int],
36
- pipe_pattern: str,
37
- ):
38
- self._block_registry = block_registry
39
- self._excluded_ranges = excluded_ranges
40
- self._pipe_pattern = pipe_pattern
41
-
42
- @staticmethod
43
- def _normalize_to_list(result) -> list[dict[str, any]]:
44
- """Normalize Union[list[dict], dict] to list[dict]"""
45
- if result is None:
46
- return []
47
- return result if isinstance(result, list) else [result]
48
-
49
- def process_lines(self, text: str) -> list[tuple[int, int, dict[str, any]]]:
50
- """Process all lines and return blocks with positions"""
51
- lines = text.split("\n")
52
- line_blocks = []
53
-
54
- state = LineProcessingState()
55
- current_pos = 0
56
-
57
- for line in lines:
58
- line_length = len(line) + 1 # +1 for newline
59
- line_end = current_pos + line_length - 1
60
-
61
- if self._should_skip_line(line, current_pos, line_end):
62
- current_pos += line_length
63
- continue
64
-
65
- self._process_single_line(line, current_pos, line_end, line_blocks, state)
66
- current_pos += line_length
67
-
68
- # Process any remaining paragraph
69
- self._finalize_paragraph(state, current_pos, line_blocks)
70
-
71
- return line_blocks
72
-
73
- def _should_skip_line(self, line: str, current_pos: int, line_end: int) -> bool:
74
- """Check if line should be skipped (excluded or pipe syntax)"""
75
- return self._overlaps_with_excluded(
76
- current_pos, line_end
77
- ) or self._is_pipe_syntax_line(line)
78
-
79
- def _overlaps_with_excluded(self, start_pos: int, end_pos: int) -> bool:
80
- """Check if position range overlaps with excluded ranges"""
81
- return any(
82
- pos in self._excluded_ranges for pos in range(start_pos, end_pos + 1)
83
- )
84
-
85
- def _is_pipe_syntax_line(self, line: str) -> bool:
86
- """Check if line uses pipe syntax for nested content"""
87
- return bool(re.match(self._pipe_pattern, line))
88
-
89
- def _process_single_line(
90
- self,
91
- line: str,
92
- current_pos: int,
93
- line_end: int,
94
- line_blocks: list[tuple[int, int, dict[str, any]]],
95
- state: LineProcessingState,
96
- ):
97
- """Process a single line of text"""
98
- # Handle empty lines
99
- if not line.strip():
100
- self._finalize_paragraph(state, current_pos, line_blocks)
101
- state.reset_paragraph()
102
- return
103
-
104
- # Handle special blocks (headings, todos, dividers, etc.)
105
- special_blocks = self._extract_special_block(line)
106
- if special_blocks:
107
- self._finalize_paragraph(state, current_pos, line_blocks)
108
- # Mehrere Blöcke hinzufügen
109
- for block in special_blocks:
110
- line_blocks.append((current_pos, line_end, block))
111
- state.reset_paragraph()
112
- return
113
-
114
- # Add to current paragraph
115
- state.add_to_paragraph(line, current_pos)
116
-
117
- def _extract_special_block(self, line: str) -> list[NotionBlock]:
118
- """Extract special block (non-paragraph) from line"""
119
- for element in (
120
- element
121
- for element in self._block_registry.get_elements()
122
- if not element.is_multiline()
123
- ):
124
- if not element.match_markdown(line):
125
- continue
126
-
127
- result = element.markdown_to_notion(line)
128
- blocks = self._normalize_to_list(result)
129
- if not blocks:
130
- continue
131
-
132
- # Gibt nur zurück, wenn mindestens ein Nicht-Paragraph-Block dabei ist
133
- if any(block.get("type") != "paragraph" for block in blocks):
134
- return blocks
135
-
136
- return []
137
-
138
- def _finalize_paragraph(
139
- self,
140
- state: LineProcessingState,
141
- end_pos: int,
142
- line_blocks: list[tuple[int, int, dict[str, any]]],
143
- ):
144
- """Convert current paragraph lines to paragraph block"""
145
- if not state.has_paragraph():
146
- return
147
-
148
- paragraph_text = "\n".join(state.paragraph_lines)
149
- result = self._block_registry.markdown_to_notion(paragraph_text)
150
- blocks = self._normalize_to_list(result)
151
-
152
- for block in blocks:
153
- line_blocks.append((state.paragraph_start, end_pos, block))