notionary 0.2.16__py3-none-any.whl → 0.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. notionary/__init__.py +10 -5
  2. notionary/base_notion_client.py +18 -7
  3. notionary/blocks/__init__.py +55 -24
  4. notionary/blocks/audio/__init__.py +7 -0
  5. notionary/blocks/audio/audio_element.py +152 -0
  6. notionary/blocks/audio/audio_markdown_node.py +29 -0
  7. notionary/blocks/audio/audio_models.py +59 -0
  8. notionary/blocks/bookmark/__init__.py +7 -0
  9. notionary/blocks/{bookmark_element.py → bookmark/bookmark_element.py} +20 -65
  10. notionary/blocks/bookmark/bookmark_markdown_node.py +43 -0
  11. notionary/blocks/bulleted_list/__init__.py +7 -0
  12. notionary/blocks/{bulleted_list_element.py → bulleted_list/bulleted_list_element.py} +7 -3
  13. notionary/blocks/bulleted_list/bulleted_list_markdown_node.py +33 -0
  14. notionary/blocks/bulleted_list/bulleted_list_models.py +0 -0
  15. notionary/blocks/callout/__init__.py +7 -0
  16. notionary/blocks/callout/callout_element.py +132 -0
  17. notionary/blocks/callout/callout_markdown_node.py +31 -0
  18. notionary/blocks/callout/callout_models.py +0 -0
  19. notionary/blocks/code/__init__.py +7 -0
  20. notionary/blocks/{code_block_element.py → code/code_element.py} +72 -40
  21. notionary/blocks/code/code_markdown_node.py +43 -0
  22. notionary/blocks/code/code_models.py +0 -0
  23. notionary/blocks/column/__init__.py +5 -0
  24. notionary/blocks/{column_element.py → column/column_element.py} +24 -55
  25. notionary/blocks/column/column_models.py +0 -0
  26. notionary/blocks/divider/__init__.py +7 -0
  27. notionary/blocks/{divider_element.py → divider/divider_element.py} +11 -3
  28. notionary/blocks/divider/divider_markdown_node.py +24 -0
  29. notionary/blocks/divider/divider_models.py +0 -0
  30. notionary/blocks/document/__init__.py +7 -0
  31. notionary/blocks/document/document_element.py +102 -0
  32. notionary/blocks/document/document_markdown_node.py +31 -0
  33. notionary/blocks/document/document_models.py +0 -0
  34. notionary/blocks/embed/__init__.py +7 -0
  35. notionary/blocks/{embed_element.py → embed/embed_element.py} +50 -32
  36. notionary/blocks/embed/embed_markdown_node.py +30 -0
  37. notionary/blocks/embed/embed_models.py +0 -0
  38. notionary/blocks/heading/__init__.py +7 -0
  39. notionary/blocks/{heading_element.py → heading/heading_element.py} +25 -17
  40. notionary/blocks/heading/heading_markdown_node.py +29 -0
  41. notionary/blocks/heading/heading_models.py +0 -0
  42. notionary/blocks/image/__init__.py +7 -0
  43. notionary/blocks/{image_element.py → image/image_element.py} +62 -42
  44. notionary/blocks/image/image_markdown_node.py +33 -0
  45. notionary/blocks/image/image_models.py +0 -0
  46. notionary/blocks/markdown_builder.py +356 -0
  47. notionary/blocks/markdown_node.py +29 -0
  48. notionary/blocks/mention/__init__.py +7 -0
  49. notionary/blocks/{mention_element.py → mention/mention_element.py} +6 -2
  50. notionary/blocks/mention/mention_markdown_node.py +38 -0
  51. notionary/blocks/mention/mention_models.py +0 -0
  52. notionary/blocks/numbered_list/__init__.py +7 -0
  53. notionary/blocks/{numbered_list_element.py → numbered_list/numbered_list_element.py} +10 -6
  54. notionary/blocks/numbered_list/numbered_list_markdown_node.py +29 -0
  55. notionary/blocks/numbered_list/numbered_list_models.py +0 -0
  56. notionary/blocks/paragraph/__init__.py +7 -0
  57. notionary/blocks/{paragraph_element.py → paragraph/paragraph_element.py} +7 -3
  58. notionary/blocks/paragraph/paragraph_markdown_node.py +25 -0
  59. notionary/blocks/paragraph/paragraph_models.py +0 -0
  60. notionary/blocks/quote/__init__.py +7 -0
  61. notionary/blocks/quote/quote_element.py +92 -0
  62. notionary/blocks/quote/quote_markdown_node.py +23 -0
  63. notionary/blocks/quote/quote_models.py +0 -0
  64. notionary/blocks/registry/block_registry.py +17 -3
  65. notionary/blocks/registry/block_registry_builder.py +90 -178
  66. notionary/blocks/shared/__init__.py +0 -0
  67. notionary/blocks/shared/block_client.py +256 -0
  68. notionary/blocks/shared/models.py +710 -0
  69. notionary/blocks/{notion_block_element.py → shared/notion_block_element.py} +8 -5
  70. notionary/blocks/{text_inline_formatter.py → shared/text_inline_formatter.py} +14 -14
  71. notionary/blocks/shared/text_inline_formatter_new.py +139 -0
  72. notionary/blocks/table/__init__.py +7 -0
  73. notionary/blocks/{table_element.py → table/table_element.py} +23 -11
  74. notionary/blocks/table/table_markdown_node.py +40 -0
  75. notionary/blocks/table/table_models.py +0 -0
  76. notionary/blocks/todo/__init__.py +7 -0
  77. notionary/blocks/{todo_element.py → todo/todo_element.py} +8 -4
  78. notionary/blocks/todo/todo_markdown_node.py +31 -0
  79. notionary/blocks/todo/todo_models.py +0 -0
  80. notionary/blocks/toggle/__init__.py +4 -0
  81. notionary/blocks/{toggle_element.py → toggle/toggle_element.py} +7 -3
  82. notionary/blocks/toggle/toggle_markdown_node.py +35 -0
  83. notionary/blocks/toggle/toggle_models.py +0 -0
  84. notionary/blocks/toggleable_heading/__init__.py +9 -0
  85. notionary/blocks/{toggleable_heading_element.py → toggleable_heading/toggleable_heading_element.py} +8 -4
  86. notionary/blocks/toggleable_heading/toggleable_heading_markdown_node.py +43 -0
  87. notionary/blocks/toggleable_heading/toggleable_heading_models.py +0 -0
  88. notionary/blocks/video/__init__.py +7 -0
  89. notionary/blocks/{video_element.py → video/video_element.py} +82 -57
  90. notionary/blocks/video/video_markdown_node.py +30 -0
  91. notionary/database/__init__.py +4 -0
  92. notionary/database/database.py +481 -0
  93. notionary/database/{filter_builder.py → database_filter_builder.py} +27 -29
  94. notionary/database/{notion_database_provider.py → database_provider.py} +4 -4
  95. notionary/database/notion_database.py +45 -18
  96. notionary/file_upload/__init__.py +7 -0
  97. notionary/file_upload/client.py +254 -0
  98. notionary/file_upload/models.py +60 -0
  99. notionary/file_upload/notion_file_upload.py +387 -0
  100. notionary/page/content/markdown_whitespace_processor.py +80 -0
  101. notionary/page/content/notion_text_length_utils.py +87 -0
  102. notionary/page/content/page_content_retriever.py +2 -2
  103. notionary/page/content/page_content_writer.py +97 -148
  104. notionary/page/formatting/line_processor.py +153 -0
  105. notionary/page/formatting/markdown_to_notion_converter.py +103 -424
  106. notionary/page/notion_page.py +13 -14
  107. notionary/page/notion_to_markdown_converter.py +9 -13
  108. notionary/telemetry/views.py +15 -6
  109. notionary/user/__init__.py +11 -0
  110. notionary/user/base_notion_user.py +52 -0
  111. notionary/user/client.py +129 -0
  112. notionary/user/models.py +83 -0
  113. notionary/user/notion_bot_user.py +227 -0
  114. notionary/user/notion_user.py +256 -0
  115. notionary/user/notion_user_manager.py +173 -0
  116. notionary/user/notion_user_provider.py +1 -0
  117. notionary/util/__init__.py +3 -5
  118. notionary/util/factory_decorator.py +0 -33
  119. notionary/util/factory_only.py +37 -0
  120. notionary/util/fuzzy.py +74 -0
  121. notionary/util/logging_mixin.py +12 -12
  122. notionary/workspace.py +38 -3
  123. {notionary-0.2.16.dist-info → notionary-0.2.18.dist-info}/METADATA +2 -1
  124. notionary-0.2.18.dist-info/RECORD +149 -0
  125. notionary/blocks/audio_element.py +0 -144
  126. notionary/blocks/callout_element.py +0 -122
  127. notionary/blocks/notion_block_client.py +0 -26
  128. notionary/blocks/qoute_element.py +0 -169
  129. notionary/page/content/notion_page_content_chunker.py +0 -84
  130. notionary/page/formatting/spacer_rules.py +0 -483
  131. notionary/util/fuzzy_matcher.py +0 -82
  132. notionary-0.2.16.dist-info/RECORD +0 -71
  133. /notionary/{elements/__init__.py → blocks/bookmark/bookmark_models.py} +0 -0
  134. /notionary/database/{database_exceptions.py → exceptions.py} +0 -0
  135. /notionary/util/{singleton_decorator.py → singleton.py} +0 -0
  136. {notionary-0.2.16.dist-info → notionary-0.2.18.dist-info}/LICENSE +0 -0
  137. {notionary-0.2.16.dist-info → notionary-0.2.18.dist-info}/WHEEL +0 -0
@@ -0,0 +1,387 @@
1
+ import asyncio
2
+ import mimetypes
3
+ from typing import Optional
4
+ from pathlib import Path
5
+ from datetime import datetime, timedelta
6
+ from io import BytesIO
7
+
8
+
9
+ from notionary.file_upload.models import FileUploadResponse
10
+ from notionary.util import LoggingMixin
11
+
12
+
13
class NotionFileUpload(LoggingMixin):
    """
    High-level service for managing Notion file uploads.

    Content up to ``SINGLE_PART_MAX_SIZE`` bytes is sent as a single-part
    upload; anything larger is sent as a multi-part upload in chunks of
    ``MULTI_PART_CHUNK_SIZE`` bytes. All network calls are delegated to
    ``self.client``.
    """

    # Notion's file size limits
    SINGLE_PART_MAX_SIZE = 20 * 1024 * 1024  # 20MB
    MULTI_PART_CHUNK_SIZE = 10 * 1024 * 1024  # 10MB per part
    MAX_FILENAME_BYTES = 900

    def __init__(self, token: Optional[str] = None):
        """Initialize the file upload service.

        Args:
            token: Optional token forwarded to the upload client.
        """
        # Imported inside __init__ rather than at module level, as in the
        # original; presumably this avoids a circular import -- TODO confirm.
        from notionary.file_upload import NotionFileUploadClient

        self.client = NotionFileUploadClient(token=token)

    async def upload_file(
        self, file_path: Path, filename: Optional[str] = None
    ) -> Optional[FileUploadResponse]:
        """
        Upload a file to Notion, automatically choosing single-part or multi-part based on size.

        Args:
            file_path: Path to the file to upload
            filename: Optional custom filename (defaults to file_path.name)

        Returns:
            FileUploadResponse if successful, None otherwise
        """
        file_path = Path(file_path)

        if not file_path.exists():
            self.logger.error("File does not exist: %s", file_path)
            return None

        # A directory "exists" too, but cannot be uploaded; reject it up front
        # instead of failing later while trying to read it.
        if not file_path.is_file():
            self.logger.error("Path is not a regular file: %s", file_path)
            return None

        file_size = file_path.stat().st_size
        filename = filename or file_path.name

        if not self._is_filename_valid(filename):
            return None

        # Choose upload method based on file size
        if file_size <= self.SINGLE_PART_MAX_SIZE:
            return await self._upload_small_file(file_path, filename, file_size)
        return await self._upload_large_file(file_path, filename, file_size)

    async def upload_from_bytes(
        self, file_content: bytes, filename: str, content_type: Optional[str] = None
    ) -> Optional[FileUploadResponse]:
        """
        Upload file content from bytes.

        Args:
            file_content: File content as bytes
            filename: Name for the file
            content_type: Optional MIME type (guessed from filename when omitted)

        Returns:
            FileUploadResponse if successful, None otherwise
        """
        file_size = len(file_content)

        if not self._is_filename_valid(filename):
            return None

        # Guess content type if not provided
        if not content_type:
            content_type, _ = mimetypes.guess_type(filename)

        # Choose upload method based on size
        if file_size <= self.SINGLE_PART_MAX_SIZE:
            return await self._upload_small_file_from_bytes(
                file_content, filename, content_type, file_size
            )
        return await self._upload_large_file_from_bytes(
            file_content, filename, content_type, file_size
        )

    async def get_upload_status(self, file_upload_id: str) -> Optional[str]:
        """
        Get the current status of a file upload.

        Args:
            file_upload_id: ID of the file upload

        Returns:
            Status string ("pending", "uploaded", etc.) or None if failed
        """
        upload_info = await self.client.retrieve_file_upload(file_upload_id)
        return upload_info.status if upload_info else None

    async def wait_for_upload_completion(
        self, file_upload_id: str, timeout_seconds: int = 300, poll_interval: int = 2
    ) -> Optional[FileUploadResponse]:
        """
        Wait for a file upload to complete.

        Args:
            file_upload_id: ID of the file upload
            timeout_seconds: Maximum time to wait
            poll_interval: Seconds between status checks

        Returns:
            FileUploadResponse when complete, None if timeout or failed
        """
        # Use the event loop's monotonic clock: a wall-clock adjustment
        # (NTP correction, DST change) must not shorten or extend the timeout.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout_seconds

        while loop.time() < deadline:
            upload_info = await self.client.retrieve_file_upload(file_upload_id)

            if not upload_info:
                self.logger.error(
                    "Failed to retrieve upload info for %s", file_upload_id
                )
                return None

            if upload_info.status == "uploaded":
                self.logger.info("Upload completed: %s", file_upload_id)
                return upload_info

            if upload_info.status == "failed":
                self.logger.error("Upload failed: %s", file_upload_id)
                return None

            await asyncio.sleep(poll_interval)

        self.logger.warning("Upload timeout: %s", file_upload_id)
        return None

    async def list_recent_uploads(self, limit: int = 50) -> list[FileUploadResponse]:
        """
        List recent file uploads.

        Args:
            limit: Maximum number of uploads to return

        Returns:
            List of FileUploadResponse objects (at most ``limit`` entries)
        """
        uploads: list[FileUploadResponse] = []
        start_cursor = None
        remaining = limit

        while remaining > 0:
            response = await self.client.list_file_uploads(
                page_size=min(remaining, 100),  # API max per request
                start_cursor=start_cursor,
            )

            if not response or not response.results:
                break

            uploads.extend(response.results)
            remaining -= len(response.results)

            if not response.has_more or not response.next_cursor:
                break

            start_cursor = response.next_cursor

        return uploads[:limit]

    def _is_filename_valid(self, filename: str) -> bool:
        """Return True if the UTF-8 encoded filename fits MAX_FILENAME_BYTES; log an error otherwise."""
        encoded_length = len(filename.encode("utf-8"))
        if encoded_length > self.MAX_FILENAME_BYTES:
            self.logger.error(
                "Filename too long: %d bytes (max %d)",
                encoded_length,
                self.MAX_FILENAME_BYTES,
            )
            return False
        return True

    def _count_parts(self, total_size: int) -> int:
        """Return how many MULTI_PART_CHUNK_SIZE chunks are needed for total_size bytes (ceiling division)."""
        return (
            total_size + self.MULTI_PART_CHUNK_SIZE - 1
        ) // self.MULTI_PART_CHUNK_SIZE

    async def _send_part(
        self,
        file_upload_id: str,
        chunk: bytes,
        part_number: int,
        total_parts: int,
        filename: Optional[str] = None,
    ) -> bool:
        """Send one chunk of a multi-part upload and log the outcome."""
        send_kwargs = {
            "file_upload_id": file_upload_id,
            "file_content": BytesIO(chunk),
            "part_number": part_number,
        }
        # Only forward a filename when one is known, so the client's own
        # default applies otherwise.
        if filename is not None:
            send_kwargs["filename"] = filename

        success = await self.client.send_file_upload(**send_kwargs)

        if not success:
            self.logger.error("Failed to upload part %d/%d", part_number, total_parts)
            return False

        self.logger.debug("Uploaded part %d/%d", part_number, total_parts)
        return True

    async def _upload_small_file(
        self, file_path: Path, filename: str, file_size: int
    ) -> Optional[FileUploadResponse]:
        """Upload a small file using single-part upload."""
        content_type, _ = mimetypes.guess_type(str(file_path))

        # Create file upload
        file_upload = await self.client.create_file_upload(
            filename=filename,
            content_type=content_type,
            content_length=file_size,
            mode="single_part",
        )

        if not file_upload:
            self.logger.error("Failed to create file upload for %s", filename)
            return None

        # Send file content
        success = await self.client.send_file_from_path(
            file_upload_id=file_upload.id, file_path=file_path
        )

        if not success:
            self.logger.error("Failed to send file content for %s", filename)
            return None

        self.logger.info(
            "Successfully uploaded file: %s (ID: %s)", filename, file_upload.id
        )
        return file_upload

    async def _upload_large_file(
        self, file_path: Path, filename: str, file_size: int
    ) -> Optional[FileUploadResponse]:
        """Upload a large file using multi-part upload."""
        content_type, _ = mimetypes.guess_type(str(file_path))

        # Create file upload with multi-part mode
        file_upload = await self.client.create_file_upload(
            filename=filename,
            content_type=content_type,
            content_length=file_size,
            mode="multi_part",
        )

        if not file_upload:
            self.logger.error(
                "Failed to create multi-part file upload for %s", filename
            )
            return None

        # Upload file in parts, under the same filename the upload was created with
        success = await self._upload_file_parts(
            file_upload.id, file_path, file_size, filename
        )

        if not success:
            self.logger.error("Failed to upload file parts for %s", filename)
            return None

        # Complete the upload
        completed_upload = await self.client.complete_file_upload(file_upload.id)

        if not completed_upload:
            self.logger.error("Failed to complete file upload for %s", filename)
            return None

        self.logger.info(
            "Successfully uploaded large file: %s (ID: %s)", filename, file_upload.id
        )
        return completed_upload

    async def _upload_small_file_from_bytes(
        self,
        file_content: bytes,
        filename: str,
        content_type: Optional[str],
        file_size: int,
    ) -> Optional[FileUploadResponse]:
        """Upload small file from bytes."""
        # Create file upload
        file_upload = await self.client.create_file_upload(
            filename=filename,
            content_type=content_type,
            content_length=file_size,
            mode="single_part",
        )

        if not file_upload:
            self.logger.error("Failed to create file upload for %s", filename)
            return None

        # Send file content (BytesIO comes from the module-level import)
        success = await self.client.send_file_upload(
            file_upload_id=file_upload.id,
            file_content=BytesIO(file_content),
            filename=filename,
        )

        if not success:
            self.logger.error("Failed to send file content for %s", filename)
            return None

        return file_upload

    async def _upload_large_file_from_bytes(
        self,
        file_content: bytes,
        filename: str,
        content_type: Optional[str],
        file_size: int,
    ) -> Optional[FileUploadResponse]:
        """Upload large file from bytes using multi-part."""
        # Create file upload
        file_upload = await self.client.create_file_upload(
            filename=filename,
            content_type=content_type,
            content_length=file_size,
            mode="multi_part",
        )

        if not file_upload:
            self.logger.error(
                "Failed to create multi-part file upload for %s", filename
            )
            return None

        # Upload in chunks
        success = await self._upload_bytes_parts(
            file_upload.id, file_content, filename
        )

        if not success:
            self.logger.error("Failed to upload file parts for %s", filename)
            return None

        # Complete the upload
        return await self.client.complete_file_upload(file_upload.id)

    async def _upload_file_parts(
        self,
        file_upload_id: str,
        file_path: Path,
        file_size: int,
        filename: Optional[str] = None,
    ) -> bool:
        """Upload file in parts for multi-part upload.

        Args:
            file_upload_id: ID of the multi-part file upload
            file_path: File to read the parts from
            file_size: Total size in bytes, used to compute the part count for logging
            filename: Name sent with each part (defaults to file_path.name)

        Returns:
            True if every part was sent, False on the first failure or on any exception
        """
        part_filename = filename or file_path.name
        total_parts = self._count_parts(file_size)
        part_number = 1

        try:
            import aiofiles

            async with aiofiles.open(file_path, "rb") as file:
                while True:
                    chunk = await file.read(self.MULTI_PART_CHUNK_SIZE)
                    if not chunk:
                        break

                    sent = await self._send_part(
                        file_upload_id,
                        chunk,
                        part_number,
                        total_parts,
                        part_filename,
                    )
                    if not sent:
                        return False

                    part_number += 1

            self.logger.info("Successfully uploaded all %d parts", total_parts)
            return True

        except Exception as e:
            # Best-effort boundary: callers expect a bool, not an exception.
            self.logger.error("Error uploading file parts: %s", e)
            return False

    async def _upload_bytes_parts(
        self, file_upload_id: str, file_content: bytes, filename: Optional[str] = None
    ) -> bool:
        """Upload bytes in parts for multi-part upload.

        Args:
            file_upload_id: ID of the multi-part file upload
            file_content: Complete content to split into parts
            filename: Optional name sent with each part; omitted from the
                request when None

        Returns:
            True if every part was sent, False on the first failure
        """
        total_parts = self._count_parts(len(file_content))
        offsets = range(0, len(file_content), self.MULTI_PART_CHUNK_SIZE)

        for part_number, offset in enumerate(offsets, start=1):
            chunk = file_content[offset : offset + self.MULTI_PART_CHUNK_SIZE]

            sent = await self._send_part(
                file_upload_id, chunk, part_number, total_parts, filename
            )
            if not sent:
                return False

        self.logger.info("Successfully uploaded all %d parts", total_parts)
        return True
@@ -0,0 +1,80 @@
1
class MarkdownWhitespaceProcessor:
    """Helper class for processing markdown whitespace.

    Lines outside fenced code blocks have their leading whitespace removed.
    Lines inside a fenced code block keep their relative indentation: only
    the indentation common to all non-blank lines of the block is removed.
    """

    def __init__(self):
        self.processed_lines: list[str] = []
        self.in_code_block: bool = False
        self.current_code_block: list[str] = []

    def process_lines(self, lines: list[str]) -> str:
        """Process all lines and return the processed markdown.

        Args:
            lines: The markdown document, one entry per line.

        Returns:
            The processed lines joined with newlines.
        """
        # Reset state so the same instance can be reused across documents.
        self.processed_lines = []
        self.in_code_block = False
        self.current_code_block = []

        for line in lines:
            self._process_single_line(line)

        # Handle unclosed code block. This must also fire when the block has
        # no body lines, otherwise the opening fence is emitted without a
        # matching closing fence.
        if self.in_code_block:
            self._finish_code_block()

        return "\n".join(self.processed_lines)

    def _process_single_line(self, line: str) -> None:
        """Process a single line of markdown."""
        if self._is_code_block_marker(line):
            self._handle_code_block_marker(line)
            return

        if self.in_code_block:
            # Buffered until the block ends so indentation can be normalized as a whole.
            self.current_code_block.append(line)
            return

        # Regular text - remove leading whitespace
        self.processed_lines.append(line.lstrip())

    def _handle_code_block_marker(self, line: str) -> None:
        """Handle code block start/end markers."""
        if self.in_code_block:
            # Ending code block
            self._finish_code_block()
            return

        # Starting new code block
        self.in_code_block = True
        self.processed_lines.append(self._normalize_code_block_start(line))
        self.current_code_block = []

    def _finish_code_block(self) -> None:
        """Emit the buffered code lines plus a closing fence and leave code-block mode."""
        self.processed_lines.extend(
            self._normalize_code_block_content(self.current_code_block)
        )
        self.processed_lines.append("```")
        self.in_code_block = False
        self.current_code_block = []

    def _is_code_block_marker(self, line: str) -> bool:
        """Check if line is a code block marker (starts with ``` after optional indentation)."""
        return line.lstrip().startswith("```")

    def _normalize_code_block_start(self, line: str) -> str:
        """Normalize code block opening marker to "```" followed by the trimmed language tag."""
        language = line.lstrip().replace("```", "", 1).strip()
        return "```" + language

    def _normalize_code_block_content(self, code_lines: list[str]) -> list[str]:
        """Normalize code block indentation.

        Removes the indentation shared by all non-blank lines. Blank lines
        become empty strings, except when there is no common indentation,
        in which case the lines are returned unchanged.
        """
        if not code_lines:
            return []

        # Find minimum indentation from non-empty lines
        non_empty_lines = [line for line in code_lines if line.strip()]
        if not non_empty_lines:
            return [""] * len(code_lines)

        min_indent = min(len(line) - len(line.lstrip()) for line in non_empty_lines)
        if min_indent == 0:
            return code_lines

        # Remove common indentation
        return [line[min_indent:] if line.strip() else "" for line in code_lines]
@@ -0,0 +1,87 @@
1
+ """
2
+ Utility functions for handling Notion API text length limitations.
3
+
4
+ This module provides functions to fix text content that exceeds Notion's
5
+ rich_text character limit of 2000 characters per element.
6
+
7
+ Resolves API errors like:
8
+ "validation_error - body.children[79].toggle.children[2].paragraph.rich_text[0].text.content.length
9
+ should be ≤ 2000, instead was 2162."
10
+ """
11
+
12
+ import re
13
+ import logging
14
+ from typing import Any
15
+
16
logger = logging.getLogger(__name__)


def fix_blocks_content_length(
    blocks: list[dict[str, Any]], max_text_length: int = 1900
) -> list[dict[str, Any]]:
    """Check each block and ensure text content doesn't exceed Notion's limit.

    Args:
        blocks: Block dictionaries to check; they are not modified.
        max_text_length: Maximum number of characters kept per rich_text
            text content.

    Returns:
        A new list with one fixed copy per input block.
    """
    return [_fix_single_block_content(block, max_text_length) for block in blocks]


def _copy_rich_text_item(item: Any) -> Any:
    """Copy a rich_text item and its nested "text" dict so they can be edited safely."""
    if not isinstance(item, dict):
        return item

    item_copy = item.copy()
    if isinstance(item_copy.get("text"), dict):
        item_copy["text"] = item_copy["text"].copy()
    return item_copy


def _fix_single_block_content(
    block: dict[str, Any], max_text_length: int
) -> dict[str, Any]:
    """Fix content length in a single block and its children recursively.

    The input block is left untouched: every nested dict/list that gets
    written to (the type payload, its rich_text items, its children list)
    is copied first. A plain ``block.copy()`` is shallow and would let the
    truncation leak back into the caller's data.

    Args:
        block: Block dictionary; its "type" value names the key holding the payload.
        max_text_length: Maximum number of characters kept per text content.

    Returns:
        A copy of the block with over-long text content truncated.
    """
    block_copy = block.copy()

    block_type = block.get("type")
    if not block_type:
        return block_copy

    content = block.get(block_type)
    if not content or not isinstance(content, dict):
        return block_copy

    # Detach the payload from the caller's dict before any write below.
    content_copy = content.copy()
    block_copy[block_type] = content_copy

    if content.get("rich_text"):
        content_copy["rich_text"] = [
            _copy_rich_text_item(item) for item in content["rich_text"]
        ]
        _fix_rich_text_content(block_copy, block_type, content_copy, max_text_length)

    if content.get("children"):
        content_copy["children"] = [
            _fix_single_block_content(child, max_text_length)
            for child in content["children"]
        ]

    return block_copy


def _fix_rich_text_content(
    block_copy: dict[str, Any],
    block_type: str,
    content: dict[str, Any],
    max_text_length: int,
) -> None:
    """Fix rich text content that exceeds the length limit.

    Reads the items from ``content["rich_text"]`` and writes truncated text
    into ``block_copy[block_type]["rich_text"]`` at the same index. Items
    without a ``text.content`` entry are skipped.

    Args:
        block_copy: Block dictionary that receives the truncated text (mutated in place).
        block_type: Key of the type payload inside block_copy.
        content: Payload whose "rich_text" list is inspected.
        max_text_length: Maximum number of characters kept per text content.
    """
    rich_text = content["rich_text"]
    for i, text_item in enumerate(rich_text):
        if not isinstance(text_item, dict):
            continue

        text_part = text_item.get("text")
        if not isinstance(text_part, dict) or "content" not in text_part:
            continue

        text_content = text_part["content"]
        if len(text_content) <= max_text_length:
            continue

        logger.warning(
            "Truncating text content from %d to %d chars",
            len(text_content),
            max_text_length,
        )
        block_copy[block_type]["rich_text"][i]["text"]["content"] = text_content[
            :max_text_length
        ]
76
+
77
+
78
def split_to_paragraphs(markdown_text: str) -> list[str]:
    """Break markdown text into its non-blank paragraphs.

    The text is cut wherever two newlines are separated only by optional
    whitespace; chunks consisting solely of whitespace are discarded.
    """
    kept: list[str] = []
    for chunk in re.split(r"\n\s*\n", markdown_text):
        if chunk.strip():
            kept.append(chunk)
    return kept
82
+
83
+
84
def split_to_sentences(paragraph: str) -> list[str]:
    """Break a paragraph into sentences.

    A cut is made at every run of whitespace that directly follows ".",
    "!" or "?"; the punctuation stays with its sentence. Pieces that are
    empty or whitespace-only are dropped.
    """
    pieces = re.split(r"(?<=[.!?])\s+", paragraph)
    return list(filter(lambda piece: piece.strip() != "", pieces))
@@ -1,4 +1,4 @@
1
- from typing import Any, Dict, List, Optional
1
+ from typing import Any, Dict, Optional
2
2
 
3
3
  from notionary.blocks.registry.block_registry import BlockRegistry
4
4
 
@@ -27,7 +27,7 @@ class PageContentRetriever(LoggingMixin):
27
27
 
28
28
  async def _get_page_blocks_with_children(
29
29
  self, parent_id: Optional[str] = None
30
- ) -> List[Dict[str, Any]]:
30
+ ) -> list[Dict[str, Any]]:
31
31
  blocks = (
32
32
  await self.client.get_page_blocks(page_id=self.page_id)
33
33
  if parent_id is None