notionary 0.2.16__py3-none-any.whl → 0.2.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +10 -5
- notionary/base_notion_client.py +18 -7
- notionary/blocks/__init__.py +55 -24
- notionary/blocks/audio/__init__.py +7 -0
- notionary/blocks/audio/audio_element.py +152 -0
- notionary/blocks/audio/audio_markdown_node.py +29 -0
- notionary/blocks/audio/audio_models.py +59 -0
- notionary/blocks/bookmark/__init__.py +7 -0
- notionary/blocks/{bookmark_element.py → bookmark/bookmark_element.py} +20 -65
- notionary/blocks/bookmark/bookmark_markdown_node.py +43 -0
- notionary/blocks/bulleted_list/__init__.py +7 -0
- notionary/blocks/{bulleted_list_element.py → bulleted_list/bulleted_list_element.py} +7 -3
- notionary/blocks/bulleted_list/bulleted_list_markdown_node.py +33 -0
- notionary/blocks/bulleted_list/bulleted_list_models.py +0 -0
- notionary/blocks/callout/__init__.py +7 -0
- notionary/blocks/callout/callout_element.py +132 -0
- notionary/blocks/callout/callout_markdown_node.py +31 -0
- notionary/blocks/callout/callout_models.py +0 -0
- notionary/blocks/code/__init__.py +7 -0
- notionary/blocks/{code_block_element.py → code/code_element.py} +72 -40
- notionary/blocks/code/code_markdown_node.py +43 -0
- notionary/blocks/code/code_models.py +0 -0
- notionary/blocks/column/__init__.py +5 -0
- notionary/blocks/{column_element.py → column/column_element.py} +24 -55
- notionary/blocks/column/column_models.py +0 -0
- notionary/blocks/divider/__init__.py +7 -0
- notionary/blocks/{divider_element.py → divider/divider_element.py} +11 -3
- notionary/blocks/divider/divider_markdown_node.py +24 -0
- notionary/blocks/divider/divider_models.py +0 -0
- notionary/blocks/document/__init__.py +7 -0
- notionary/blocks/document/document_element.py +102 -0
- notionary/blocks/document/document_markdown_node.py +31 -0
- notionary/blocks/document/document_models.py +0 -0
- notionary/blocks/embed/__init__.py +7 -0
- notionary/blocks/{embed_element.py → embed/embed_element.py} +50 -32
- notionary/blocks/embed/embed_markdown_node.py +30 -0
- notionary/blocks/embed/embed_models.py +0 -0
- notionary/blocks/heading/__init__.py +7 -0
- notionary/blocks/{heading_element.py → heading/heading_element.py} +25 -17
- notionary/blocks/heading/heading_markdown_node.py +29 -0
- notionary/blocks/heading/heading_models.py +0 -0
- notionary/blocks/image/__init__.py +7 -0
- notionary/blocks/{image_element.py → image/image_element.py} +62 -42
- notionary/blocks/image/image_markdown_node.py +33 -0
- notionary/blocks/image/image_models.py +0 -0
- notionary/blocks/markdown_builder.py +356 -0
- notionary/blocks/markdown_node.py +29 -0
- notionary/blocks/mention/__init__.py +7 -0
- notionary/blocks/{mention_element.py → mention/mention_element.py} +6 -2
- notionary/blocks/mention/mention_markdown_node.py +38 -0
- notionary/blocks/mention/mention_models.py +0 -0
- notionary/blocks/numbered_list/__init__.py +7 -0
- notionary/blocks/{numbered_list_element.py → numbered_list/numbered_list_element.py} +10 -6
- notionary/blocks/numbered_list/numbered_list_markdown_node.py +29 -0
- notionary/blocks/numbered_list/numbered_list_models.py +0 -0
- notionary/blocks/paragraph/__init__.py +7 -0
- notionary/blocks/{paragraph_element.py → paragraph/paragraph_element.py} +7 -3
- notionary/blocks/paragraph/paragraph_markdown_node.py +25 -0
- notionary/blocks/paragraph/paragraph_models.py +0 -0
- notionary/blocks/quote/__init__.py +7 -0
- notionary/blocks/quote/quote_element.py +92 -0
- notionary/blocks/quote/quote_markdown_node.py +23 -0
- notionary/blocks/quote/quote_models.py +0 -0
- notionary/blocks/registry/block_registry.py +17 -3
- notionary/blocks/registry/block_registry_builder.py +90 -178
- notionary/blocks/shared/__init__.py +0 -0
- notionary/blocks/shared/block_client.py +256 -0
- notionary/blocks/shared/models.py +710 -0
- notionary/blocks/{notion_block_element.py → shared/notion_block_element.py} +8 -5
- notionary/blocks/{text_inline_formatter.py → shared/text_inline_formatter.py} +14 -14
- notionary/blocks/shared/text_inline_formatter_new.py +139 -0
- notionary/blocks/table/__init__.py +7 -0
- notionary/blocks/{table_element.py → table/table_element.py} +23 -11
- notionary/blocks/table/table_markdown_node.py +40 -0
- notionary/blocks/table/table_models.py +0 -0
- notionary/blocks/todo/__init__.py +7 -0
- notionary/blocks/{todo_element.py → todo/todo_element.py} +8 -4
- notionary/blocks/todo/todo_markdown_node.py +31 -0
- notionary/blocks/todo/todo_models.py +0 -0
- notionary/blocks/toggle/__init__.py +4 -0
- notionary/blocks/{toggle_element.py → toggle/toggle_element.py} +7 -3
- notionary/blocks/toggle/toggle_markdown_node.py +35 -0
- notionary/blocks/toggle/toggle_models.py +0 -0
- notionary/blocks/toggleable_heading/__init__.py +9 -0
- notionary/blocks/{toggleable_heading_element.py → toggleable_heading/toggleable_heading_element.py} +8 -4
- notionary/blocks/toggleable_heading/toggleable_heading_markdown_node.py +43 -0
- notionary/blocks/toggleable_heading/toggleable_heading_models.py +0 -0
- notionary/blocks/video/__init__.py +7 -0
- notionary/blocks/{video_element.py → video/video_element.py} +82 -57
- notionary/blocks/video/video_markdown_node.py +30 -0
- notionary/database/__init__.py +4 -0
- notionary/database/database.py +481 -0
- notionary/database/{filter_builder.py → database_filter_builder.py} +27 -29
- notionary/database/{notion_database_provider.py → database_provider.py} +4 -4
- notionary/database/notion_database.py +45 -18
- notionary/file_upload/__init__.py +7 -0
- notionary/file_upload/client.py +254 -0
- notionary/file_upload/models.py +60 -0
- notionary/file_upload/notion_file_upload.py +387 -0
- notionary/page/content/markdown_whitespace_processor.py +80 -0
- notionary/page/content/notion_text_length_utils.py +87 -0
- notionary/page/content/page_content_retriever.py +2 -2
- notionary/page/content/page_content_writer.py +97 -148
- notionary/page/formatting/line_processor.py +153 -0
- notionary/page/formatting/markdown_to_notion_converter.py +103 -424
- notionary/page/notion_page.py +13 -14
- notionary/page/notion_to_markdown_converter.py +9 -13
- notionary/telemetry/views.py +15 -6
- notionary/user/__init__.py +11 -0
- notionary/user/base_notion_user.py +52 -0
- notionary/user/client.py +129 -0
- notionary/user/models.py +83 -0
- notionary/user/notion_bot_user.py +227 -0
- notionary/user/notion_user.py +256 -0
- notionary/user/notion_user_manager.py +173 -0
- notionary/user/notion_user_provider.py +1 -0
- notionary/util/__init__.py +3 -5
- notionary/util/factory_decorator.py +0 -33
- notionary/util/factory_only.py +37 -0
- notionary/util/fuzzy.py +74 -0
- notionary/util/logging_mixin.py +12 -12
- notionary/workspace.py +38 -3
- {notionary-0.2.16.dist-info → notionary-0.2.18.dist-info}/METADATA +2 -1
- notionary-0.2.18.dist-info/RECORD +149 -0
- notionary/blocks/audio_element.py +0 -144
- notionary/blocks/callout_element.py +0 -122
- notionary/blocks/notion_block_client.py +0 -26
- notionary/blocks/qoute_element.py +0 -169
- notionary/page/content/notion_page_content_chunker.py +0 -84
- notionary/page/formatting/spacer_rules.py +0 -483
- notionary/util/fuzzy_matcher.py +0 -82
- notionary-0.2.16.dist-info/RECORD +0 -71
- /notionary/{elements/__init__.py → blocks/bookmark/bookmark_models.py} +0 -0
- /notionary/database/{database_exceptions.py → exceptions.py} +0 -0
- /notionary/util/{singleton_decorator.py → singleton.py} +0 -0
- {notionary-0.2.16.dist-info → notionary-0.2.18.dist-info}/LICENSE +0 -0
- {notionary-0.2.16.dist-info → notionary-0.2.18.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import mimetypes
|
|
3
|
+
from typing import Optional
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from datetime import datetime, timedelta
|
|
6
|
+
from io import BytesIO
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
from notionary.file_upload.models import FileUploadResponse
|
|
10
|
+
from notionary.util import LoggingMixin
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class NotionFileUpload(LoggingMixin):
    """
    High-level service for managing Notion file uploads.

    Handles both small file (single-part) and large file (multi-part) uploads,
    choosing between them by comparing the payload size with
    SINGLE_PART_MAX_SIZE. Failures are reported by logging and returning
    ``None`` (or ``False`` for the part-upload helpers).
    """

    # Notion's file size limits
    SINGLE_PART_MAX_SIZE = 20 * 1024 * 1024  # 20MB
    MULTI_PART_CHUNK_SIZE = 10 * 1024 * 1024  # 10MB per part
    MAX_FILENAME_BYTES = 900

    def __init__(self, token: Optional[str] = None):
        """
        Initialize the file upload service.

        Args:
            token: Optional token forwarded to NotionFileUploadClient
        """
        # Imported at call time rather than module level, as in the original;
        # presumably to avoid a circular import - TODO confirm.
        from notionary.file_upload import NotionFileUploadClient

        self.client = NotionFileUploadClient(token=token)

    async def upload_file(
        self, file_path: Path, filename: Optional[str] = None
    ) -> Optional[FileUploadResponse]:
        """
        Upload a file to Notion, automatically choosing single-part or multi-part based on size.

        Args:
            file_path: Path to the file to upload
            filename: Optional custom filename (defaults to file_path.name)

        Returns:
            FileUploadResponse if successful, None otherwise
        """
        # Accept plain strings as well; Path(Path(...)) is a no-op.
        file_path = Path(file_path)

        if not file_path.exists():
            self.logger.error("File does not exist: %s", file_path)
            return None

        # exists() is also true for directories, which cannot be uploaded.
        if not file_path.is_file():
            self.logger.error("Path is not a file: %s", file_path)
            return None

        file_size = file_path.stat().st_size
        filename = filename or file_path.name

        if not self._is_valid_filename(filename):
            return None

        # Choose upload method based on file size
        if file_size <= self.SINGLE_PART_MAX_SIZE:
            return await self._upload_small_file(file_path, filename, file_size)
        return await self._upload_large_file(file_path, filename, file_size)

    async def upload_from_bytes(
        self, file_content: bytes, filename: str, content_type: Optional[str] = None
    ) -> Optional[FileUploadResponse]:
        """
        Upload file content from bytes.

        Args:
            file_content: File content as bytes
            filename: Name for the file
            content_type: Optional MIME type (guessed from filename if omitted)

        Returns:
            FileUploadResponse if successful, None otherwise
        """
        file_size = len(file_content)

        if not self._is_valid_filename(filename):
            return None

        # Guess content type if not provided
        if not content_type:
            content_type, _ = mimetypes.guess_type(filename)

        # Choose upload method based on size
        if file_size <= self.SINGLE_PART_MAX_SIZE:
            return await self._upload_small_file_from_bytes(
                file_content, filename, content_type, file_size
            )
        return await self._upload_large_file_from_bytes(
            file_content, filename, content_type, file_size
        )

    async def get_upload_status(self, file_upload_id: str) -> Optional[str]:
        """
        Get the current status of a file upload.

        Args:
            file_upload_id: ID of the file upload

        Returns:
            Status string ("pending", "uploaded", etc.) or None if failed
        """
        upload_info = await self.client.retrieve_file_upload(file_upload_id)
        return upload_info.status if upload_info else None

    async def wait_for_upload_completion(
        self, file_upload_id: str, timeout_seconds: int = 300, poll_interval: int = 2
    ) -> Optional[FileUploadResponse]:
        """
        Wait for a file upload to complete.

        Args:
            file_upload_id: ID of the file upload
            timeout_seconds: Maximum time to wait
            poll_interval: Seconds between status checks

        Returns:
            FileUploadResponse when complete, None if timeout or failed
        """
        # The event loop clock is monotonic, so wall-clock adjustments
        # (NTP sync, DST) cannot shorten or extend the timeout.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout_seconds

        while loop.time() < deadline:
            upload_info = await self.client.retrieve_file_upload(file_upload_id)

            if not upload_info:
                self.logger.error(
                    "Failed to retrieve upload info for %s", file_upload_id
                )
                return None

            if upload_info.status == "uploaded":
                self.logger.info("Upload completed: %s", file_upload_id)
                return upload_info
            if upload_info.status == "failed":
                self.logger.error("Upload failed: %s", file_upload_id)
                return None

            await asyncio.sleep(poll_interval)

        self.logger.warning("Upload timeout: %s", file_upload_id)
        return None

    async def list_recent_uploads(self, limit: int = 50) -> list[FileUploadResponse]:
        """
        List recent file uploads.

        Args:
            limit: Maximum number of uploads to return

        Returns:
            List of FileUploadResponse objects
        """
        uploads = []
        start_cursor = None
        remaining = limit

        while remaining > 0:
            page_size = min(remaining, 100)  # API max per request

            response = await self.client.list_file_uploads(
                page_size=page_size, start_cursor=start_cursor
            )

            if not response or not response.results:
                break

            uploads.extend(response.results)
            remaining -= len(response.results)

            if not response.has_more or not response.next_cursor:
                break

            start_cursor = response.next_cursor

        # Guard against a page returning more entries than requested.
        return uploads[:limit]

    def _is_valid_filename(self, filename: str) -> bool:
        """Return True if the UTF-8 encoded filename fits MAX_FILENAME_BYTES, logging an error otherwise."""
        encoded_length = len(filename.encode("utf-8"))
        if encoded_length > self.MAX_FILENAME_BYTES:
            self.logger.error(
                "Filename too long: %d bytes (max %d)",
                encoded_length,
                self.MAX_FILENAME_BYTES,
            )
            return False
        return True

    def _count_parts(self, total_size: int) -> int:
        """Return how many MULTI_PART_CHUNK_SIZE parts are needed for total_size bytes (ceiling division)."""
        return (
            total_size + self.MULTI_PART_CHUNK_SIZE - 1
        ) // self.MULTI_PART_CHUNK_SIZE

    async def _upload_small_file(
        self, file_path: Path, filename: str, file_size: int
    ) -> Optional[FileUploadResponse]:
        """Upload a small file using single-part upload."""
        content_type, _ = mimetypes.guess_type(str(file_path))

        # Create file upload
        file_upload = await self.client.create_file_upload(
            filename=filename,
            content_type=content_type,
            content_length=file_size,
            mode="single_part",
        )

        if not file_upload:
            self.logger.error("Failed to create file upload for %s", filename)
            return None

        # Send file content
        success = await self.client.send_file_from_path(
            file_upload_id=file_upload.id, file_path=file_path
        )

        if not success:
            self.logger.error("Failed to send file content for %s", filename)
            return None

        self.logger.info(
            "Successfully uploaded file: %s (ID: %s)", filename, file_upload.id
        )
        return file_upload

    async def _upload_large_file(
        self, file_path: Path, filename: str, file_size: int
    ) -> Optional[FileUploadResponse]:
        """Upload a large file using multi-part upload."""
        content_type, _ = mimetypes.guess_type(str(file_path))

        # Create file upload with multi-part mode
        file_upload = await self.client.create_file_upload(
            filename=filename,
            content_type=content_type,
            content_length=file_size,
            mode="multi_part",
        )

        if not file_upload:
            self.logger.error(
                "Failed to create multi-part file upload for %s", filename
            )
            return None

        # Upload file in parts, labelled with the same name the upload was
        # created with (a custom filename was previously ignored here).
        success = await self._upload_file_parts(
            file_upload.id, file_path, file_size, filename
        )

        if not success:
            self.logger.error("Failed to upload file parts for %s", filename)
            return None

        # Complete the upload
        completed_upload = await self.client.complete_file_upload(file_upload.id)

        if not completed_upload:
            self.logger.error("Failed to complete file upload for %s", filename)
            return None

        self.logger.info(
            "Successfully uploaded large file: %s (ID: %s)", filename, file_upload.id
        )
        return completed_upload

    async def _upload_small_file_from_bytes(
        self,
        file_content: bytes,
        filename: str,
        content_type: Optional[str],
        file_size: int,
    ) -> Optional[FileUploadResponse]:
        """Upload small file from bytes."""
        # Create file upload
        file_upload = await self.client.create_file_upload(
            filename=filename,
            content_type=content_type,
            content_length=file_size,
            mode="single_part",
        )

        if not file_upload:
            self.logger.error("Failed to create file upload for %s", filename)
            return None

        # Send file content
        success = await self.client.send_file_upload(
            file_upload_id=file_upload.id,
            file_content=BytesIO(file_content),
            filename=filename,
        )

        if not success:
            self.logger.error("Failed to send file content for %s", filename)
            return None

        self.logger.info(
            "Successfully uploaded file: %s (ID: %s)", filename, file_upload.id
        )
        return file_upload

    async def _upload_large_file_from_bytes(
        self,
        file_content: bytes,
        filename: str,
        content_type: Optional[str],
        file_size: int,
    ) -> Optional[FileUploadResponse]:
        """Upload large file from bytes using multi-part."""
        # Create file upload
        file_upload = await self.client.create_file_upload(
            filename=filename,
            content_type=content_type,
            content_length=file_size,
            mode="multi_part",
        )

        if not file_upload:
            self.logger.error(
                "Failed to create multi-part file upload for %s", filename
            )
            return None

        # Upload in chunks
        success = await self._upload_bytes_parts(
            file_upload.id, file_content, filename
        )

        if not success:
            self.logger.error("Failed to upload file parts for %s", filename)
            return None

        # Complete the upload
        completed_upload = await self.client.complete_file_upload(file_upload.id)

        if not completed_upload:
            self.logger.error("Failed to complete file upload for %s", filename)
            return None

        return completed_upload

    async def _upload_file_parts(
        self,
        file_upload_id: str,
        file_path: Path,
        file_size: int,
        filename: Optional[str] = None,
    ) -> bool:
        """
        Upload file in parts for multi-part upload.

        Args:
            file_upload_id: ID of the multi-part file upload
            file_path: Path of the file to read
            file_size: Size in bytes, used only to compute the part count for logging
            filename: Name sent with each part (defaults to file_path.name)

        Returns:
            True if every part was sent, False on the first failure or error
        """
        part_filename = filename or file_path.name
        part_number = 1
        total_parts = self._count_parts(file_size)

        try:
            import aiofiles

            async with aiofiles.open(file_path, "rb") as file:
                while True:
                    chunk = await file.read(self.MULTI_PART_CHUNK_SIZE)
                    if not chunk:
                        break

                    success = await self.client.send_file_upload(
                        file_upload_id=file_upload_id,
                        file_content=BytesIO(chunk),
                        filename=part_filename,
                        part_number=part_number,
                    )

                    if not success:
                        self.logger.error(
                            "Failed to upload part %d/%d", part_number, total_parts
                        )
                        return False

                    self.logger.debug("Uploaded part %d/%d", part_number, total_parts)
                    part_number += 1

            self.logger.info("Successfully uploaded all %d parts", total_parts)
            return True

        except Exception as e:
            # Deliberate best-effort: any read/send error is reported as a
            # failed upload instead of propagating to the caller.
            self.logger.error("Error uploading file parts: %s", e)
            return False

    async def _upload_bytes_parts(
        self, file_upload_id: str, file_content: bytes, filename: Optional[str] = None
    ) -> bool:
        """
        Upload bytes in parts for multi-part upload.

        Args:
            file_upload_id: ID of the multi-part file upload
            file_content: Complete payload to split into parts
            filename: Optional name sent with each part; omitted from the
                request when None, which matches the previous behavior

        Returns:
            True if every part was sent, False on the first failure
        """
        total_parts = self._count_parts(len(file_content))
        offsets = range(0, len(file_content), self.MULTI_PART_CHUNK_SIZE)

        for part_number, offset in enumerate(offsets, start=1):
            chunk = file_content[offset : offset + self.MULTI_PART_CHUNK_SIZE]

            send_kwargs = {
                "file_upload_id": file_upload_id,
                "file_content": BytesIO(chunk),
                "part_number": part_number,
            }
            if filename is not None:
                send_kwargs["filename"] = filename

            success = await self.client.send_file_upload(**send_kwargs)

            if not success:
                self.logger.error(
                    "Failed to upload part %d/%d", part_number, total_parts
                )
                return False

            self.logger.debug("Uploaded part %d/%d", part_number, total_parts)

        self.logger.info("Successfully uploaded all %d parts", total_parts)
        return True
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
class MarkdownWhitespaceProcessor:
    """
    Strip leading whitespace from markdown lines while keeping code readable.

    Lines outside fenced code blocks lose all leading whitespace. Lines inside
    a fenced block are buffered and, when the block ends, dedented by the
    indentation common to their non-blank lines.
    """

    def __init__(self):
        self.processed_lines = []
        self.in_code_block = False
        self.current_code_block = []

    def process_lines(self, lines: list[str]) -> str:
        """Run every line through the processor and return the joined result."""
        # Start from a clean slate so the instance can be reused.
        self.processed_lines, self.in_code_block, self.current_code_block = (
            [],
            False,
            [],
        )

        for raw_line in lines:
            self._process_single_line(raw_line)

        # A fence that was opened but never closed still gets flushed,
        # provided it collected at least one line.
        if self.in_code_block and self.current_code_block:
            self._finish_code_block()

        return "\n".join(self.processed_lines)

    def _process_single_line(self, line: str) -> None:
        """Route one line: fence marker, code block content, or regular text."""
        if self._is_code_block_marker(line):
            self._handle_code_block_marker(line)
        elif self.in_code_block:
            self.current_code_block.append(line)
        else:
            # Regular text - drop the leading whitespace
            self.processed_lines.append(line.lstrip())

    def _handle_code_block_marker(self, line: str) -> None:
        """Open a new code block, or close the one currently open."""
        if self.in_code_block:
            self._finish_code_block()
            return

        self.in_code_block = True
        self.processed_lines.append(self._normalize_code_block_start(line))
        self.current_code_block = []

    def _finish_code_block(self) -> None:
        """Emit the buffered code lines followed by a closing fence."""
        dedented = self._normalize_code_block_content(self.current_code_block)
        self.processed_lines.extend([*dedented, "```"])
        self.in_code_block = False

    def _is_code_block_marker(self, line: str) -> bool:
        """Tell whether the line, ignoring leading whitespace, begins with a fence."""
        return line.lstrip()[:3] == "```"

    def _normalize_code_block_start(self, line: str) -> str:
        """Return the opening fence directly followed by the trimmed language tag."""
        unindented = line.lstrip()
        language = unindented.replace("```", "", 1).strip()
        return f"```{language}"

    def _normalize_code_block_content(self, code_lines: list[str]) -> list[str]:
        """Remove the indentation shared by all non-blank code lines."""
        if not code_lines:
            return []

        # Indentation widths of the lines that carry content
        indents = [
            len(entry) - len(entry.lstrip()) for entry in code_lines if entry.strip()
        ]
        if not indents:
            return [""] * len(code_lines)

        common = min(indents)
        if common == 0:
            return code_lines

        # Blank lines collapse to empty strings; the rest lose the shared prefix
        return [entry[common:] if entry.strip() else "" for entry in code_lines]
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for handling Notion API text length limitations.
|
|
3
|
+
|
|
4
|
+
This module provides functions to fix text content that exceeds Notion's
|
|
5
|
+
rich_text character limit of 2000 characters per element.
|
|
6
|
+
|
|
7
|
+
Resolves API errors like:
|
|
8
|
+
"validation_error - body.children[79].toggle.children[2].paragraph.rich_text[0].text.content.length
|
|
9
|
+
should be ≤ 2000, instead was 2162."
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)


def fix_blocks_content_length(
    blocks: list[dict[str, Any]], max_text_length: int = 1900
) -> list[dict[str, Any]]:
    """
    Check each block and ensure text content doesn't exceed Notion's limit.

    Args:
        blocks: Block dictionaries to check; they are never modified
        max_text_length: Maximum number of characters kept per text item

    Returns:
        New list of block dictionaries with over-long text truncated
    """
    return [_fix_single_block_content(block, max_text_length) for block in blocks]


def _fix_single_block_content(
    block: dict[str, Any], max_text_length: int
) -> dict[str, Any]:
    """
    Fix content length in a single block and its children recursively.

    Args:
        block: Block dictionary; its payload is looked up under block["type"]
        max_text_length: Maximum number of characters kept per text item

    Returns:
        A copy of the block with truncated text. Every structure that gets
        changed is copied first, so the input block is left untouched.
    """
    block_copy = block.copy()

    block_type = block.get("type")
    if not block_type:
        return block_copy

    content = block.get(block_type)
    if not content or not isinstance(content, dict):
        return block_copy

    # block.copy() is shallow: without detaching the payload dict, the writes
    # below would go straight through into the caller's block.
    block_copy[block_type] = dict(content)

    if "rich_text" in content:
        _fix_rich_text_content(block_copy, block_type, content, max_text_length)

    if content.get("children"):
        block_copy[block_type]["children"] = [
            _fix_single_block_content(child, max_text_length)
            for child in content["children"]
        ]

    return block_copy


def _fix_rich_text_content(
    block_copy: dict[str, Any],
    block_type: str,
    content: dict[str, Any],
    max_text_length: int,
) -> None:
    """
    Fix rich text content that exceeds the length limit.

    Args:
        block_copy: Block whose payload under block_type receives the result
        block_type: Key of the payload inside block_copy
        content: Payload to read the "rich_text" items from (not modified)
        max_text_length: Maximum number of characters kept per text item
    """
    rich_text = content["rich_text"]
    if not rich_text:
        return

    fixed_items = []
    for text_item in rich_text:
        text_part = text_item.get("text") if isinstance(text_item, dict) else None
        if not isinstance(text_part, dict) or "content" not in text_part:
            fixed_items.append(text_item)
            continue

        text_content = text_part["content"]
        if not isinstance(text_content, str) or len(text_content) <= max_text_length:
            fixed_items.append(text_item)
            continue

        logger.warning(
            "Truncating text content from %d to %d chars",
            len(text_content),
            max_text_length,
        )
        # Build fresh dicts instead of assigning into text_item, which is
        # still shared with the caller's original block.
        fixed_items.append(
            {
                **text_item,
                "text": {**text_part, "content": text_content[:max_text_length]},
            }
        )

    # Rebind the payload rather than mutating it, so this also stays safe
    # when block_copy is only a shallow copy of the original block.
    block_copy[block_type] = {**block_copy[block_type], "rich_text": fixed_items}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def split_to_paragraphs(markdown_text: str) -> list[str]:
    """Return the non-blank chunks of the text, split wherever a blank line occurs."""
    chunks = []
    for candidate in re.split(r"\n\s*\n", markdown_text):
        # Whitespace-only pieces are not paragraphs
        if candidate.strip():
            chunks.append(candidate)
    return chunks
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def split_to_sentences(paragraph: str) -> list[str]:
    """Return the non-blank pieces of a paragraph, split after '.', '!' or '?' followed by whitespace."""
    pieces = []
    for fragment in re.split(r"(?<=[.!?])\s+", paragraph):
        # Drop fragments that contain nothing but whitespace
        if fragment.strip():
            pieces.append(fragment)
    return pieces
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Dict,
|
|
1
|
+
from typing import Any, Dict, Optional
|
|
2
2
|
|
|
3
3
|
from notionary.blocks.registry.block_registry import BlockRegistry
|
|
4
4
|
|
|
@@ -27,7 +27,7 @@ class PageContentRetriever(LoggingMixin):
|
|
|
27
27
|
|
|
28
28
|
async def _get_page_blocks_with_children(
|
|
29
29
|
self, parent_id: Optional[str] = None
|
|
30
|
-
) ->
|
|
30
|
+
) -> list[Dict[str, Any]]:
|
|
31
31
|
blocks = (
|
|
32
32
|
await self.client.get_page_blocks(page_id=self.page_id)
|
|
33
33
|
if parent_id is None
|