notionary 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +21 -6
- notionary/{core/converters → converters}/elements/audio_element.py +7 -5
- notionary/{core/converters → converters}/elements/bookmark_element.py +1 -1
- notionary/{core/converters → converters}/elements/callout_element.py +2 -2
- notionary/{core/converters → converters}/elements/code_block_element.py +1 -1
- notionary/{core/converters → converters}/elements/column_element.py +1 -1
- notionary/{core/converters → converters}/elements/divider_element.py +1 -1
- notionary/{core/converters → converters}/elements/embed_element.py +3 -5
- notionary/{core/converters → converters}/elements/heading_element.py +2 -2
- notionary/{core/converters → converters}/elements/image_element.py +1 -1
- notionary/{core/converters → converters}/elements/list_element.py +2 -2
- notionary/{core/converters → converters}/elements/paragraph_element.py +2 -2
- notionary/{core/converters → converters}/elements/qoute_element.py +1 -1
- notionary/{core/converters → converters}/elements/table_element.py +2 -2
- notionary/{core/converters → converters}/elements/todo_lists.py +2 -2
- notionary/{core/converters → converters}/elements/toggle_element.py +24 -21
- notionary/{core/converters → converters}/elements/video_element.py +1 -1
- notionary/{core/converters → converters}/markdown_to_notion_converter.py +72 -111
- notionary/{core/converters → converters}/notion_to_markdown_converter.py +2 -2
- notionary/{core/converters → converters}/registry/block_element_registry.py +5 -5
- notionary/{core/converters → converters}/registry/block_element_registry_builder.py +18 -18
- notionary/database/database_discovery.py +142 -0
- notionary/{core/database → database}/database_info_service.py +1 -1
- notionary/{core/database/notion_database_manager.py → database/notion_database.py} +33 -57
- notionary/{core/database/notion_database_manager_factory.py → database/notion_database_factory.py} +18 -16
- notionary/{core/notion_client.py → notion_client.py} +4 -2
- notionary/page/content/notion_page_content_chunker.py +84 -0
- notionary/{core/page → page}/content/page_content_manager.py +29 -13
- notionary/{core/page → page}/metadata/metadata_editor.py +59 -46
- notionary/{core/page → page}/metadata/notion_icon_manager.py +10 -12
- notionary/{core/page → page}/metadata/notion_page_cover_manager.py +16 -21
- notionary/page/notion_page.py +504 -0
- notionary/page/notion_page_factory.py +256 -0
- notionary/{core/page → page}/properites/database_property_service.py +115 -99
- notionary/{core/page → page}/properites/page_property_manager.py +81 -52
- notionary/{core/page → page}/properites/property_formatter.py +1 -1
- notionary/{core/page → page}/properites/property_operation_result.py +43 -30
- notionary/{core/page → page}/properites/property_value_extractor.py +26 -8
- notionary/{core/page → page}/relations/notion_page_relation_manager.py +72 -53
- notionary/{core/page → page}/relations/notion_page_title_resolver.py +12 -12
- notionary/{core/page → page}/relations/page_database_relation.py +15 -15
- notionary/{core/page → page}/relations/relation_operation_result.py +50 -41
- notionary/util/page_id_utils.py +14 -8
- {notionary-0.1.11.dist-info → notionary-0.1.13.dist-info}/METADATA +1 -1
- notionary-0.1.13.dist-info/RECORD +56 -0
- notionary/core/database/notion_database_schema.py +0 -104
- notionary/core/page/notion_page_manager.py +0 -322
- notionary-0.1.11.dist-info/RECORD +0 -54
- /notionary/{core/converters → converters}/__init__.py +0 -0
- /notionary/{core/converters → converters}/elements/notion_block_element.py +0 -0
- /notionary/{core/converters → converters}/elements/text_inline_formatter.py +0 -0
- /notionary/{core/database → database}/models/page_result.py +0 -0
- {notionary-0.1.11.dist-info → notionary-0.1.13.dist-info}/WHEEL +0 -0
- {notionary-0.1.11.dist-info → notionary-0.1.13.dist-info}/licenses/LICENSE +0 -0
- {notionary-0.1.11.dist-info → notionary-0.1.13.dist-info}/top_level.txt +0 -0
@@ -1,30 +1,30 @@
|
|
1
1
|
from typing import List, Type
|
2
2
|
from collections import OrderedDict
|
3
3
|
|
4
|
-
from notionary.
|
5
|
-
from notionary.
|
6
|
-
from notionary.
|
7
|
-
from notionary.
|
4
|
+
from notionary.converters.elements.audio_element import AudioElement
|
5
|
+
from notionary.converters.elements.embed_element import EmbedElement
|
6
|
+
from notionary.converters.elements.notion_block_element import NotionBlockElement
|
7
|
+
from notionary.converters.registry.block_element_registry import (
|
8
8
|
BlockElementRegistry,
|
9
9
|
)
|
10
10
|
|
11
|
-
from notionary.
|
12
|
-
from notionary.
|
13
|
-
from notionary.
|
14
|
-
from notionary.
|
15
|
-
from notionary.
|
16
|
-
from notionary.
|
17
|
-
from notionary.
|
18
|
-
from notionary.
|
11
|
+
from notionary.converters.elements.paragraph_element import ParagraphElement
|
12
|
+
from notionary.converters.elements.heading_element import HeadingElement
|
13
|
+
from notionary.converters.elements.callout_element import CalloutElement
|
14
|
+
from notionary.converters.elements.code_block_element import CodeBlockElement
|
15
|
+
from notionary.converters.elements.divider_element import DividerElement
|
16
|
+
from notionary.converters.elements.table_element import TableElement
|
17
|
+
from notionary.converters.elements.todo_lists import TodoElement
|
18
|
+
from notionary.converters.elements.list_element import (
|
19
19
|
BulletedListElement,
|
20
20
|
NumberedListElement,
|
21
21
|
)
|
22
|
-
from notionary.
|
23
|
-
from notionary.
|
24
|
-
from notionary.
|
25
|
-
from notionary.
|
26
|
-
from notionary.
|
27
|
-
from notionary.
|
22
|
+
from notionary.converters.elements.qoute_element import QuoteElement
|
23
|
+
from notionary.converters.elements.image_element import ImageElement
|
24
|
+
from notionary.converters.elements.video_element import VideoElement
|
25
|
+
from notionary.converters.elements.toggle_element import ToggleElement
|
26
|
+
from notionary.converters.elements.bookmark_element import BookmarkElement
|
27
|
+
from notionary.converters.elements.column_element import ColumnElement
|
28
28
|
|
29
29
|
|
30
30
|
class BlockElementRegistryBuilder:
|
@@ -0,0 +1,142 @@
|
|
1
|
+
from typing import (
|
2
|
+
AsyncGenerator,
|
3
|
+
Dict,
|
4
|
+
List,
|
5
|
+
Optional,
|
6
|
+
Any,
|
7
|
+
Tuple,
|
8
|
+
)
|
9
|
+
from notionary.notion_client import NotionClient
|
10
|
+
from notionary.util.logging_mixin import LoggingMixin
|
11
|
+
|
12
|
+
|
13
|
+
class DatabaseDiscovery(LoggingMixin):
    """
    A utility class that discovers Notion databases accessible to your integration.

    It queries the Notion ``search`` endpoint with a database filter, follows
    the cursor-based pagination of the response and reduces every database
    object to a ``(title, id)`` pair.
    """

    # Notion documents 100 as the largest ``page_size`` accepted by its
    # paginated endpoints.
    _MAX_PAGE_SIZE = 100

    def __init__(self, client: Optional[NotionClient] = None) -> None:
        """
        Initialize the database discovery with a NotionClient.

        Args:
            client: NotionClient instance for API communication. A new client
                is created when none is supplied.
        """
        # Compare against None explicitly so that a supplied client is never
        # replaced merely because it evaluates as falsy.
        self._client = client if client is not None else NotionClient()
        self.logger.info("DatabaseDiscovery initialized")

    async def discover(self, page_size: int = 100) -> List[Tuple[str, str]]:
        """
        Discover all accessible databases and return their titles and IDs.

        Args:
            page_size: The number of databases to fetch per request

        Returns:
            List of tuples containing (database_title, database_id).
            Database objects without an ``id`` are left out.
        """
        databases: List[Tuple[str, str]] = []

        async for database in self._iter_databases(page_size):
            db_id = database.get("id")
            if not db_id:
                # Without an ID the entry cannot be addressed later on.
                continue

            databases.append((self._extract_database_title(database), db_id))

        return databases

    async def discover_and_print(self, page_size: int = 100) -> List[Tuple[str, str]]:
        """
        Discover databases and print the results in a nicely formatted way.

        This is a convenience method that discovers databases and handles
        the formatting and printing of results.

        Args:
            page_size: The number of databases to fetch per request

        Returns:
            The same list of databases as discover() for further processing
        """
        databases = await self.discover(page_size)

        if not databases:
            print("\n⚠️ No databases found!")
            print("Please ensure your Notion integration has access to databases.")
            print(
                "You need to share the databases with your integration in Notion settings."
            )
            return databases

        print(f"✅ Found {len(databases)} databases:")

        for i, (title, db_id) in enumerate(databases, 1):
            print(f"{i}. {title} (ID: {db_id})")

        return databases

    async def _iter_databases(
        self, page_size: int = 100
    ) -> AsyncGenerator[Dict[str, Any], None]:
        """
        Asynchronous generator that yields Notion databases one by one.

        Requests one page of search results at a time and yields its entries
        before requesting the next page, so only a single page of results is
        held by this generator at any time.

        Args:
            page_size: The number of databases to fetch per request. Values
                outside ``1..100`` are clamped into that range.

        Yields:
            Individual database objects from the Notion API
        """
        # Clamp so an out-of-range value cannot invalidate the search request.
        effective_page_size = max(1, min(page_size, self._MAX_PAGE_SIZE))
        start_cursor: Optional[str] = None

        while True:
            body: Dict[str, Any] = {
                "filter": {"value": "database", "property": "object"},
                "page_size": effective_page_size,
            }

            if start_cursor:
                body["start_cursor"] = start_cursor

            result = await self._client.post("search", data=body)

            if not result or "results" not in result:
                self.logger.error("Error fetching databases")
                return

            for database in result["results"]:
                yield database

            # Stop when the API reports no further page or omits the cursor
            # that would be needed to request it.
            next_cursor = result.get("next_cursor")
            if not result.get("has_more") or not next_cursor:
                return

            start_cursor = next_cursor

    def _extract_database_title(self, database: Dict[str, Any]) -> str:
        """
        Extract the database title from a Notion API response.

        The title is the concatenation of the ``plain_text`` values of all
        entries under the ``title`` key.

        Args:
            database: The database object from the Notion API

        Returns:
            The extracted title, or "Untitled" if the title is missing,
            ``None``, or contains no text.
        """
        # ``or []`` also covers a ``title`` key that is present but None,
        # which would otherwise raise TypeError when iterated.
        title_items = database.get("title") or []

        title_parts = [
            item["plain_text"]
            for item in title_items
            if isinstance(item, dict) and isinstance(item.get("plain_text"), str)
        ]

        return "".join(title_parts) or "Untitled"
|
@@ -1,15 +1,15 @@
|
|
1
1
|
from typing import Any, AsyncGenerator, Dict, List, Optional
|
2
2
|
|
3
|
-
from notionary.
|
4
|
-
from notionary.
|
3
|
+
from notionary.notion_client import NotionClient
|
4
|
+
from notionary.page.notion_page import NotionPage
|
5
5
|
from notionary.util.logging_mixin import LoggingMixin
|
6
6
|
from notionary.util.page_id_utils import format_uuid
|
7
7
|
|
8
8
|
|
9
|
-
class
|
9
|
+
class NotionDatabase(LoggingMixin):
|
10
10
|
"""
|
11
11
|
Minimal manager for Notion databases.
|
12
|
-
Focused exclusively on creating basic pages and retrieving page managers
|
12
|
+
Focused exclusively on creating basic pages and retrieving page managers
|
13
13
|
for further page operations.
|
14
14
|
"""
|
15
15
|
|
@@ -24,59 +24,31 @@ class NotionDatabaseManager(LoggingMixin):
|
|
24
24
|
self.database_id = format_uuid(database_id) or database_id
|
25
25
|
self._client = NotionClient(token=token)
|
26
26
|
|
27
|
-
|
28
|
-
async def create_blank_page(self) -> Optional[str]:
|
27
|
+
async def create_blank_page(self) -> Optional[NotionPage]:
|
29
28
|
"""
|
30
29
|
Create a new blank page in the database with minimal properties.
|
31
|
-
|
30
|
+
|
32
31
|
Returns:
|
33
|
-
|
32
|
+
NotionPage for the created page, or None if creation failed
|
34
33
|
"""
|
35
34
|
try:
|
36
35
|
response = await self._client.post(
|
37
|
-
"pages",
|
38
|
-
{
|
39
|
-
"parent": {"database_id": self.database_id},
|
40
|
-
"properties": {}
|
41
|
-
}
|
36
|
+
"pages", {"parent": {"database_id": self.database_id}, "properties": {}}
|
42
37
|
)
|
43
|
-
|
38
|
+
|
44
39
|
if response and "id" in response:
|
45
40
|
page_id = response["id"]
|
46
|
-
self.logger.info(
|
47
|
-
|
48
|
-
|
49
|
-
self.logger.warning("Page creation failed: invalid response")
|
50
|
-
return None
|
51
|
-
|
52
|
-
except Exception as e:
|
53
|
-
self.logger.error("Error creating blank page: %s", str(e))
|
54
|
-
return None
|
41
|
+
self.logger.info(
|
42
|
+
"Created blank page %s in database %s", page_id, self.database_id
|
43
|
+
)
|
55
44
|
|
56
|
-
|
57
|
-
"""
|
58
|
-
Get a NotionPageManager for a specific page.
|
45
|
+
return NotionPage(page_id=page_id)
|
59
46
|
|
60
|
-
|
61
|
-
|
47
|
+
self.logger.warning("Page creation failed: invalid response")
|
48
|
+
return None
|
62
49
|
|
63
|
-
Returns:
|
64
|
-
NotionPageManager instance or None if the page wasn't found
|
65
|
-
"""
|
66
|
-
self.logger.debug("Getting page manager for page %s", page_id)
|
67
|
-
|
68
|
-
try:
|
69
|
-
# Check if the page exists
|
70
|
-
page_data = await self._client.get_page(page_id)
|
71
|
-
|
72
|
-
if not page_data:
|
73
|
-
self.logger.error("Page %s not found", page_id)
|
74
|
-
return None
|
75
|
-
|
76
|
-
return NotionPageManager(page_id=page_id)
|
77
|
-
|
78
50
|
except Exception as e:
|
79
|
-
self.logger.error("Error
|
51
|
+
self.logger.error("Error creating blank page: %s", str(e))
|
80
52
|
return None
|
81
53
|
|
82
54
|
async def get_pages(
|
@@ -84,7 +56,7 @@ class NotionDatabaseManager(LoggingMixin):
|
|
84
56
|
limit: int = 100,
|
85
57
|
filter_conditions: Optional[Dict[str, Any]] = None,
|
86
58
|
sorts: Optional[List[Dict[str, Any]]] = None,
|
87
|
-
) -> List[
|
59
|
+
) -> List[NotionPage]:
|
88
60
|
"""
|
89
61
|
Get all pages from the database.
|
90
62
|
|
@@ -94,7 +66,7 @@ class NotionDatabaseManager(LoggingMixin):
|
|
94
66
|
sorts: Optional sort instructions for the database query
|
95
67
|
|
96
68
|
Returns:
|
97
|
-
List of
|
69
|
+
List of NotionPage instances for each page
|
98
70
|
"""
|
99
71
|
self.logger.debug(
|
100
72
|
"Getting up to %d pages with filter: %s, sorts: %s",
|
@@ -103,7 +75,7 @@ class NotionDatabaseManager(LoggingMixin):
|
|
103
75
|
sorts,
|
104
76
|
)
|
105
77
|
|
106
|
-
pages: List[
|
78
|
+
pages: List[NotionPage] = []
|
107
79
|
count = 0
|
108
80
|
|
109
81
|
async for page in self.iter_pages(
|
@@ -127,7 +99,7 @@ class NotionDatabaseManager(LoggingMixin):
|
|
127
99
|
page_size: int = 100,
|
128
100
|
filter_conditions: Optional[Dict[str, Any]] = None,
|
129
101
|
sorts: Optional[List[Dict[str, Any]]] = None,
|
130
|
-
) -> AsyncGenerator[
|
102
|
+
) -> AsyncGenerator[NotionPage, None]:
|
131
103
|
"""
|
132
104
|
Asynchronous generator that yields pages from the database.
|
133
105
|
Directly queries the Notion API without using the schema.
|
@@ -138,7 +110,7 @@ class NotionDatabaseManager(LoggingMixin):
|
|
138
110
|
sorts: Optional sort instructions for the database query
|
139
111
|
|
140
112
|
Yields:
|
141
|
-
|
113
|
+
NotionPage instances for each page
|
142
114
|
"""
|
143
115
|
self.logger.debug(
|
144
116
|
"Iterating pages with page_size: %d, filter: %s, sorts: %s",
|
@@ -174,10 +146,12 @@ class NotionDatabaseManager(LoggingMixin):
|
|
174
146
|
for page in result["results"]:
|
175
147
|
page_id: str = page.get("id", "")
|
176
148
|
title = self._extract_page_title(page)
|
177
|
-
|
149
|
+
|
178
150
|
page_url = f"https://notion.so/{page_id.replace('-', '')}"
|
179
151
|
|
180
|
-
notion_page_manager =
|
152
|
+
notion_page_manager = NotionPage(
|
153
|
+
page_id=page_id, title=title, url=page_url
|
154
|
+
)
|
181
155
|
yield notion_page_manager
|
182
156
|
|
183
157
|
# Update pagination parameters
|
@@ -222,10 +196,10 @@ class NotionDatabaseManager(LoggingMixin):
|
|
222
196
|
"""
|
223
197
|
try:
|
224
198
|
formatted_page_id = format_uuid(page_id) or page_id
|
225
|
-
|
199
|
+
|
226
200
|
# Archive the page (Notion's way of deleting)
|
227
201
|
data = {"archived": True}
|
228
|
-
|
202
|
+
|
229
203
|
result = await self._client.patch(f"pages/{formatted_page_id}", data)
|
230
204
|
if not result:
|
231
205
|
self.logger.error("Error deleting page %s", formatted_page_id)
|
@@ -233,14 +207,16 @@ class NotionDatabaseManager(LoggingMixin):
|
|
233
207
|
"success": False,
|
234
208
|
"message": f"Failed to delete page {formatted_page_id}",
|
235
209
|
}
|
236
|
-
|
237
|
-
self.logger.info(
|
210
|
+
|
211
|
+
self.logger.info(
|
212
|
+
"Page %s successfully deleted (archived)", formatted_page_id
|
213
|
+
)
|
238
214
|
return {"success": True, "page_id": formatted_page_id}
|
239
|
-
|
215
|
+
|
240
216
|
except Exception as e:
|
241
217
|
self.logger.error("Error in delete_page: %s", str(e))
|
242
218
|
return {"success": False, "message": f"Error: {str(e)}"}
|
243
219
|
|
244
220
|
async def close(self) -> None:
|
245
221
|
"""Close the client connection."""
|
246
|
-
await self._client.close()
|
222
|
+
await self._client.close()
|
notionary/{core/database/notion_database_manager_factory.py → database/notion_database_factory.py}
RENAMED
@@ -2,8 +2,8 @@ import logging
|
|
2
2
|
from typing import List, Optional, Dict, Any
|
3
3
|
from difflib import SequenceMatcher
|
4
4
|
|
5
|
-
from notionary.
|
6
|
-
from notionary.
|
5
|
+
from notionary.database.notion_database import NotionDatabase
|
6
|
+
from notionary.notion_client import NotionClient
|
7
7
|
from notionary.exceptions.database_exceptions import (
|
8
8
|
DatabaseConnectionError,
|
9
9
|
DatabaseInitializationError,
|
@@ -29,7 +29,7 @@ class NotionDatabaseFactory(LoggingMixin):
|
|
29
29
|
@classmethod
|
30
30
|
async def from_database_id(
|
31
31
|
cls, database_id: str, token: Optional[str] = None
|
32
|
-
) ->
|
32
|
+
) -> NotionDatabase:
|
33
33
|
"""
|
34
34
|
Create a NotionDatabaseManager from a database ID.
|
35
35
|
|
@@ -44,18 +44,17 @@ class NotionDatabaseFactory(LoggingMixin):
|
|
44
44
|
|
45
45
|
try:
|
46
46
|
formatted_id = format_uuid(database_id) or database_id
|
47
|
-
|
48
|
-
manager = NotionDatabaseManager(formatted_id, token)
|
49
|
-
|
50
47
|
|
51
|
-
|
48
|
+
manager = NotionDatabase(formatted_id, token)
|
49
|
+
|
50
|
+
logger.info(
|
51
|
+
"Successfully created database manager for ID: %s", formatted_id
|
52
|
+
)
|
52
53
|
return manager
|
53
54
|
|
54
55
|
except DatabaseInitializationError:
|
55
|
-
# Re-raise the already typed exception
|
56
56
|
raise
|
57
57
|
except NotionDatabaseException:
|
58
|
-
# Re-raise other custom exceptions
|
59
58
|
raise
|
60
59
|
except Exception as e:
|
61
60
|
error_msg = f"Error connecting to database {database_id}: {str(e)}"
|
@@ -65,7 +64,7 @@ class NotionDatabaseFactory(LoggingMixin):
|
|
65
64
|
@classmethod
|
66
65
|
async def from_database_name(
|
67
66
|
cls, database_name: str, token: Optional[str] = None
|
68
|
-
) ->
|
67
|
+
) -> NotionDatabase:
|
69
68
|
"""
|
70
69
|
Create a NotionDatabaseManager by finding a database with a matching name.
|
71
70
|
Uses fuzzy matching to find the closest match to the given name.
|
@@ -85,13 +84,11 @@ class NotionDatabaseFactory(LoggingMixin):
|
|
85
84
|
try:
|
86
85
|
logger.debug("Using search endpoint to find databases")
|
87
86
|
|
88
|
-
# Create search query for databases
|
89
87
|
search_payload = {
|
90
88
|
"filter": {"property": "object", "value": "database"},
|
91
89
|
"page_size": 100,
|
92
90
|
}
|
93
91
|
|
94
|
-
# Perform search
|
95
92
|
response = await client.post("search", search_payload)
|
96
93
|
|
97
94
|
if not response or "results" not in response:
|
@@ -136,11 +133,16 @@ class NotionDatabaseFactory(LoggingMixin):
|
|
136
133
|
|
137
134
|
matched_name = cls._extract_title_from_database(best_match)
|
138
135
|
|
139
|
-
logger.info(
|
136
|
+
logger.info(
|
137
|
+
"Found matching database: '%s' (ID: %s) with score: %.2f",
|
138
|
+
matched_name,
|
139
|
+
database_id,
|
140
|
+
best_score,
|
141
|
+
)
|
140
142
|
|
141
|
-
manager =
|
143
|
+
manager = NotionDatabase(database_id, token)
|
142
144
|
|
143
|
-
logger.info(
|
145
|
+
logger.info("Successfully created database manager for '%s'", matched_name)
|
144
146
|
await client.close()
|
145
147
|
return manager
|
146
148
|
|
@@ -187,4 +189,4 @@ class NotionDatabaseFactory(LoggingMixin):
|
|
187
189
|
if "plain_text" in text_obj:
|
188
190
|
text_parts.append(text_obj["plain_text"])
|
189
191
|
|
190
|
-
return "".join(text_parts)
|
192
|
+
return "".join(text_parts)
|
@@ -7,6 +7,7 @@ import httpx
|
|
7
7
|
from dotenv import load_dotenv
|
8
8
|
from notionary.util.logging_mixin import LoggingMixin
|
9
9
|
|
10
|
+
|
10
11
|
class HttpMethod(Enum):
|
11
12
|
"""Enum für HTTP-Methoden."""
|
12
13
|
|
@@ -15,6 +16,7 @@ class HttpMethod(Enum):
|
|
15
16
|
PATCH = "patch"
|
16
17
|
DELETE = "delete"
|
17
18
|
|
19
|
+
|
18
20
|
class NotionClient(LoggingMixin):
|
19
21
|
"""Verbesserter Notion-Client mit automatischer Ressourcenverwaltung."""
|
20
22
|
|
@@ -50,7 +52,7 @@ class NotionClient(LoggingMixin):
|
|
50
52
|
|
51
53
|
async def get(self, endpoint: str) -> Optional[Dict[str, Any]]:
|
52
54
|
return await self._make_request(HttpMethod.GET, endpoint)
|
53
|
-
|
55
|
+
|
54
56
|
async def get_page(self, page_id: str) -> Optional[Dict[str, Any]]:
|
55
57
|
return await self.get(f"pages/{page_id}")
|
56
58
|
|
@@ -126,4 +128,4 @@ class NotionClient(LoggingMixin):
|
|
126
128
|
loop.create_task(self.close())
|
127
129
|
self.logger.debug("Created cleanup task for NotionClient")
|
128
130
|
except RuntimeError:
|
129
|
-
self.logger.warning("No event loop available for auto-closing NotionClient")
|
131
|
+
self.logger.warning("No event loop available for auto-closing NotionClient")
|
@@ -0,0 +1,84 @@
|
|
1
|
+
import re
|
2
|
+
from typing import Any, Dict, List
|
3
|
+
from notionary.util.logging_mixin import LoggingMixin
|
4
|
+
|
5
|
+
|
6
|
+
class NotionPageContentChunker(LoggingMixin):
    """
    Handles markdown text processing to comply with Notion API length limitations.

    This class specifically addresses the Notion API constraint that limits
    rich_text elements to a maximum of 2000 characters. This particularly affects
    paragraph blocks within toggle blocks or other nested structures.

    Resolves the following typical API error:
    "validation_error - body.children[79].toggle.children[2].paragraph.rich_text[0].text.content.length
    should be ≤ 2000, instead was 2162."

    The class provides methods for:
    1. Automatically truncating text that exceeds the limit
    2. Splitting markdown into paragraphs and sentences

    Truncation drops the characters beyond ``max_text_length``; a warning is
    logged whenever that happens. The blocks passed in are never modified:
    every level that has to change is copied first.
    """

    def __init__(self, max_text_length: int = 1900):
        """
        Args:
            max_text_length: Maximum number of characters kept in a single
                rich_text ``text.content`` value.
        """
        self.max_text_length = max_text_length

    def fix_blocks_content_length(
        self, blocks: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Check each block and ensure text content doesn't exceed the limit.

        Args:
            blocks: Notion block dictionaries; left unmodified.

        Returns:
            A new list with one fixed copy per input block.
        """
        return [self._fix_single_block_content(block) for block in blocks]

    def _fix_single_block_content(self, block: Dict[str, Any]) -> Dict[str, Any]:
        """
        Fix content length in a single block and its children recursively.

        Args:
            block: A Notion block dictionary; left unmodified.

        Returns:
            A copy of ``block`` whose rich_text contents are at most
            ``max_text_length`` characters long, including nested children.
        """
        block_copy = block.copy()

        block_type = block.get("type")
        if not block_type:
            return block_copy

        content = block.get(block_type)
        if not content or not isinstance(content, dict):
            return block_copy

        # ``block.copy()`` is shallow: the payload dict is still shared with
        # the caller's block. Copy it too so the assignments below cannot
        # write through to the input.
        block_copy[block_type] = dict(content)

        if "rich_text" in content:
            self._fix_rich_text_content(block_copy, block_type, content)

        children = content.get("children")
        if children:
            block_copy[block_type]["children"] = [
                self._fix_single_block_content(child) for child in children
            ]

        return block_copy

    def _fix_rich_text_content(
        self, block_copy: Dict[str, Any], block_type: str, content: Dict[str, Any]
    ) -> None:
        """
        Fix rich text content that exceeds the length limit.

        Stores a new ``rich_text`` list on ``block_copy[block_type]``. Items
        within the limit are reused as they are; items that are too long are
        replaced by truncated copies, so ``content`` itself is not modified.

        Args:
            block_copy: The block copy that receives the fixed rich text.
            block_type: Key of the block payload inside ``block_copy``.
            content: The original payload providing the ``rich_text`` list.
        """
        fixed_rich_text = []
        for text_item in content["rich_text"]:
            if "text" in text_item and "content" in text_item["text"]:
                text_content = text_item["text"]["content"]
                if len(text_content) > self.max_text_length:
                    self.logger.warning(
                        "Truncating text content from %d to %d chars",
                        len(text_content),
                        self.max_text_length,
                    )
                    # Rebuild only the dicts on the path to the changed value.
                    text_item = {
                        **text_item,
                        "text": {
                            **text_item["text"],
                            "content": text_content[: self.max_text_length],
                        },
                    }
            fixed_rich_text.append(text_item)

        # Replace the payload rather than mutating it, so this stays safe
        # even if the caller has not copied the payload beforehand.
        block_copy[block_type] = {
            **block_copy[block_type],
            "rich_text": fixed_rich_text,
        }

    def split_to_paragraphs(self, markdown_text: str) -> List[str]:
        """Split markdown into paragraphs at blank lines, dropping empty parts."""
        paragraphs = re.split(r"\n\s*\n", markdown_text)
        return [p for p in paragraphs if p.strip()]

    def split_to_sentences(self, paragraph: str) -> List[str]:
        """Split a paragraph into sentences after '.', '!' or '?' followed by whitespace."""
        sentences = re.split(r"(?<=[.!?])\s+", paragraph)
        return [s for s in sentences if s.strip()]
|
@@ -1,15 +1,17 @@
|
|
1
1
|
from typing import Any, Dict, List, Optional
|
2
2
|
|
3
|
-
from notionary.
|
3
|
+
from notionary.notion_client import NotionClient
|
4
|
+
from notionary.converters.registry.block_element_registry import BlockElementRegistry
|
5
|
+
|
6
|
+
from notionary.converters.markdown_to_notion_converter import (
|
4
7
|
MarkdownToNotionConverter,
|
5
8
|
)
|
6
|
-
from notionary.
|
9
|
+
from notionary.converters.notion_to_markdown_converter import (
|
7
10
|
NotionToMarkdownConverter,
|
8
11
|
)
|
9
|
-
from notionary.
|
10
|
-
|
12
|
+
from notionary.page.content.notion_page_content_chunker import (
|
13
|
+
NotionPageContentChunker,
|
11
14
|
)
|
12
|
-
from notionary.core.notion_client import NotionClient
|
13
15
|
from notionary.util.logging_mixin import LoggingMixin
|
14
16
|
|
15
17
|
|
@@ -28,15 +30,29 @@ class PageContentManager(LoggingMixin):
|
|
28
30
|
self._notion_to_markdown_converter = NotionToMarkdownConverter(
|
29
31
|
block_registry=block_registry
|
30
32
|
)
|
33
|
+
self._chunker = NotionPageContentChunker()
|
31
34
|
|
32
35
|
async def append_markdown(self, markdown_text: str) -> str:
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
"""
|
37
|
+
Append markdown text to a Notion page, automatically handling content length limits.
|
38
|
+
"""
|
39
|
+
try:
|
40
|
+
blocks = self._markdown_to_notion_converter.convert(markdown_text)
|
41
|
+
|
42
|
+
# Fix any blocks that exceed Notion's content length limits
|
43
|
+
fixed_blocks = self._chunker.fix_blocks_content_length(blocks)
|
44
|
+
|
45
|
+
result = await self._client.patch(
|
46
|
+
f"blocks/{self.page_id}/children", {"children": fixed_blocks}
|
47
|
+
)
|
48
|
+
return (
|
49
|
+
"Successfully added text to the page."
|
50
|
+
if result
|
51
|
+
else "Failed to add text."
|
52
|
+
)
|
53
|
+
except Exception as e:
|
54
|
+
self.logger.error("Error appending markdown: %s", str(e))
|
55
|
+
raise
|
40
56
|
|
41
57
|
async def clear(self) -> str:
|
42
58
|
blocks = await self._client.get(f"blocks/{self.page_id}/children")
|
@@ -53,7 +69,7 @@ class PageContentManager(LoggingMixin):
|
|
53
69
|
if block.get("type") in ["child_database", "database", "linked_database"]:
|
54
70
|
skipped += 1
|
55
71
|
continue
|
56
|
-
|
72
|
+
|
57
73
|
if await self._client.delete(f"blocks/{block['id']}"):
|
58
74
|
deleted += 1
|
59
75
|
|