notionary 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. notionary/__init__.py +21 -6
  2. notionary/{core/converters → converters}/elements/audio_element.py +7 -5
  3. notionary/{core/converters → converters}/elements/bookmark_element.py +1 -1
  4. notionary/{core/converters → converters}/elements/callout_element.py +2 -2
  5. notionary/{core/converters → converters}/elements/code_block_element.py +1 -1
  6. notionary/{core/converters → converters}/elements/column_element.py +1 -1
  7. notionary/{core/converters → converters}/elements/divider_element.py +1 -1
  8. notionary/{core/converters → converters}/elements/embed_element.py +3 -5
  9. notionary/{core/converters → converters}/elements/heading_element.py +2 -2
  10. notionary/{core/converters → converters}/elements/image_element.py +1 -1
  11. notionary/{core/converters → converters}/elements/list_element.py +2 -2
  12. notionary/{core/converters → converters}/elements/paragraph_element.py +2 -2
  13. notionary/{core/converters → converters}/elements/qoute_element.py +1 -1
  14. notionary/{core/converters → converters}/elements/table_element.py +2 -2
  15. notionary/{core/converters → converters}/elements/todo_lists.py +2 -2
  16. notionary/{core/converters → converters}/elements/toggle_element.py +24 -21
  17. notionary/{core/converters → converters}/elements/video_element.py +1 -1
  18. notionary/{core/converters → converters}/markdown_to_notion_converter.py +72 -111
  19. notionary/{core/converters → converters}/notion_to_markdown_converter.py +2 -2
  20. notionary/{core/converters → converters}/registry/block_element_registry.py +5 -5
  21. notionary/{core/converters → converters}/registry/block_element_registry_builder.py +18 -18
  22. notionary/database/database_discovery.py +142 -0
  23. notionary/{core/database → database}/database_info_service.py +1 -1
  24. notionary/{core/database/notion_database_manager.py → database/notion_database.py} +33 -57
  25. notionary/{core/database/notion_database_manager_factory.py → database/notion_database_factory.py} +18 -16
  26. notionary/{core/notion_client.py → notion_client.py} +4 -2
  27. notionary/page/content/notion_page_content_chunker.py +84 -0
  28. notionary/{core/page → page}/content/page_content_manager.py +29 -13
  29. notionary/{core/page → page}/metadata/metadata_editor.py +59 -46
  30. notionary/{core/page → page}/metadata/notion_icon_manager.py +10 -12
  31. notionary/{core/page → page}/metadata/notion_page_cover_manager.py +16 -21
  32. notionary/page/notion_page.py +504 -0
  33. notionary/page/notion_page_factory.py +256 -0
  34. notionary/{core/page → page}/properites/database_property_service.py +115 -99
  35. notionary/{core/page → page}/properites/page_property_manager.py +81 -52
  36. notionary/{core/page → page}/properites/property_formatter.py +1 -1
  37. notionary/{core/page → page}/properites/property_operation_result.py +43 -30
  38. notionary/{core/page → page}/properites/property_value_extractor.py +26 -8
  39. notionary/{core/page → page}/relations/notion_page_relation_manager.py +72 -53
  40. notionary/{core/page → page}/relations/notion_page_title_resolver.py +12 -12
  41. notionary/{core/page → page}/relations/page_database_relation.py +15 -15
  42. notionary/{core/page → page}/relations/relation_operation_result.py +50 -41
  43. notionary/util/page_id_utils.py +14 -8
  44. {notionary-0.1.11.dist-info → notionary-0.1.13.dist-info}/METADATA +1 -1
  45. notionary-0.1.13.dist-info/RECORD +56 -0
  46. notionary/core/database/notion_database_schema.py +0 -104
  47. notionary/core/page/notion_page_manager.py +0 -322
  48. notionary-0.1.11.dist-info/RECORD +0 -54
  49. /notionary/{core/converters → converters}/__init__.py +0 -0
  50. /notionary/{core/converters → converters}/elements/notion_block_element.py +0 -0
  51. /notionary/{core/converters → converters}/elements/text_inline_formatter.py +0 -0
  52. /notionary/{core/database → database}/models/page_result.py +0 -0
  53. {notionary-0.1.11.dist-info → notionary-0.1.13.dist-info}/WHEEL +0 -0
  54. {notionary-0.1.11.dist-info → notionary-0.1.13.dist-info}/licenses/LICENSE +0 -0
  55. {notionary-0.1.11.dist-info → notionary-0.1.13.dist-info}/top_level.txt +0 -0
@@ -1,30 +1,30 @@
1
1
  from typing import List, Type
2
2
  from collections import OrderedDict
3
3
 
4
- from notionary.core.converters.elements.audio_element import AudioElement
5
- from notionary.core.converters.elements.embed_element import EmbedElement
6
- from notionary.core.converters.elements.notion_block_element import NotionBlockElement
7
- from notionary.core.converters.registry.block_element_registry import (
4
+ from notionary.converters.elements.audio_element import AudioElement
5
+ from notionary.converters.elements.embed_element import EmbedElement
6
+ from notionary.converters.elements.notion_block_element import NotionBlockElement
7
+ from notionary.converters.registry.block_element_registry import (
8
8
  BlockElementRegistry,
9
9
  )
10
10
 
11
- from notionary.core.converters.elements.paragraph_element import ParagraphElement
12
- from notionary.core.converters.elements.heading_element import HeadingElement
13
- from notionary.core.converters.elements.callout_element import CalloutElement
14
- from notionary.core.converters.elements.code_block_element import CodeBlockElement
15
- from notionary.core.converters.elements.divider_element import DividerElement
16
- from notionary.core.converters.elements.table_element import TableElement
17
- from notionary.core.converters.elements.todo_lists import TodoElement
18
- from notionary.core.converters.elements.list_element import (
11
+ from notionary.converters.elements.paragraph_element import ParagraphElement
12
+ from notionary.converters.elements.heading_element import HeadingElement
13
+ from notionary.converters.elements.callout_element import CalloutElement
14
+ from notionary.converters.elements.code_block_element import CodeBlockElement
15
+ from notionary.converters.elements.divider_element import DividerElement
16
+ from notionary.converters.elements.table_element import TableElement
17
+ from notionary.converters.elements.todo_lists import TodoElement
18
+ from notionary.converters.elements.list_element import (
19
19
  BulletedListElement,
20
20
  NumberedListElement,
21
21
  )
22
- from notionary.core.converters.elements.qoute_element import QuoteElement
23
- from notionary.core.converters.elements.image_element import ImageElement
24
- from notionary.core.converters.elements.video_element import VideoElement
25
- from notionary.core.converters.elements.toggle_element import ToggleElement
26
- from notionary.core.converters.elements.bookmark_element import BookmarkElement
27
- from notionary.core.converters.elements.column_element import ColumnElement
22
+ from notionary.converters.elements.qoute_element import QuoteElement
23
+ from notionary.converters.elements.image_element import ImageElement
24
+ from notionary.converters.elements.video_element import VideoElement
25
+ from notionary.converters.elements.toggle_element import ToggleElement
26
+ from notionary.converters.elements.bookmark_element import BookmarkElement
27
+ from notionary.converters.elements.column_element import ColumnElement
28
28
 
29
29
 
30
30
  class BlockElementRegistryBuilder:
@@ -0,0 +1,142 @@
1
+ from typing import (
2
+ AsyncGenerator,
3
+ Dict,
4
+ List,
5
+ Optional,
6
+ Any,
7
+ Tuple,
8
+ )
9
+ from notionary.notion_client import NotionClient
10
+ from notionary.util.logging_mixin import LoggingMixin
11
+
12
+
13
class DatabaseDiscovery(LoggingMixin):
    """
    Lists the Notion databases that the configured integration can see.

    Results are obtained by posting to the client's "search" endpoint with a
    filter on object type "database" and following the returned cursor until
    the API reports that no more pages are available.
    """

    def __init__(self, client: Optional[NotionClient] = None) -> None:
        """
        Set up the discovery helper.

        Args:
            client: Client used for API calls; a new NotionClient is created
                when none (or a falsy value) is supplied.
        """
        self._client = client or NotionClient()
        self.logger.info("DatabaseDiscovery initialized")

    async def discover(self, page_size: int = 100) -> List[Tuple[str, str]]:
        """
        Collect every accessible database as a (title, id) pair.

        Args:
            page_size: Number of databases requested per API call.

        Returns:
            List of (database_title, database_id) tuples. Entries without an
            "id" are left out.
        """
        return [
            (self._extract_database_title(entry), entry.get("id"))
            async for entry in self._iter_databases(page_size)
            if entry.get("id")
        ]

    async def discover_and_print(self, page_size: int = 100) -> List[Tuple[str, str]]:
        """
        Run discover() and write a human-readable summary to stdout.

        Args:
            page_size: Number of databases requested per API call.

        Returns:
            The list produced by discover(), unchanged.
        """
        found = await self.discover(page_size)

        if found:
            print(f"✅ Found {len(found)} databases:")
            for position, (name, identifier) in enumerate(found, start=1):
                print(f"{position}. {name} (ID: {identifier})")
        else:
            # Nothing visible usually means nothing was shared with the integration.
            print("\n⚠️ No databases found!")
            print("Please ensure your Notion integration has access to databases.")
            print(
                "You need to share the databases with your integration in Notion settings."
            )

        return found

    async def _iter_databases(
        self, page_size: int = 100
    ) -> AsyncGenerator[Dict[str, Any], None]:
        """
        Yield database objects one at a time, fetching page after page.

        Args:
            page_size: Number of databases requested per API call.

        Yields:
            Each element of the "results" list of every search response.
        """
        cursor: Optional[str] = None
        more_pages = True

        while more_pages:
            payload: Dict[str, Any] = {
                "filter": {"value": "database", "property": "object"},
                "page_size": page_size,
            }
            if cursor:
                payload["start_cursor"] = cursor

            response = await self._client.post("search", data=payload)

            # An empty response or one lacking "results" ends the iteration.
            if not response or "results" not in response:
                self.logger.error("Error fetching databases")
                return

            for entry in response["results"]:
                yield entry

            # Continue only when the API both flags more data and supplies a cursor.
            cursor = response.get("next_cursor") if response.get("has_more") else None
            more_pages = bool(cursor)

    def _extract_database_title(self, database: Dict[str, Any]) -> str:
        """
        Build the display title of a database object.

        Args:
            database: Database object as returned by the API.

        Returns:
            The concatenated "plain_text" of all title parts, or "Untitled"
            when the object has no "title" key or no part carries "plain_text".
        """
        fragments = [
            part["plain_text"]
            for part in database.get("title", [])
            if "plain_text" in part
        ]
        return "".join(fragments) if fragments else "Untitled"
@@ -1,5 +1,5 @@
1
1
  from typing import Optional
2
- from notionary.core.notion_client import NotionClient
2
+ from notionary.notion_client import NotionClient
3
3
 
4
4
 
5
5
  class DatabaseInfoService:
@@ -1,15 +1,15 @@
1
1
  from typing import Any, AsyncGenerator, Dict, List, Optional
2
2
 
3
- from notionary.core.notion_client import NotionClient
4
- from notionary.core.page.notion_page_manager import NotionPageManager
3
+ from notionary.notion_client import NotionClient
4
+ from notionary.page.notion_page import NotionPage
5
5
  from notionary.util.logging_mixin import LoggingMixin
6
6
  from notionary.util.page_id_utils import format_uuid
7
7
 
8
8
 
9
- class NotionDatabaseManager(LoggingMixin):
9
+ class NotionDatabase(LoggingMixin):
10
10
  """
11
11
  Minimal manager for Notion databases.
12
- Focused exclusively on creating basic pages and retrieving page managers
12
+ Focused exclusively on creating basic pages and retrieving page managers
13
13
  for further page operations.
14
14
  """
15
15
 
@@ -24,59 +24,31 @@ class NotionDatabaseManager(LoggingMixin):
24
24
  self.database_id = format_uuid(database_id) or database_id
25
25
  self._client = NotionClient(token=token)
26
26
 
27
-
28
- async def create_blank_page(self) -> Optional[str]:
27
+ async def create_blank_page(self) -> Optional[NotionPage]:
29
28
  """
30
29
  Create a new blank page in the database with minimal properties.
31
-
30
+
32
31
  Returns:
33
- Optional[str]: The ID of the created page, or None if creation failed
32
+ NotionPage for the created page, or None if creation failed
34
33
  """
35
34
  try:
36
35
  response = await self._client.post(
37
- "pages",
38
- {
39
- "parent": {"database_id": self.database_id},
40
- "properties": {}
41
- }
36
+ "pages", {"parent": {"database_id": self.database_id}, "properties": {}}
42
37
  )
43
-
38
+
44
39
  if response and "id" in response:
45
40
  page_id = response["id"]
46
- self.logger.info("Created blank page %s in database %s", page_id, self.database_id)
47
- return page_id
48
-
49
- self.logger.warning("Page creation failed: invalid response")
50
- return None
51
-
52
- except Exception as e:
53
- self.logger.error("Error creating blank page: %s", str(e))
54
- return None
41
+ self.logger.info(
42
+ "Created blank page %s in database %s", page_id, self.database_id
43
+ )
55
44
 
56
- async def get_page_manager(self, page_id: str) -> Optional[NotionPageManager]:
57
- """
58
- Get a NotionPageManager for a specific page.
45
+ return NotionPage(page_id=page_id)
59
46
 
60
- Args:
61
- page_id: The ID of the page
47
+ self.logger.warning("Page creation failed: invalid response")
48
+ return None
62
49
 
63
- Returns:
64
- NotionPageManager instance or None if the page wasn't found
65
- """
66
- self.logger.debug("Getting page manager for page %s", page_id)
67
-
68
- try:
69
- # Check if the page exists
70
- page_data = await self._client.get_page(page_id)
71
-
72
- if not page_data:
73
- self.logger.error("Page %s not found", page_id)
74
- return None
75
-
76
- return NotionPageManager(page_id=page_id)
77
-
78
50
  except Exception as e:
79
- self.logger.error("Error getting page manager: %s", str(e))
51
+ self.logger.error("Error creating blank page: %s", str(e))
80
52
  return None
81
53
 
82
54
  async def get_pages(
@@ -84,7 +56,7 @@ class NotionDatabaseManager(LoggingMixin):
84
56
  limit: int = 100,
85
57
  filter_conditions: Optional[Dict[str, Any]] = None,
86
58
  sorts: Optional[List[Dict[str, Any]]] = None,
87
- ) -> List[NotionPageManager]:
59
+ ) -> List[NotionPage]:
88
60
  """
89
61
  Get all pages from the database.
90
62
 
@@ -94,7 +66,7 @@ class NotionDatabaseManager(LoggingMixin):
94
66
  sorts: Optional sort instructions for the database query
95
67
 
96
68
  Returns:
97
- List of NotionPageManager instances for each page
69
+ List of NotionPage instances for each page
98
70
  """
99
71
  self.logger.debug(
100
72
  "Getting up to %d pages with filter: %s, sorts: %s",
@@ -103,7 +75,7 @@ class NotionDatabaseManager(LoggingMixin):
103
75
  sorts,
104
76
  )
105
77
 
106
- pages: List[NotionPageManager] = []
78
+ pages: List[NotionPage] = []
107
79
  count = 0
108
80
 
109
81
  async for page in self.iter_pages(
@@ -127,7 +99,7 @@ class NotionDatabaseManager(LoggingMixin):
127
99
  page_size: int = 100,
128
100
  filter_conditions: Optional[Dict[str, Any]] = None,
129
101
  sorts: Optional[List[Dict[str, Any]]] = None,
130
- ) -> AsyncGenerator[NotionPageManager, None]:
102
+ ) -> AsyncGenerator[NotionPage, None]:
131
103
  """
132
104
  Asynchronous generator that yields pages from the database.
133
105
  Directly queries the Notion API without using the schema.
@@ -138,7 +110,7 @@ class NotionDatabaseManager(LoggingMixin):
138
110
  sorts: Optional sort instructions for the database query
139
111
 
140
112
  Yields:
141
- NotionPageManager instances for each page
113
+ NotionPage instances for each page
142
114
  """
143
115
  self.logger.debug(
144
116
  "Iterating pages with page_size: %d, filter: %s, sorts: %s",
@@ -174,10 +146,12 @@ class NotionDatabaseManager(LoggingMixin):
174
146
  for page in result["results"]:
175
147
  page_id: str = page.get("id", "")
176
148
  title = self._extract_page_title(page)
177
-
149
+
178
150
  page_url = f"https://notion.so/{page_id.replace('-', '')}"
179
151
 
180
- notion_page_manager = NotionPageManager(page_id=page_id, title=title, url=page_url)
152
+ notion_page_manager = NotionPage(
153
+ page_id=page_id, title=title, url=page_url
154
+ )
181
155
  yield notion_page_manager
182
156
 
183
157
  # Update pagination parameters
@@ -222,10 +196,10 @@ class NotionDatabaseManager(LoggingMixin):
222
196
  """
223
197
  try:
224
198
  formatted_page_id = format_uuid(page_id) or page_id
225
-
199
+
226
200
  # Archive the page (Notion's way of deleting)
227
201
  data = {"archived": True}
228
-
202
+
229
203
  result = await self._client.patch(f"pages/{formatted_page_id}", data)
230
204
  if not result:
231
205
  self.logger.error("Error deleting page %s", formatted_page_id)
@@ -233,14 +207,16 @@ class NotionDatabaseManager(LoggingMixin):
233
207
  "success": False,
234
208
  "message": f"Failed to delete page {formatted_page_id}",
235
209
  }
236
-
237
- self.logger.info("Page %s successfully deleted (archived)", formatted_page_id)
210
+
211
+ self.logger.info(
212
+ "Page %s successfully deleted (archived)", formatted_page_id
213
+ )
238
214
  return {"success": True, "page_id": formatted_page_id}
239
-
215
+
240
216
  except Exception as e:
241
217
  self.logger.error("Error in delete_page: %s", str(e))
242
218
  return {"success": False, "message": f"Error: {str(e)}"}
243
219
 
244
220
  async def close(self) -> None:
245
221
  """Close the client connection."""
246
- await self._client.close()
222
+ await self._client.close()
@@ -2,8 +2,8 @@ import logging
2
2
  from typing import List, Optional, Dict, Any
3
3
  from difflib import SequenceMatcher
4
4
 
5
- from notionary.core.notion_client import NotionClient
6
- from notionary.core.database.notion_database_manager import NotionDatabaseManager
5
+ from notionary.database.notion_database import NotionDatabase
6
+ from notionary.notion_client import NotionClient
7
7
  from notionary.exceptions.database_exceptions import (
8
8
  DatabaseConnectionError,
9
9
  DatabaseInitializationError,
@@ -29,7 +29,7 @@ class NotionDatabaseFactory(LoggingMixin):
29
29
  @classmethod
30
30
  async def from_database_id(
31
31
  cls, database_id: str, token: Optional[str] = None
32
- ) -> NotionDatabaseManager:
32
+ ) -> NotionDatabase:
33
33
  """
34
34
  Create a NotionDatabaseManager from a database ID.
35
35
 
@@ -44,18 +44,17 @@ class NotionDatabaseFactory(LoggingMixin):
44
44
 
45
45
  try:
46
46
  formatted_id = format_uuid(database_id) or database_id
47
-
48
- manager = NotionDatabaseManager(formatted_id, token)
49
-
50
47
 
51
- logger.info("Successfully created database manager for ID: %s", formatted_id)
48
+ manager = NotionDatabase(formatted_id, token)
49
+
50
+ logger.info(
51
+ "Successfully created database manager for ID: %s", formatted_id
52
+ )
52
53
  return manager
53
54
 
54
55
  except DatabaseInitializationError:
55
- # Re-raise the already typed exception
56
56
  raise
57
57
  except NotionDatabaseException:
58
- # Re-raise other custom exceptions
59
58
  raise
60
59
  except Exception as e:
61
60
  error_msg = f"Error connecting to database {database_id}: {str(e)}"
@@ -65,7 +64,7 @@ class NotionDatabaseFactory(LoggingMixin):
65
64
  @classmethod
66
65
  async def from_database_name(
67
66
  cls, database_name: str, token: Optional[str] = None
68
- ) -> NotionDatabaseManager:
67
+ ) -> NotionDatabase:
69
68
  """
70
69
  Create a NotionDatabaseManager by finding a database with a matching name.
71
70
  Uses fuzzy matching to find the closest match to the given name.
@@ -85,13 +84,11 @@ class NotionDatabaseFactory(LoggingMixin):
85
84
  try:
86
85
  logger.debug("Using search endpoint to find databases")
87
86
 
88
- # Create search query for databases
89
87
  search_payload = {
90
88
  "filter": {"property": "object", "value": "database"},
91
89
  "page_size": 100,
92
90
  }
93
91
 
94
- # Perform search
95
92
  response = await client.post("search", search_payload)
96
93
 
97
94
  if not response or "results" not in response:
@@ -136,11 +133,16 @@ class NotionDatabaseFactory(LoggingMixin):
136
133
 
137
134
  matched_name = cls._extract_title_from_database(best_match)
138
135
 
139
- logger.info("Found matching database: '%s' (ID: %s) with score: %.2f", matched_name, database_id, best_score)
136
+ logger.info(
137
+ "Found matching database: '%s' (ID: %s) with score: %.2f",
138
+ matched_name,
139
+ database_id,
140
+ best_score,
141
+ )
140
142
 
141
- manager = NotionDatabaseManager(database_id, token)
143
+ manager = NotionDatabase(database_id, token)
142
144
 
143
- logger.info(f"Successfully created database manager for '{matched_name}'")
145
+ logger.info("Successfully created database manager for '%s'", matched_name)
144
146
  await client.close()
145
147
  return manager
146
148
 
@@ -187,4 +189,4 @@ class NotionDatabaseFactory(LoggingMixin):
187
189
  if "plain_text" in text_obj:
188
190
  text_parts.append(text_obj["plain_text"])
189
191
 
190
- return "".join(text_parts)
192
+ return "".join(text_parts)
@@ -7,6 +7,7 @@ import httpx
7
7
  from dotenv import load_dotenv
8
8
  from notionary.util.logging_mixin import LoggingMixin
9
9
 
10
+
10
11
  class HttpMethod(Enum):
11
12
  """Enum für HTTP-Methoden."""
12
13
 
@@ -15,6 +16,7 @@ class HttpMethod(Enum):
15
16
  PATCH = "patch"
16
17
  DELETE = "delete"
17
18
 
19
+
18
20
  class NotionClient(LoggingMixin):
19
21
  """Verbesserter Notion-Client mit automatischer Ressourcenverwaltung."""
20
22
 
@@ -50,7 +52,7 @@ class NotionClient(LoggingMixin):
50
52
 
51
53
  async def get(self, endpoint: str) -> Optional[Dict[str, Any]]:
52
54
  return await self._make_request(HttpMethod.GET, endpoint)
53
-
55
+
54
56
  async def get_page(self, page_id: str) -> Optional[Dict[str, Any]]:
55
57
  return await self.get(f"pages/{page_id}")
56
58
 
@@ -126,4 +128,4 @@ class NotionClient(LoggingMixin):
126
128
  loop.create_task(self.close())
127
129
  self.logger.debug("Created cleanup task for NotionClient")
128
130
  except RuntimeError:
129
- self.logger.warning("No event loop available for auto-closing NotionClient")
131
+ self.logger.warning("No event loop available for auto-closing NotionClient")
@@ -0,0 +1,84 @@
1
+ import re
2
+ from typing import Any, Dict, List
3
+ from notionary.util.logging_mixin import LoggingMixin
4
+
5
+
6
class NotionPageContentChunker(LoggingMixin):
    """
    Handles markdown text processing to comply with Notion API length limitations.

    This class specifically addresses the Notion API constraint that limits
    rich_text elements to a maximum of 2000 characters. This particularly affects
    paragraph blocks within toggle blocks or other nested structures.

    Resolves the following typical API error:
    "validation_error - body.children[79].toggle.children[2].paragraph.rich_text[0].text.content.length
    should be ≤ 2000, instead was 2162."

    The class provides methods for:
    1. Automatically truncating text that exceeds the limit
    2. Splitting markdown into paragraphs and sentences

    The block-fixing methods never modify the blocks passed in: every container
    on the path to a changed value is rebuilt, while untouched sub-structures are
    shared with the input.
    """

    def __init__(self, max_text_length: int = 1900):
        """
        Args:
            max_text_length: Maximum number of characters kept per rich_text
                item (default 1900, below the 2000 limit quoted above).
        """
        self.max_text_length = max_text_length

    def fix_blocks_content_length(
        self, blocks: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Return new blocks whose text content fits within max_text_length.

        Args:
            blocks: Notion block dictionaries; they are left unmodified.

        Returns:
            A new list with one fixed block per input block.
        """
        return [self._fix_single_block_content(block) for block in blocks]

    def _fix_single_block_content(self, block: Dict[str, Any]) -> Dict[str, Any]:
        """
        Fix content length in a single block and, recursively, its children.

        Args:
            block: Block dictionary whose payload is stored under the key
                named by block["type"].

        Returns:
            A copy of the block with over-long text truncated. Blocks without
            a type or without a dict payload are returned as a plain copy.
        """
        block_copy = block.copy()

        block_type = block.get("type")
        if not block_type:
            return block_copy

        content = block.get(block_type)
        if not content or not isinstance(content, dict):
            return block_copy

        # block.copy() is shallow: detach the payload dict before writing into
        # it, otherwise the caller's original block would be modified as well.
        block_copy[block_type] = dict(content)

        if "rich_text" in content:
            self._fix_rich_text_content(block_copy, block_type, content)

        children = content.get("children")
        if children:
            block_copy[block_type]["children"] = [
                self._fix_single_block_content(child) for child in children
            ]

        return block_copy

    def _fix_rich_text_content(
        self, block_copy: Dict[str, Any], block_type: str, content: Dict[str, Any]
    ) -> None:
        """
        Truncate rich text items that exceed the length limit.

        The result is stored in block_copy[block_type]["rich_text"]. Neither
        `content` nor the items it holds are mutated; truncated items are
        replaced by new dictionaries.

        Args:
            block_copy: Block copy that receives the fixed rich text.
            block_type: Key of the block payload inside block_copy.
            content: Original payload providing the "rich_text" list.
        """
        rich_text = content.get("rich_text")
        if not rich_text:
            return

        fixed_items = []
        for text_item in rich_text:
            text = text_item.get("text") if isinstance(text_item, dict) else None
            text_content = text.get("content") if isinstance(text, dict) else None

            if (
                isinstance(text_content, str)
                and len(text_content) > self.max_text_length
            ):
                self.logger.warning(
                    "Truncating text content from %d to %d chars",
                    len(text_content),
                    self.max_text_length,
                )
                # Rebuild only the dictionaries on the path to the new value.
                text_item = {
                    **text_item,
                    "text": {**text, "content": text_content[: self.max_text_length]},
                }

            fixed_items.append(text_item)

        # Rebind instead of writing in place so a payload dict still shared
        # with the original block is never touched.
        block_copy[block_type] = {**block_copy[block_type], "rich_text": fixed_items}

    def split_to_paragraphs(self, markdown_text: str) -> List[str]:
        """
        Split markdown into paragraphs.

        Paragraphs are separated by a newline, optional whitespace and another
        newline; pieces that are empty or whitespace-only are dropped.
        """
        paragraphs = re.split(r"\n\s*\n", markdown_text)
        return [p for p in paragraphs if p.strip()]

    def split_to_sentences(self, paragraph: str) -> List[str]:
        """
        Split a paragraph into sentences.

        A split happens at whitespace that follows ".", "!" or "?"; pieces that
        are empty or whitespace-only are dropped.
        """
        sentences = re.split(r"(?<=[.!?])\s+", paragraph)
        return [s for s in sentences if s.strip()]
@@ -1,15 +1,17 @@
1
1
  from typing import Any, Dict, List, Optional
2
2
 
3
- from notionary.core.converters.markdown_to_notion_converter import (
3
+ from notionary.notion_client import NotionClient
4
+ from notionary.converters.registry.block_element_registry import BlockElementRegistry
5
+
6
+ from notionary.converters.markdown_to_notion_converter import (
4
7
  MarkdownToNotionConverter,
5
8
  )
6
- from notionary.core.converters.notion_to_markdown_converter import (
9
+ from notionary.converters.notion_to_markdown_converter import (
7
10
  NotionToMarkdownConverter,
8
11
  )
9
- from notionary.core.converters.registry.block_element_registry import (
10
- BlockElementRegistry,
12
+ from notionary.page.content.notion_page_content_chunker import (
13
+ NotionPageContentChunker,
11
14
  )
12
- from notionary.core.notion_client import NotionClient
13
15
  from notionary.util.logging_mixin import LoggingMixin
14
16
 
15
17
 
@@ -28,15 +30,29 @@ class PageContentManager(LoggingMixin):
28
30
  self._notion_to_markdown_converter = NotionToMarkdownConverter(
29
31
  block_registry=block_registry
30
32
  )
33
+ self._chunker = NotionPageContentChunker()
31
34
 
32
35
  async def append_markdown(self, markdown_text: str) -> str:
33
- blocks = self._markdown_to_notion_converter.convert(markdown_text)
34
- result = await self._client.patch(
35
- f"blocks/{self.page_id}/children", {"children": blocks}
36
- )
37
- return (
38
- "Successfully added text to the page." if result else "Failed to add text."
39
- )
36
+ """
37
+ Append markdown text to a Notion page, automatically handling content length limits.
38
+ """
39
+ try:
40
+ blocks = self._markdown_to_notion_converter.convert(markdown_text)
41
+
42
+ # Fix any blocks that exceed Notion's content length limits
43
+ fixed_blocks = self._chunker.fix_blocks_content_length(blocks)
44
+
45
+ result = await self._client.patch(
46
+ f"blocks/{self.page_id}/children", {"children": fixed_blocks}
47
+ )
48
+ return (
49
+ "Successfully added text to the page."
50
+ if result
51
+ else "Failed to add text."
52
+ )
53
+ except Exception as e:
54
+ self.logger.error("Error appending markdown: %s", str(e))
55
+ raise
40
56
 
41
57
  async def clear(self) -> str:
42
58
  blocks = await self._client.get(f"blocks/{self.page_id}/children")
@@ -53,7 +69,7 @@ class PageContentManager(LoggingMixin):
53
69
  if block.get("type") in ["child_database", "database", "linked_database"]:
54
70
  skipped += 1
55
71
  continue
56
-
72
+
57
73
  if await self._client.delete(f"blocks/{block['id']}"):
58
74
  deleted += 1
59
75