notionary 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +3 -2
- notionary/blocks/__init__.py +54 -25
- notionary/blocks/audio/__init__.py +7 -0
- notionary/blocks/audio/audio_element.py +152 -0
- notionary/blocks/audio/audio_markdown_node.py +29 -0
- notionary/blocks/audio/audio_models.py +59 -0
- notionary/blocks/bookmark/__init__.py +7 -0
- notionary/blocks/{bookmark_element.py → bookmark/bookmark_element.py} +20 -65
- notionary/blocks/bookmark/bookmark_markdown_node.py +43 -0
- notionary/blocks/bookmark/bookmark_models.py +0 -0
- notionary/blocks/bulleted_list/__init__.py +7 -0
- notionary/blocks/{bulleted_list_element.py → bulleted_list/bulleted_list_element.py} +7 -3
- notionary/blocks/bulleted_list/bulleted_list_markdown_node.py +33 -0
- notionary/blocks/bulleted_list/bulleted_list_models.py +0 -0
- notionary/blocks/callout/__init__.py +7 -0
- notionary/blocks/callout/callout_element.py +132 -0
- notionary/blocks/callout/callout_markdown_node.py +31 -0
- notionary/blocks/callout/callout_models.py +0 -0
- notionary/blocks/code/__init__.py +7 -0
- notionary/blocks/{code_block_element.py → code/code_element.py} +72 -40
- notionary/blocks/code/code_markdown_node.py +43 -0
- notionary/blocks/code/code_models.py +0 -0
- notionary/blocks/column/__init__.py +5 -0
- notionary/blocks/{column_element.py → column/column_element.py} +24 -55
- notionary/blocks/column/column_models.py +0 -0
- notionary/blocks/divider/__init__.py +7 -0
- notionary/blocks/{divider_element.py → divider/divider_element.py} +11 -3
- notionary/blocks/divider/divider_markdown_node.py +24 -0
- notionary/blocks/divider/divider_models.py +0 -0
- notionary/blocks/document/__init__.py +7 -0
- notionary/blocks/document/document_element.py +102 -0
- notionary/blocks/document/document_markdown_node.py +31 -0
- notionary/blocks/document/document_models.py +0 -0
- notionary/blocks/embed/__init__.py +7 -0
- notionary/blocks/{embed_element.py → embed/embed_element.py} +50 -32
- notionary/blocks/embed/embed_markdown_node.py +30 -0
- notionary/blocks/embed/embed_models.py +0 -0
- notionary/blocks/heading/__init__.py +7 -0
- notionary/blocks/{heading_element.py → heading/heading_element.py} +25 -17
- notionary/blocks/heading/heading_markdown_node.py +29 -0
- notionary/blocks/heading/heading_models.py +0 -0
- notionary/blocks/image/__init__.py +7 -0
- notionary/blocks/{image_element.py → image/image_element.py} +62 -42
- notionary/blocks/image/image_markdown_node.py +33 -0
- notionary/blocks/image/image_models.py +0 -0
- notionary/blocks/markdown_builder.py +356 -0
- notionary/blocks/markdown_node.py +29 -0
- notionary/blocks/mention/__init__.py +7 -0
- notionary/blocks/{mention_element.py → mention/mention_element.py} +6 -2
- notionary/blocks/mention/mention_markdown_node.py +38 -0
- notionary/blocks/mention/mention_models.py +0 -0
- notionary/blocks/numbered_list/__init__.py +7 -0
- notionary/blocks/{numbered_list_element.py → numbered_list/numbered_list_element.py} +10 -6
- notionary/blocks/numbered_list/numbered_list_markdown_node.py +29 -0
- notionary/blocks/numbered_list/numbered_list_models.py +0 -0
- notionary/blocks/paragraph/__init__.py +7 -0
- notionary/blocks/{paragraph_element.py → paragraph/paragraph_element.py} +7 -3
- notionary/blocks/paragraph/paragraph_markdown_node.py +25 -0
- notionary/blocks/paragraph/paragraph_models.py +0 -0
- notionary/blocks/quote/__init__.py +7 -0
- notionary/blocks/quote/quote_element.py +92 -0
- notionary/blocks/quote/quote_markdown_node.py +23 -0
- notionary/blocks/quote/quote_models.py +0 -0
- notionary/blocks/registry/block_registry.py +17 -3
- notionary/blocks/registry/block_registry_builder.py +90 -178
- notionary/blocks/shared/__init__.py +0 -0
- notionary/blocks/shared/block_client.py +256 -0
- notionary/blocks/shared/models.py +713 -0
- notionary/blocks/{notion_block_element.py → shared/notion_block_element.py} +8 -5
- notionary/blocks/{text_inline_formatter.py → shared/text_inline_formatter.py} +14 -14
- notionary/blocks/shared/text_inline_formatter_new.py +139 -0
- notionary/blocks/table/__init__.py +7 -0
- notionary/blocks/{table_element.py → table/table_element.py} +23 -11
- notionary/blocks/table/table_markdown_node.py +40 -0
- notionary/blocks/table/table_models.py +0 -0
- notionary/blocks/todo/__init__.py +7 -0
- notionary/blocks/{todo_element.py → todo/todo_element.py} +8 -4
- notionary/blocks/todo/todo_markdown_node.py +31 -0
- notionary/blocks/todo/todo_models.py +0 -0
- notionary/blocks/toggle/__init__.py +4 -0
- notionary/blocks/{toggle_element.py → toggle/toggle_element.py} +7 -3
- notionary/blocks/toggle/toggle_markdown_node.py +35 -0
- notionary/blocks/toggle/toggle_models.py +0 -0
- notionary/blocks/toggleable_heading/__init__.py +9 -0
- notionary/blocks/{toggleable_heading_element.py → toggleable_heading/toggleable_heading_element.py} +8 -4
- notionary/blocks/toggleable_heading/toggleable_heading_markdown_node.py +43 -0
- notionary/blocks/toggleable_heading/toggleable_heading_models.py +0 -0
- notionary/blocks/video/__init__.py +7 -0
- notionary/blocks/{video_element.py → video/video_element.py} +82 -57
- notionary/blocks/video/video_markdown_node.py +30 -0
- notionary/file_upload/notion_file_upload.py +1 -1
- notionary/page/content/markdown_whitespace_processor.py +80 -0
- notionary/page/content/notion_text_length_utils.py +87 -0
- notionary/page/content/page_content_retriever.py +18 -10
- notionary/page/content/page_content_writer.py +97 -148
- notionary/page/formatting/line_processor.py +153 -0
- notionary/page/formatting/markdown_to_notion_converter.py +104 -425
- notionary/page/notion_page.py +9 -11
- notionary/page/notion_to_markdown_converter.py +9 -13
- notionary/util/factory_decorator.py +0 -0
- notionary/workspace.py +0 -1
- {notionary-0.2.17.dist-info → notionary-0.2.19.dist-info}/METADATA +1 -1
- notionary-0.2.19.dist-info/RECORD +150 -0
- notionary/blocks/audio_element.py +0 -144
- notionary/blocks/callout_element.py +0 -122
- notionary/blocks/document_element.py +0 -194
- notionary/blocks/notion_block_client.py +0 -26
- notionary/blocks/qoute_element.py +0 -169
- notionary/page/content/notion_page_content_chunker.py +0 -84
- notionary/page/formatting/spacer_rules.py +0 -483
- notionary-0.2.17.dist-info/RECORD +0 -85
- {notionary-0.2.17.dist-info → notionary-0.2.19.dist-info}/LICENSE +0 -0
- {notionary-0.2.17.dist-info → notionary-0.2.19.dist-info}/WHEEL +0 -0
@@ -1,29 +1,38 @@
|
|
1
1
|
import re
|
2
2
|
from typing import Dict, Any, Optional, List
|
3
3
|
|
4
|
-
from notionary.blocks import
|
5
|
-
|
4
|
+
from notionary.blocks import (
|
5
|
+
ElementPromptContent,
|
6
|
+
ElementPromptBuilder,
|
7
|
+
NotionBlockResult,
|
8
|
+
NotionBlockElement,
|
9
|
+
)
|
6
10
|
|
7
11
|
|
8
12
|
class VideoElement(NotionBlockElement):
|
9
13
|
"""
|
10
14
|
Handles conversion between Markdown video embeds and Notion video blocks.
|
11
15
|
|
12
|
-
Markdown video syntax
|
13
|
-
-
|
14
|
-
-
|
15
|
-
|
16
|
-
|
16
|
+
Markdown video syntax:
|
17
|
+
- [video](https://example.com/video.mp4) - Simple video with URL only
|
18
|
+
- [video](https://example.com/video.mp4 "Caption") - Video with URL and caption
|
19
|
+
|
20
|
+
Where:
|
21
|
+
- URL is the required video URL
|
22
|
+
- Caption is an optional descriptive text (enclosed in quotes)
|
17
23
|
|
18
24
|
Supports various video URLs including YouTube, Vimeo, and direct video file links.
|
19
25
|
"""
|
20
26
|
|
27
|
+
# Regex pattern for video syntax with optional caption
|
21
28
|
PATTERN = re.compile(
|
22
|
-
r"
|
23
|
-
+ r'
|
29
|
+
r"^\[video\]\(" # [video]( prefix
|
30
|
+
+ r'(https?://[^\s"]+)' # URL (required)
|
31
|
+
+ r'(?:\s+"([^"]+)")?' # Optional caption in quotes
|
24
32
|
+ r"\)$" # closing parenthesis
|
25
33
|
)
|
26
34
|
|
35
|
+
# YouTube URL patterns
|
27
36
|
YOUTUBE_PATTERNS = [
|
28
37
|
re.compile(
|
29
38
|
r"(?:https?://)?(?:www\.)?youtube\.com/watch\?v=([a-zA-Z0-9_-]{11})"
|
@@ -34,8 +43,9 @@ class VideoElement(NotionBlockElement):
|
|
34
43
|
@classmethod
|
35
44
|
def match_markdown(cls, text: str) -> bool:
|
36
45
|
"""Check if text is a markdown video embed."""
|
37
|
-
|
38
|
-
|
46
|
+
return text.strip().startswith("[video]") and bool(
|
47
|
+
VideoElement.PATTERN.match(text.strip())
|
48
|
+
)
|
39
49
|
|
40
50
|
@classmethod
|
41
51
|
def match_notion(cls, block: Dict[str, Any]) -> bool:
|
@@ -43,51 +53,38 @@ class VideoElement(NotionBlockElement):
|
|
43
53
|
return block.get("type") == "video"
|
44
54
|
|
45
55
|
@classmethod
|
46
|
-
def
|
47
|
-
"""Check if URL is a YouTube video and return video ID if it is."""
|
48
|
-
for pattern in VideoElement.YOUTUBE_PATTERNS:
|
49
|
-
match = pattern.match(url)
|
50
|
-
if match:
|
51
|
-
return True
|
52
|
-
return False
|
53
|
-
|
54
|
-
@classmethod
|
55
|
-
def get_youtube_id(cls, url: str) -> Optional[str]:
|
56
|
-
"""Extract YouTube video ID from URL."""
|
57
|
-
for pattern in VideoElement.YOUTUBE_PATTERNS:
|
58
|
-
match = pattern.match(url)
|
59
|
-
if match:
|
60
|
-
return match.group(1)
|
61
|
-
return None
|
62
|
-
|
63
|
-
@classmethod
|
64
|
-
def markdown_to_notion(cls, text: str) -> Optional[Dict[str, Any]]:
|
56
|
+
def markdown_to_notion(cls, text: str) -> NotionBlockResult:
|
65
57
|
"""Convert markdown video embed to Notion video block."""
|
66
58
|
video_match = VideoElement.PATTERN.match(text.strip())
|
67
59
|
if not video_match:
|
68
60
|
return None
|
69
61
|
|
70
|
-
|
71
|
-
|
62
|
+
url = video_match.group(1)
|
63
|
+
caption = video_match.group(2)
|
72
64
|
|
73
65
|
if not url:
|
74
66
|
return None
|
75
67
|
|
76
|
-
|
68
|
+
# Normalize YouTube URLs
|
69
|
+
youtube_id = VideoElement._get_youtube_id(url)
|
77
70
|
if youtube_id:
|
78
71
|
url = f"https://www.youtube.com/watch?v={youtube_id}"
|
79
72
|
|
80
|
-
|
81
|
-
"type": "video",
|
82
|
-
"video": {"type": "external", "external": {"url": url}},
|
83
|
-
}
|
73
|
+
video_data = {"type": "external", "external": {"url": url}}
|
84
74
|
|
75
|
+
# Add caption if provided
|
85
76
|
if caption:
|
86
|
-
|
87
|
-
|
88
|
-
]
|
77
|
+
video_data["caption"] = [{"type": "text", "text": {"content": caption}}]
|
78
|
+
else:
|
79
|
+
video_data["caption"] = []
|
89
80
|
|
90
|
-
|
81
|
+
# Prepare the video block
|
82
|
+
video_block = {"type": "video", "video": video_data}
|
83
|
+
|
84
|
+
# Add empty paragraph after video
|
85
|
+
empty_paragraph = {"type": "paragraph", "paragraph": {"rich_text": []}}
|
86
|
+
|
87
|
+
return [video_block, empty_paragraph]
|
91
88
|
|
92
89
|
@classmethod
|
93
90
|
def notion_to_markdown(cls, block: Dict[str, Any]) -> Optional[str]:
|
@@ -97,29 +94,56 @@ class VideoElement(NotionBlockElement):
|
|
97
94
|
|
98
95
|
video_data = block.get("video", {})
|
99
96
|
|
100
|
-
#
|
101
|
-
|
102
|
-
url = video_data.get("external", {}).get("url", "")
|
103
|
-
elif video_data.get("type") == "file":
|
104
|
-
url = video_data.get("file", {}).get("url", "")
|
105
|
-
else:
|
106
|
-
return None
|
107
|
-
|
97
|
+
# Extract URL from video data
|
98
|
+
url = VideoElement._extract_video_url(video_data)
|
108
99
|
if not url:
|
109
100
|
return None
|
110
101
|
|
111
|
-
caption = ""
|
112
102
|
caption_rich_text = video_data.get("caption", [])
|
113
|
-
if caption_rich_text:
|
114
|
-
caption = VideoElement._extract_text_content(caption_rich_text)
|
115
103
|
|
116
|
-
|
104
|
+
if not caption_rich_text:
|
105
|
+
# Simple video with URL only
|
106
|
+
return f"[video]({url})"
|
107
|
+
|
108
|
+
# Extract caption text
|
109
|
+
caption = VideoElement._extract_text_content(caption_rich_text)
|
110
|
+
|
111
|
+
if caption:
|
112
|
+
return f'[video]({url} "{caption}")'
|
113
|
+
|
114
|
+
return f"[video]({url})"
|
117
115
|
|
118
116
|
@classmethod
|
119
117
|
def is_multiline(cls) -> bool:
|
120
118
|
"""Videos are single-line elements."""
|
121
119
|
return False
|
122
120
|
|
121
|
+
@classmethod
|
122
|
+
def _is_youtube_url(cls, url: str) -> bool:
|
123
|
+
"""Check if URL is a YouTube video."""
|
124
|
+
for pattern in VideoElement.YOUTUBE_PATTERNS:
|
125
|
+
if pattern.match(url):
|
126
|
+
return True
|
127
|
+
return False
|
128
|
+
|
129
|
+
@classmethod
|
130
|
+
def _get_youtube_id(cls, url: str) -> Optional[str]:
|
131
|
+
"""Extract YouTube video ID from URL."""
|
132
|
+
for pattern in VideoElement.YOUTUBE_PATTERNS:
|
133
|
+
match = pattern.match(url)
|
134
|
+
if match:
|
135
|
+
return match.group(1)
|
136
|
+
return None
|
137
|
+
|
138
|
+
@classmethod
|
139
|
+
def _extract_video_url(cls, video_data: Dict[str, Any]) -> str:
|
140
|
+
"""Extract URL from video data, handling both external and uploaded videos."""
|
141
|
+
if video_data.get("type") == "external":
|
142
|
+
return video_data.get("external", {}).get("url", "")
|
143
|
+
elif video_data.get("type") == "file":
|
144
|
+
return video_data.get("file", {}).get("url", "")
|
145
|
+
return ""
|
146
|
+
|
123
147
|
@classmethod
|
124
148
|
def _extract_text_content(cls, rich_text: List[Dict[str, Any]]) -> str:
|
125
149
|
"""Extract plain text content from Notion rich_text elements."""
|
@@ -145,12 +169,13 @@ class VideoElement(NotionBlockElement):
|
|
145
169
|
"Use video embeds when you want to include multimedia content directly in your document. "
|
146
170
|
"Videos are useful for tutorials, demonstrations, presentations, or any content that benefits from visual explanation."
|
147
171
|
)
|
148
|
-
.with_syntax(
|
172
|
+
.with_syntax('[video](https://example.com/video.mp4 "Optional caption")')
|
149
173
|
.with_examples(
|
150
174
|
[
|
151
|
-
"
|
152
|
-
|
153
|
-
|
175
|
+
"[video](https://www.youtube.com/watch?v=dQw4w9WgXcQ)",
|
176
|
+
'[video](https://example.com/videos/demo.mp4 "Product demo")',
|
177
|
+
'[video](https://youtu.be/dQw4w9WgXcQ "How to use this feature")',
|
178
|
+
'[video](https://example.com/tutorial.mp4 "Step-by-step tutorial")',
|
154
179
|
]
|
155
180
|
)
|
156
181
|
.build()
|
@@ -0,0 +1,30 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import Optional
|
4
|
+
from pydantic import BaseModel
|
5
|
+
from notionary.blocks.markdown_node import MarkdownNode
|
6
|
+
|
7
|
+
|
8
|
+
class VideoMarkdownBlockParams(BaseModel):
    """Parameters accepted by ``VideoMarkdownNode.from_params``."""

    # URL placed inside the parentheses of the ``[video](...)`` syntax.
    url: str
    # Optional caption, rendered in double quotes after the URL.
    caption: Optional[str] = None


class VideoMarkdownNode(MarkdownNode):
    """
    Programmatic interface for creating Notion-style video blocks.
    Example: [video](https://example.com/video.mp4 "Optional caption")
    """

    def __init__(self, url: str, caption: Optional[str] = None):
        """Store the video URL and the optional caption."""
        self.url = url
        self.caption = caption

    @classmethod
    def from_params(cls, params: VideoMarkdownBlockParams) -> VideoMarkdownNode:
        """Build a node from a parameter object."""
        return cls(url=params.url, caption=params.caption)

    def to_markdown(self) -> str:
        """Render the node as a single-line ``[video](url "caption")`` string.

        The caption is delimited by double quotes and the element occupies
        exactly one line, so embedded double quotes are replaced with single
        quotes and any whitespace run (including line breaks) is collapsed to
        one space. Without this, the emitted line would not be parseable as a
        video embed again. A caption that is empty after normalization is
        omitted.
        """
        if not self.caption:
            return f"[video]({self.url})"

        # Keep the caption inside its quote delimiters and on one line.
        safe_caption = " ".join(self.caption.replace('"', "'").split())
        if not safe_caption:
            return f"[video]({self.url})"

        return f'[video]({self.url} "{safe_caption}")'
|
@@ -23,7 +23,7 @@ class NotionFileUpload(LoggingMixin):
|
|
23
23
|
|
24
24
|
def __init__(self, token: Optional[str] = None):
|
25
25
|
"""Initialize the file upload service."""
|
26
|
-
from notionary import NotionFileUploadClient
|
26
|
+
from notionary.file_upload import NotionFileUploadClient
|
27
27
|
|
28
28
|
self.client = NotionFileUploadClient(token=token)
|
29
29
|
|
@@ -0,0 +1,80 @@
|
|
1
|
+
class MarkdownWhitespaceProcessor:
    """Normalize leading whitespace in markdown while preserving code blocks.

    Regular lines lose their leading whitespace. Lines inside a fenced code
    block keep their relative indentation: only the indentation shared by all
    non-blank lines of the block is removed. Fence lines are rewritten flush
    left, and a fence that is still open at the end of the input is closed.
    """

    def __init__(self):
        self.processed_lines: list[str] = []
        self.in_code_block = False
        self.current_code_block: list[str] = []

    def process_lines(self, lines: list[str]) -> str:
        """Process all lines and return the normalized markdown.

        Args:
            lines: The markdown document, one entry per line, without
                trailing newline characters.

        Returns:
            The processed lines joined with ``"\\n"``.
        """
        # Reset state so one instance can be reused for several documents.
        self.processed_lines = []
        self.in_code_block = False
        self.current_code_block = []

        for line in lines:
            self._process_single_line(line)

        # The opening fence has already been emitted, so an unclosed block
        # must always be terminated -- even when it has no content lines --
        # otherwise the output ends with a dangling opening fence.
        if self.in_code_block:
            self._finish_code_block()

        return "\n".join(self.processed_lines)

    def _process_single_line(self, line: str) -> None:
        """Route one line: fence marker, code content, or regular text."""
        if self._is_code_block_marker(line):
            self._handle_code_block_marker(line)
            return

        if self.in_code_block:
            # Buffer code lines; indentation is normalized when the block ends.
            self.current_code_block.append(line)
            return

        # Regular text - remove leading whitespace
        self.processed_lines.append(line.lstrip())

    def _handle_code_block_marker(self, line: str) -> None:
        """Open a new code block or close the current one."""
        if self.in_code_block:
            self._finish_code_block()
            return

        self.in_code_block = True
        self.processed_lines.append(self._normalize_code_block_start(line))
        self.current_code_block = []

    def _finish_code_block(self) -> None:
        """Emit the buffered code lines followed by a closing fence."""
        self.processed_lines.extend(
            self._normalize_code_block_content(self.current_code_block)
        )
        self.processed_lines.append("```")
        self.in_code_block = False
        # Drop the buffer so stale lines can never be emitted twice.
        self.current_code_block = []

    def _is_code_block_marker(self, line: str) -> bool:
        """Return True if the line, ignoring indentation, starts with a fence."""
        return line.lstrip().startswith("```")

    def _normalize_code_block_start(self, line: str) -> str:
        """Return the opening fence flush left, followed by the language tag."""
        language = line.lstrip().replace("```", "", 1).strip()
        return "```" + language

    def _normalize_code_block_content(self, code_lines: list[str]) -> list[str]:
        """Remove the indentation common to all non-blank code lines.

        Blank (whitespace-only) lines do not take part in the minimum
        computation. If every line is blank, one empty string per line is
        returned. If the common indentation is zero, the lines are returned
        unchanged.
        """
        if not code_lines:
            return []

        # Find minimum indentation from non-empty lines
        non_empty_lines = [line for line in code_lines if line.strip()]
        if not non_empty_lines:
            return [""] * len(code_lines)

        min_indent = min(len(line) - len(line.lstrip()) for line in non_empty_lines)
        if min_indent == 0:
            return code_lines

        # Remove common indentation
        return ["" if not line.strip() else line[min_indent:] for line in code_lines]
|
@@ -0,0 +1,87 @@
|
|
1
|
+
"""
|
2
|
+
Utility functions for handling Notion API text length limitations.
|
3
|
+
|
4
|
+
This module provides functions to fix text content that exceeds Notion's
|
5
|
+
rich_text character limit of 2000 characters per element.
|
6
|
+
|
7
|
+
Resolves API errors like:
|
8
|
+
"validation_error - body.children[79].toggle.children[2].paragraph.rich_text[0].text.content.length
|
9
|
+
should be ≤ 2000, instead was 2162."
|
10
|
+
"""
|
11
|
+
|
12
|
+
import re
|
13
|
+
import logging
|
14
|
+
from typing import Any
|
15
|
+
|
16
|
+
logger = logging.getLogger(__name__)


def fix_blocks_content_length(
    blocks: list[dict[str, Any]], max_text_length: int = 1900
) -> list[dict[str, Any]]:
    """Return copies of ``blocks`` whose text items fit ``max_text_length``.

    Args:
        blocks: Block dictionaries keyed by their ``"type"``.
        max_text_length: Maximum number of characters allowed in a single
            ``rich_text[i]["text"]["content"]`` string.

    Returns:
        A new list with one entry per input block. Over-long text content is
        truncated (a warning is logged for each truncation). The input blocks
        and their nested structures are left unmodified.
    """
    return [_fix_single_block_content(block, max_text_length) for block in blocks]


def _fix_single_block_content(
    block: dict[str, Any], max_text_length: int
) -> dict[str, Any]:
    """Fix content length in a single block and its children recursively.

    Returns a copy of ``block``. Only the dictionaries that actually change
    are rebuilt; untouched nested objects are shared with the input, and the
    input itself is never written to.
    """
    block_copy = block.copy()

    block_type = block.get("type")
    if not block_type:
        return block_copy

    content = block.get(block_type)
    if not content:
        return block_copy

    if "rich_text" in content:
        _fix_rich_text_content(block_copy, block_type, content, max_text_length)

    if "children" in content and content["children"]:
        # ``block.copy()`` is shallow, so the payload dict is still shared
        # with the caller's block. Rebuild it instead of assigning into it.
        block_copy[block_type] = {
            **block_copy[block_type],
            "children": [
                _fix_single_block_content(child, max_text_length)
                for child in content["children"]
            ],
        }

    return block_copy


def _fix_rich_text_content(
    block_copy: dict[str, Any],
    block_type: str,
    content: dict[str, Any],
    max_text_length: int,
) -> None:
    """Truncate rich text items in ``block_copy`` that exceed the limit.

    Items without a ``text.content`` string are kept as they are. When at
    least one item is truncated, ``block_copy[block_type]`` is replaced by a
    new dictionary holding a new ``rich_text`` list, so ``content`` and the
    items inside it are not mutated.
    """
    fixed_items = []
    truncated_any = False

    for text_item in content["rich_text"]:
        if "text" not in text_item or "content" not in text_item["text"]:
            fixed_items.append(text_item)
            continue

        text_content = text_item["text"]["content"]
        if len(text_content) <= max_text_length:
            fixed_items.append(text_item)
            continue

        logger.warning(
            "Truncating text content from %d to %d chars",
            len(text_content),
            max_text_length,
        )
        # Copy the item and its "text" payload before changing the content.
        fixed_items.append(
            {
                **text_item,
                "text": {**text_item["text"], "content": text_content[:max_text_length]},
            }
        )
        truncated_any = True

    if truncated_any:
        block_copy[block_type] = {**block_copy[block_type], "rich_text": fixed_items}
|
76
|
+
|
77
|
+
|
78
|
+
def split_to_paragraphs(markdown_text: str) -> list[str]:
    """Break markdown text into paragraphs at blank lines.

    A separator is a newline, optional whitespace, and another newline.
    Chunks that are empty or whitespace-only are dropped; the remaining
    chunks are returned unstripped and in their original order.
    """
    chunks = re.split(r"\n\s*\n", markdown_text)
    # ``str.strip`` returns a falsy empty string for whitespace-only chunks.
    return list(filter(str.strip, chunks))
|
82
|
+
|
83
|
+
|
84
|
+
def split_to_sentences(paragraph: str) -> list[str]:
    """Break a paragraph into sentences.

    The text is split at whitespace that directly follows ``.``, ``!`` or
    ``?``; the punctuation stays with its sentence. Empty or whitespace-only
    pieces are dropped.
    """
    pieces = re.split(r"(?<=[.!?])\s+", paragraph)
    # ``str.strip`` returns a falsy empty string for whitespace-only pieces.
    return list(filter(str.strip, pieces))
|
@@ -1,8 +1,10 @@
|
|
1
|
-
|
1
|
+
import json
|
2
|
+
from typing import Any, Dict, Optional
|
2
3
|
|
3
4
|
from notionary.blocks.registry.block_registry import BlockRegistry
|
4
5
|
|
5
6
|
from notionary.blocks import NotionBlockClient
|
7
|
+
from notionary.blocks.shared.models import Block
|
6
8
|
from notionary.page.notion_to_markdown_converter import (
|
7
9
|
NotionToMarkdownConverter,
|
8
10
|
)
|
@@ -23,30 +25,36 @@ class PageContentRetriever(LoggingMixin):
|
|
23
25
|
|
24
26
|
async def get_page_content(self) -> str:
|
25
27
|
blocks = await self._get_page_blocks_with_children()
|
26
|
-
|
28
|
+
|
29
|
+
# TODO: Replace this quick fix: convert recursive Block objects into plain dicts
|
30
|
+
blocks_as_dicts = [block.model_dump(mode="python", exclude_unset=True) for block in blocks]
|
31
|
+
|
32
|
+
return self._notion_to_markdown_converter.convert(blocks_as_dicts)
|
27
33
|
|
28
34
|
async def _get_page_blocks_with_children(
|
29
35
|
self, parent_id: Optional[str] = None
|
30
|
-
) ->
|
31
|
-
|
32
|
-
await self.client.
|
36
|
+
) -> list[Block]:
|
37
|
+
response = (
|
38
|
+
await self.client.get_block_children(block_id=self.page_id)
|
33
39
|
if parent_id is None
|
34
40
|
else await self.client.get_block_children(parent_id)
|
35
41
|
)
|
36
42
|
|
37
|
-
if not
|
43
|
+
if not response or not response.results:
|
38
44
|
return []
|
39
45
|
|
46
|
+
blocks = response.results
|
47
|
+
|
40
48
|
for block in blocks:
|
41
|
-
if not block.
|
49
|
+
if not block.has_children:
|
42
50
|
continue
|
43
51
|
|
44
|
-
block_id = block.
|
52
|
+
block_id = block.id
|
45
53
|
if not block_id:
|
46
54
|
continue
|
47
55
|
|
48
56
|
children = await self._get_page_blocks_with_children(block_id)
|
49
57
|
if children:
|
50
|
-
block
|
58
|
+
block.children = children
|
51
59
|
|
52
|
-
return blocks
|
60
|
+
return blocks
|