notionary 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +3 -2
- notionary/blocks/__init__.py +54 -25
- notionary/blocks/audio/__init__.py +7 -0
- notionary/blocks/audio/audio_element.py +152 -0
- notionary/blocks/audio/audio_markdown_node.py +29 -0
- notionary/blocks/audio/audio_models.py +59 -0
- notionary/blocks/bookmark/__init__.py +7 -0
- notionary/blocks/{bookmark_element.py → bookmark/bookmark_element.py} +20 -65
- notionary/blocks/bookmark/bookmark_markdown_node.py +43 -0
- notionary/blocks/bookmark/bookmark_models.py +0 -0
- notionary/blocks/bulleted_list/__init__.py +7 -0
- notionary/blocks/{bulleted_list_element.py → bulleted_list/bulleted_list_element.py} +7 -3
- notionary/blocks/bulleted_list/bulleted_list_markdown_node.py +33 -0
- notionary/blocks/bulleted_list/bulleted_list_models.py +0 -0
- notionary/blocks/callout/__init__.py +7 -0
- notionary/blocks/callout/callout_element.py +132 -0
- notionary/blocks/callout/callout_markdown_node.py +31 -0
- notionary/blocks/callout/callout_models.py +0 -0
- notionary/blocks/code/__init__.py +7 -0
- notionary/blocks/{code_block_element.py → code/code_element.py} +72 -40
- notionary/blocks/code/code_markdown_node.py +43 -0
- notionary/blocks/code/code_models.py +0 -0
- notionary/blocks/column/__init__.py +5 -0
- notionary/blocks/{column_element.py → column/column_element.py} +24 -55
- notionary/blocks/column/column_models.py +0 -0
- notionary/blocks/divider/__init__.py +7 -0
- notionary/blocks/{divider_element.py → divider/divider_element.py} +11 -3
- notionary/blocks/divider/divider_markdown_node.py +24 -0
- notionary/blocks/divider/divider_models.py +0 -0
- notionary/blocks/document/__init__.py +7 -0
- notionary/blocks/document/document_element.py +102 -0
- notionary/blocks/document/document_markdown_node.py +31 -0
- notionary/blocks/document/document_models.py +0 -0
- notionary/blocks/embed/__init__.py +7 -0
- notionary/blocks/{embed_element.py → embed/embed_element.py} +50 -32
- notionary/blocks/embed/embed_markdown_node.py +30 -0
- notionary/blocks/embed/embed_models.py +0 -0
- notionary/blocks/heading/__init__.py +7 -0
- notionary/blocks/{heading_element.py → heading/heading_element.py} +25 -17
- notionary/blocks/heading/heading_markdown_node.py +29 -0
- notionary/blocks/heading/heading_models.py +0 -0
- notionary/blocks/image/__init__.py +7 -0
- notionary/blocks/{image_element.py → image/image_element.py} +62 -42
- notionary/blocks/image/image_markdown_node.py +33 -0
- notionary/blocks/image/image_models.py +0 -0
- notionary/blocks/markdown_builder.py +356 -0
- notionary/blocks/markdown_node.py +29 -0
- notionary/blocks/mention/__init__.py +7 -0
- notionary/blocks/{mention_element.py → mention/mention_element.py} +6 -2
- notionary/blocks/mention/mention_markdown_node.py +38 -0
- notionary/blocks/mention/mention_models.py +0 -0
- notionary/blocks/numbered_list/__init__.py +7 -0
- notionary/blocks/{numbered_list_element.py → numbered_list/numbered_list_element.py} +10 -6
- notionary/blocks/numbered_list/numbered_list_markdown_node.py +29 -0
- notionary/blocks/numbered_list/numbered_list_models.py +0 -0
- notionary/blocks/paragraph/__init__.py +7 -0
- notionary/blocks/{paragraph_element.py → paragraph/paragraph_element.py} +7 -3
- notionary/blocks/paragraph/paragraph_markdown_node.py +25 -0
- notionary/blocks/paragraph/paragraph_models.py +0 -0
- notionary/blocks/quote/__init__.py +7 -0
- notionary/blocks/quote/quote_element.py +92 -0
- notionary/blocks/quote/quote_markdown_node.py +23 -0
- notionary/blocks/quote/quote_models.py +0 -0
- notionary/blocks/registry/block_registry.py +17 -3
- notionary/blocks/registry/block_registry_builder.py +90 -178
- notionary/blocks/shared/__init__.py +0 -0
- notionary/blocks/shared/block_client.py +256 -0
- notionary/blocks/shared/models.py +713 -0
- notionary/blocks/{notion_block_element.py → shared/notion_block_element.py} +8 -5
- notionary/blocks/{text_inline_formatter.py → shared/text_inline_formatter.py} +14 -14
- notionary/blocks/shared/text_inline_formatter_new.py +139 -0
- notionary/blocks/table/__init__.py +7 -0
- notionary/blocks/{table_element.py → table/table_element.py} +23 -11
- notionary/blocks/table/table_markdown_node.py +40 -0
- notionary/blocks/table/table_models.py +0 -0
- notionary/blocks/todo/__init__.py +7 -0
- notionary/blocks/{todo_element.py → todo/todo_element.py} +8 -4
- notionary/blocks/todo/todo_markdown_node.py +31 -0
- notionary/blocks/todo/todo_models.py +0 -0
- notionary/blocks/toggle/__init__.py +4 -0
- notionary/blocks/{toggle_element.py → toggle/toggle_element.py} +7 -3
- notionary/blocks/toggle/toggle_markdown_node.py +35 -0
- notionary/blocks/toggle/toggle_models.py +0 -0
- notionary/blocks/toggleable_heading/__init__.py +9 -0
- notionary/blocks/{toggleable_heading_element.py → toggleable_heading/toggleable_heading_element.py} +8 -4
- notionary/blocks/toggleable_heading/toggleable_heading_markdown_node.py +43 -0
- notionary/blocks/toggleable_heading/toggleable_heading_models.py +0 -0
- notionary/blocks/video/__init__.py +7 -0
- notionary/blocks/{video_element.py → video/video_element.py} +82 -57
- notionary/blocks/video/video_markdown_node.py +30 -0
- notionary/file_upload/notion_file_upload.py +1 -1
- notionary/page/content/markdown_whitespace_processor.py +80 -0
- notionary/page/content/notion_text_length_utils.py +87 -0
- notionary/page/content/page_content_retriever.py +18 -10
- notionary/page/content/page_content_writer.py +97 -148
- notionary/page/formatting/line_processor.py +153 -0
- notionary/page/formatting/markdown_to_notion_converter.py +104 -425
- notionary/page/notion_page.py +9 -11
- notionary/page/notion_to_markdown_converter.py +9 -13
- notionary/util/factory_decorator.py +0 -0
- notionary/workspace.py +0 -1
- {notionary-0.2.17.dist-info → notionary-0.2.19.dist-info}/METADATA +1 -1
- notionary-0.2.19.dist-info/RECORD +150 -0
- notionary/blocks/audio_element.py +0 -144
- notionary/blocks/callout_element.py +0 -122
- notionary/blocks/document_element.py +0 -194
- notionary/blocks/notion_block_client.py +0 -26
- notionary/blocks/qoute_element.py +0 -169
- notionary/page/content/notion_page_content_chunker.py +0 -84
- notionary/page/formatting/spacer_rules.py +0 -483
- notionary-0.2.17.dist-info/RECORD +0 -85
- {notionary-0.2.17.dist-info → notionary-0.2.19.dist-info}/LICENSE +0 -0
- {notionary-0.2.17.dist-info → notionary-0.2.19.dist-info}/WHEEL +0 -0
@@ -1,194 +0,0 @@
|
|
1
|
-
import re
|
2
|
-
from typing import Dict, Any, Optional, List
|
3
|
-
|
4
|
-
from notionary.blocks import NotionBlockElement
|
5
|
-
from notionary.blocks import ElementPromptContent, ElementPromptBuilder
|
6
|
-
|
7
|
-
|
8
|
-
class DocumentElement(NotionBlockElement):
    """
    Handles conversion between Markdown document embeds and Notion file blocks.

    Markdown document syntax (custom format):
    - %[Caption](https://example.com/document.pdf) - Basic document with caption
    - %[](https://example.com/document.pdf) - Document without caption
    - %[Meeting Notes](https://drive.google.com/file/d/123/view) - Google Drive document
    - %[Report](https://company.sharepoint.com/document.docx) - SharePoint document

    Supports various document URLs including PDFs, Word docs, Excel files, PowerPoint,
    Google Drive files, and other document formats that Notion can display.
    """

    # Group 1 is the (possibly empty) caption, group 2 the http(s) URL.
    PATTERN = re.compile(
        r"^%\[(.*?)\]"  # %[Caption] part
        + r'\((https?://[^\s"]+)'  # (URL part
        + r"\)$"  # closing parenthesis
    )

    # File extensions treated as documents by is_document_url().
    DOCUMENT_EXTENSIONS = [
        ".pdf",
        ".doc",
        ".docx",
        ".xls",
        ".xlsx",
        ".ppt",
        ".pptx",
        ".txt",
        ".rtf",
        ".odt",
        ".ods",
        ".odp",
        ".pages",
        ".numbers",
        ".key",
        ".epub",
        ".mobi",
    ]

    # Hosting services whose URLs are treated as documents by is_document_url().
    DOCUMENT_SERVICES = (
        "drive.google.com",
        "docs.google.com",
        "sheets.google.com",
        "slides.google.com",
        "sharepoint.com",
        "onedrive.com",
        "dropbox.com",
        "box.com",
        "scribd.com",
        "slideshare.net",
    )

    @classmethod
    def match_markdown(cls, text: str) -> bool:
        """Check if text is a markdown document embed."""
        # PATTERN is anchored on "%[", so no separate prefix test is needed.
        return bool(cls.PATTERN.match(text.strip()))

    @classmethod
    def match_notion(cls, block: Dict[str, Any]) -> bool:
        """Check if block is a Notion file (document)."""
        return block.get("type") == "file"

    @classmethod
    def is_document_url(cls, url: str) -> bool:
        """
        Check if URL points to a document file.

        A URL qualifies when its path ends with one of DOCUMENT_EXTENSIONS
        (query string and fragment are ignored) or when its host is one of
        DOCUMENT_SERVICES or a subdomain of one.

        Args:
            url: The URL to inspect; a missing scheme is tolerated.

        Returns:
            True if the URL looks like a document, False otherwise.
        """
        # Local import so the block does not depend on file-level imports.
        from urllib.parse import urlparse

        url_lower = url.lower()
        extensions = tuple(cls.DOCUMENT_EXTENSIONS)

        # Without a scheme urlparse() would put the host into the path, so
        # prefix "//" to make it parse as a network location.
        candidate = url_lower if "://" in url_lower else "//" + url_lower
        try:
            parsed = urlparse(candidate)
            path = parsed.path
            host = parsed.hostname or ""
        except ValueError:
            # Malformed URL (e.g. unbalanced IPv6 brackets): fall back to the
            # raw string for the extension test and skip the host test.
            path, host = url_lower, ""

        # The raw-string test keeps bare file names such as "report.pdf" working.
        if path.endswith(extensions) or url_lower.endswith(extensions):
            return True

        # Compare against the host only, so "box.com" no longer matches
        # "notdropbox.com" or a service name that merely appears in the path.
        return any(
            host == service or host.endswith("." + service)
            for service in cls.DOCUMENT_SERVICES
        )

    @classmethod
    def markdown_to_notion(cls, text: str) -> Optional[Dict[str, Any]]:
        """
        Convert markdown document embed to Notion file block.

        Args:
            text: A single markdown line such as "%[Caption](https://...)".

        Returns:
            A Notion "file" block dict referencing the URL as an external
            file, or None if the text does not match PATTERN.
        """
        doc_match = cls.PATTERN.match(text.strip())
        if not doc_match:
            return None

        # PATTERN guarantees a non-empty URL. The URL is deliberately not
        # validated with is_document_url(): the user may know better than
        # the heuristic.
        caption, url = doc_match.group(1), doc_match.group(2)

        file_block: Dict[str, Any] = {
            "type": "file",
            "file": {"type": "external", "external": {"url": url}},
        }

        # Add caption only if one was provided.
        if caption:
            file_block["file"]["caption"] = [
                {"type": "text", "text": {"content": caption}}
            ]

        return file_block

    @classmethod
    def notion_to_markdown(cls, block: Dict[str, Any]) -> Optional[str]:
        """
        Convert Notion file block to markdown document embed.

        Args:
            block: A Notion block dict.

        Returns:
            "%[caption](url)" for file blocks of type "external", "file" or
            "file_upload"; None for other blocks or when no URL / upload id
            is present.
        """
        if block.get("type") != "file":
            return None

        file_data = block.get("file", {})
        source_type = file_data.get("type")

        # Handle external links, Notion-hosted files and pending uploads.
        if source_type == "external":
            url = file_data.get("external", {}).get("url", "")
        elif source_type == "file":
            url = file_data.get("file", {}).get("url", "")
        elif source_type == "file_upload":
            # Uploads are rendered with a special notion:// syntax. PATTERN
            # only accepts http(s) URLs, so markdown_to_notion() will not
            # parse this form back.
            file_upload_id = file_data.get("file_upload", {}).get("id", "")
            url = f"notion://file_upload/{file_upload_id}" if file_upload_id else ""
        else:
            return None

        if not url:
            return None

        # Extract caption if available.
        caption_rich_text = file_data.get("caption", [])
        caption = (
            cls._extract_text_content(caption_rich_text) if caption_rich_text else ""
        )

        return f"%[{caption}]({url})"

    @classmethod
    def is_multiline(cls) -> bool:
        """Document embeds are single-line elements."""
        return False

    @classmethod
    def _extract_text_content(cls, rich_text: List[Dict[str, Any]]) -> str:
        """
        Extract plain text content from Notion rich_text elements.

        Items of type "text" contribute their text.content; any other item
        contributes its "plain_text" value when that key is present.
        """
        parts = []
        for text_obj in rich_text:
            if text_obj.get("type") == "text":
                parts.append(text_obj.get("text", {}).get("content", ""))
            elif "plain_text" in text_obj:
                parts.append(text_obj.get("plain_text", ""))
        return "".join(parts)

    @classmethod
    def get_llm_prompt_content(cls) -> ElementPromptContent:
        """Returns information for LLM prompts about this element."""
        return (
            ElementPromptBuilder()
            .with_description(
                "Embeds document files from external sources like PDFs, Word docs, Excel files, or cloud storage services."
            )
            .with_usage_guidelines(
                "Use document embeds when you want to include reference materials, reports, presentations, or any "
                "file-based content directly in your document. Documents can be viewed inline or downloaded by users. "
                "Perfect for sharing contracts, reports, manuals, or any important files."
            )
            .with_syntax("%[Caption](https://example.com/document.pdf)")
            .with_examples(
                [
                    "%[Project Proposal](https://drive.google.com/file/d/1a2b3c4d5e/view)",
                    "%[Q4 Financial Report](https://company.sharepoint.com/reports/q4-2024.xlsx)",
                    "%[User Manual](https://cdn.company.com/docs/manual-v2.1.pdf)",
                    "%[Meeting Minutes](https://docs.google.com/document/d/1x2y3z4/edit)",
                    "%[](https://example.com/contract.pdf)",
                ]
            )
            .with_avoidance_guidelines(
                "Only use for actual document files. For web pages or articles, use bookmark or embed elements instead. "
                "Ensure document URLs are accessible to your intended audience."
            )
            .build()
        )
|
@@ -1,26 +0,0 @@
|
|
1
|
-
from typing import Dict, Any, List
|
2
|
-
from notionary.base_notion_client import BaseNotionClient
|
3
|
-
from notionary.util import singleton
|
4
|
-
|
5
|
-
|
6
|
-
# TODO: Type the block API (fix registry as well)
|
7
|
-
@singleton
class NotionBlockClient(BaseNotionClient):
    """
    Client for Notion block operations.
    Inherits base HTTP functionality from BaseNotionClient.
    """

    async def get_page_blocks(self, page_id: str) -> List[Dict[str, Any]]:
        """
        Retrieves all blocks of a Notion page.

        A page's top-level blocks are fetched from the same
        "blocks/{id}/children" endpoint as any block's children, so this
        delegates to get_block_children().
        """
        return await self.get_block_children(page_id)

    async def get_block_children(self, block_id: str) -> List[Dict[str, Any]]:
        """
        Retrieves all children blocks of a specific block.

        Follows the response's "has_more" / "next_cursor" fields so that
        results beyond the first page are included.

        Args:
            block_id: ID of the parent block (or page).

        Returns:
            The concatenated "results" lists of every response page.
        """
        endpoint = f"blocks/{block_id}/children"
        blocks: List[Dict[str, Any]] = []
        cursor = None

        while True:
            # NOTE(review): assumes self.get() passes a query string embedded
            # in the endpoint through to the request unchanged - confirm
            # against BaseNotionClient.
            path = endpoint if cursor is None else f"{endpoint}?start_cursor={cursor}"
            response = await self.get(path)
            blocks.extend(response.get("results", []))

            cursor = response.get("next_cursor")
            # Stop when the API reports no further pages or gives no cursor.
            if not response.get("has_more") or not cursor:
                break

        return blocks
|
@@ -1,169 +0,0 @@
|
|
1
|
-
import re
|
2
|
-
from typing import Dict, Any, Optional, List, Tuple
|
3
|
-
|
4
|
-
from notionary.blocks import NotionBlockElement
|
5
|
-
from notionary.blocks import ElementPromptContent, ElementPromptBuilder
|
6
|
-
|
7
|
-
|
8
|
-
class QuoteElement(NotionBlockElement):
    """Class for converting between Markdown blockquotes and Notion quote blocks."""

    # Matches a line that starts with optional horizontal whitespace, then
    # '>', then at most one whitespace character, and captures the rest of
    # the line. "[^\S\n]" means "whitespace except newline": with
    # re.MULTILINE a plain "\s" would run across line breaks, swallowing
    # blank lines before a quote and, after an empty ">" line, the whole
    # following (non-quote) line.
    quote_pattern = re.compile(r"^[^\S\n]*>[^\S\n]?(.*)", re.MULTILINE)

    @classmethod
    def find_matches(cls, text: str) -> List[Tuple[int, int, Dict[str, Any]]]:
        """
        Find all blockquote matches in the text and return their positions and blocks.

        Consecutive quote lines (see is_consecutive_quote) are merged into a
        single block.

        Returns:
            A list of (start, end, block) tuples, where start/end are
            character offsets into text and block is the Notion quote block.
        """
        quote_matches = list(cls.quote_pattern.finditer(text))
        matches: List[Tuple[int, int, Dict[str, Any]]] = []
        total = len(quote_matches)

        group_start = 0
        while group_start < total:
            # Extend the group while the next quote line continues this one.
            group_end = group_start + 1
            while group_end < total and cls.is_consecutive_quote(
                text, quote_matches, group_end
            ):
                group_end += 1

            start_pos = quote_matches[group_start].start()
            end_pos = quote_matches[group_end - 1].end()

            block = cls.markdown_to_notion(text[start_pos:end_pos])
            if block:
                matches.append((start_pos, end_pos, block))

            group_start = group_end

        return matches

    @classmethod
    def is_consecutive_quote(cls, text: str, quote_matches: List, index: int) -> bool:
        """
        Checks if the current quote is part of the previous quote sequence.

        Args:
            text: The full text the matches were found in.
            quote_matches: Match objects for quote lines, in text order.
            index: Index of the match to test; must be >= 1.

        Returns:
            True if exactly one newline separates the two matches, or if the
            gap is whitespace-only with at most two newlines (i.e. at most
            one blank line between the quote lines).
        """
        prev_end = quote_matches[index - 1].end()
        curr_start = quote_matches[index].start()
        gap_text = text[prev_end:curr_start]
        newline_count = gap_text.count("\n")

        if newline_count == 1:
            return True

        return gap_text.strip() == "" and newline_count <= 2

    @classmethod
    def markdown_to_notion(cls, text: str) -> Optional[Dict[str, Any]]:
        """
        Convert markdown blockquote to Notion block.

        Args:
            text: One or more lines of markdown.

        Returns:
            A Notion quote block whose single rich_text item holds the
            quoted lines joined by newlines, or None if text is empty or
            contains no quote line.
        """
        if not text:
            return None

        # Check if it's a blockquote at all.
        if not cls.quote_pattern.search(text):
            return None

        quote_lines = []
        for line in text.split("\n"):
            quote_match = cls.quote_pattern.match(line)
            if quote_match:
                quote_lines.append(quote_match.group(1))
            elif not line.strip() and quote_lines:
                # Allow empty lines within the quote.
                quote_lines.append("")
            # Any other non-quote line is ignored.

        if not quote_lines:
            return None

        quote_content = "\n".join(quote_lines).strip()
        rich_text = [{"type": "text", "text": {"content": quote_content}}]

        return {"type": "quote", "quote": {"rich_text": rich_text, "color": "default"}}

    @classmethod
    def notion_to_markdown(cls, block: Dict[str, Any]) -> Optional[str]:
        """
        Convert Notion quote block to markdown.

        Returns:
            The block's text with every line prefixed by "> ", or None if
            the block is not of type "quote".
        """
        if block.get("type") != "quote":
            return None

        rich_text = block.get("quote", {}).get("rich_text", [])
        content = cls._extract_text_content(rich_text)

        # Prefix each line so multi-line quotes stay a single blockquote.
        return "\n".join(f"> {line}" for line in content.split("\n"))

    @classmethod
    def match_markdown(cls, text: str) -> bool:
        """Check if this element can handle the given markdown text."""
        return bool(cls.quote_pattern.search(text))

    @classmethod
    def match_notion(cls, block: Dict[str, Any]) -> bool:
        """Check if this element can handle the given Notion block."""
        return block.get("type") == "quote"

    @classmethod
    def is_multiline(cls) -> bool:
        """Blockquotes can span multiple lines."""
        return True

    @classmethod
    def _extract_text_content(cls, rich_text: List[Dict[str, Any]]) -> str:
        """
        Extract plain text content from Notion rich_text elements.

        Items of type "text" contribute their text.content; any other item
        contributes its "plain_text" value when that key is present.
        """
        parts = []
        for text_obj in rich_text:
            if text_obj.get("type") == "text":
                parts.append(text_obj.get("text", {}).get("content", ""))
            elif "plain_text" in text_obj:
                parts.append(text_obj.get("plain_text", ""))
        return "".join(parts)

    @classmethod
    def get_llm_prompt_content(cls) -> ElementPromptContent:
        """
        Returns structured LLM prompt metadata for the quote element.
        """
        return (
            ElementPromptBuilder()
            .with_description(
                "Creates blockquotes that visually distinguish quoted text."
            )
            .with_usage_guidelines(
                "Use blockquotes for quoting external sources, highlighting important statements, "
                "or creating visual emphasis for key information."
            )
            .with_syntax("> Quoted text")
            .with_examples(
                [
                    "> This is a simple blockquote",
                    "> This is a multi-line quote\n> that continues on the next line",
                    "> Important note:\n> This quote spans\n> multiple lines.",
                ]
            )
            .build()
        )
|
@@ -1,84 +0,0 @@
|
|
1
|
-
import re
|
2
|
-
from typing import Any, Dict, List
|
3
|
-
from notionary.util import LoggingMixin
|
4
|
-
|
5
|
-
|
6
|
-
class NotionPageContentChunker(LoggingMixin):
    """
    Handles markdown text processing to comply with Notion API length limitations.

    This class specifically addresses the Notion API constraint that limits
    rich_text elements to a maximum of 2000 characters. This particularly affects
    paragraph blocks within toggle blocks or other nested structures.

    Resolves the following typical API error:
    "validation_error - body.children[79].toggle.children[2].paragraph.rich_text[0].text.content.length
    should be ≤ 2000, instead was 2162."

    The class provides methods for:
    1. Automatically truncating text that exceeds the limit
    2. Splitting markdown into smaller units for separate API requests
    """

    def __init__(self, max_text_length: int = 1900):
        """
        Args:
            max_text_length: Maximum number of characters kept per rich_text
                text item; longer content is truncated to this length.
        """
        self.max_text_length = max_text_length

    def fix_blocks_content_length(
        self, blocks: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Check each block and ensure text content doesn't exceed Notion's limit.

        Returns:
            A new list of block dicts; the input blocks are not modified.
        """
        return [self._fix_single_block_content(block) for block in blocks]

    def _fix_single_block_content(self, block: Dict[str, Any]) -> Dict[str, Any]:
        """
        Fix content length in a single block and its children recursively.

        The block is returned as a copy. Every nested dict or list that has
        to change is rebuilt rather than edited in place, so the caller's
        block is left untouched (a plain shallow copy would share, and thus
        mutate, the nested content).
        """
        block_copy = block.copy()

        block_type = block.get("type")
        if not block_type:
            return block_copy

        content = block.get(block_type)
        if not content:
            return block_copy

        if "rich_text" in content:
            self._fix_rich_text_content(block_copy, block_type, content)

        if "children" in content and content["children"]:
            # Rebuild the content dict instead of assigning into the shared one.
            block_copy[block_type] = {
                **block_copy[block_type],
                "children": [
                    self._fix_single_block_content(child)
                    for child in content["children"]
                ],
            }

        return block_copy

    def _fix_rich_text_content(
        self, block_copy: Dict[str, Any], block_type: str, content: Dict[str, Any]
    ) -> None:
        """
        Fix rich text content that exceeds the length limit.

        Replaces block_copy[block_type] with a new dict whose "rich_text"
        list contains truncated copies of over-long items. Items without
        text.content, or within the limit, are carried over as they are.

        Args:
            block_copy: The block being built; only its block_type entry is
                reassigned.
            block_type: Key of the content dict inside the block.
            content: The original content dict; read but never modified.
        """
        fixed_items = []
        for text_item in content["rich_text"]:
            if "text" not in text_item or "content" not in text_item["text"]:
                fixed_items.append(text_item)
                continue

            text_content = text_item["text"]["content"]
            if len(text_content) <= self.max_text_length:
                fixed_items.append(text_item)
                continue

            self.logger.warning(
                "Truncating text content from %d to %d chars",
                len(text_content),
                self.max_text_length,
            )
            # Copy the item and its "text" dict so the original stays intact.
            fixed_items.append(
                {
                    **text_item,
                    "text": {
                        **text_item["text"],
                        "content": text_content[: self.max_text_length],
                    },
                }
            )

        block_copy[block_type] = {**block_copy[block_type], "rich_text": fixed_items}

    def split_to_paragraphs(self, markdown_text: str) -> List[str]:
        """Split markdown into paragraphs on blank lines, dropping empty parts."""
        paragraphs = re.split(r"\n\s*\n", markdown_text)
        return [p for p in paragraphs if p.strip()]

    def split_to_sentences(self, paragraph: str) -> List[str]:
        """Split a paragraph into sentences at whitespace that follows '.', '!' or '?'."""
        sentences = re.split(r"(?<=[.!?])\s+", paragraph)
        return [s for s in sentences if s.strip()]
|