notionary 0.2.16__py3-none-any.whl → 0.2.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notionary/__init__.py +10 -5
- notionary/base_notion_client.py +18 -7
- notionary/blocks/__init__.py +55 -24
- notionary/blocks/audio/__init__.py +7 -0
- notionary/blocks/audio/audio_element.py +152 -0
- notionary/blocks/audio/audio_markdown_node.py +29 -0
- notionary/blocks/audio/audio_models.py +59 -0
- notionary/blocks/bookmark/__init__.py +7 -0
- notionary/blocks/{bookmark_element.py → bookmark/bookmark_element.py} +20 -65
- notionary/blocks/bookmark/bookmark_markdown_node.py +43 -0
- notionary/blocks/bulleted_list/__init__.py +7 -0
- notionary/blocks/{bulleted_list_element.py → bulleted_list/bulleted_list_element.py} +7 -3
- notionary/blocks/bulleted_list/bulleted_list_markdown_node.py +33 -0
- notionary/blocks/bulleted_list/bulleted_list_models.py +0 -0
- notionary/blocks/callout/__init__.py +7 -0
- notionary/blocks/callout/callout_element.py +132 -0
- notionary/blocks/callout/callout_markdown_node.py +31 -0
- notionary/blocks/callout/callout_models.py +0 -0
- notionary/blocks/code/__init__.py +7 -0
- notionary/blocks/{code_block_element.py → code/code_element.py} +72 -40
- notionary/blocks/code/code_markdown_node.py +43 -0
- notionary/blocks/code/code_models.py +0 -0
- notionary/blocks/column/__init__.py +5 -0
- notionary/blocks/{column_element.py → column/column_element.py} +24 -55
- notionary/blocks/column/column_models.py +0 -0
- notionary/blocks/divider/__init__.py +7 -0
- notionary/blocks/{divider_element.py → divider/divider_element.py} +11 -3
- notionary/blocks/divider/divider_markdown_node.py +24 -0
- notionary/blocks/divider/divider_models.py +0 -0
- notionary/blocks/document/__init__.py +7 -0
- notionary/blocks/document/document_element.py +102 -0
- notionary/blocks/document/document_markdown_node.py +31 -0
- notionary/blocks/document/document_models.py +0 -0
- notionary/blocks/embed/__init__.py +7 -0
- notionary/blocks/{embed_element.py → embed/embed_element.py} +50 -32
- notionary/blocks/embed/embed_markdown_node.py +30 -0
- notionary/blocks/embed/embed_models.py +0 -0
- notionary/blocks/heading/__init__.py +7 -0
- notionary/blocks/{heading_element.py → heading/heading_element.py} +25 -17
- notionary/blocks/heading/heading_markdown_node.py +29 -0
- notionary/blocks/heading/heading_models.py +0 -0
- notionary/blocks/image/__init__.py +7 -0
- notionary/blocks/{image_element.py → image/image_element.py} +62 -42
- notionary/blocks/image/image_markdown_node.py +33 -0
- notionary/blocks/image/image_models.py +0 -0
- notionary/blocks/markdown_builder.py +356 -0
- notionary/blocks/markdown_node.py +29 -0
- notionary/blocks/mention/__init__.py +7 -0
- notionary/blocks/{mention_element.py → mention/mention_element.py} +6 -2
- notionary/blocks/mention/mention_markdown_node.py +38 -0
- notionary/blocks/mention/mention_models.py +0 -0
- notionary/blocks/numbered_list/__init__.py +7 -0
- notionary/blocks/{numbered_list_element.py → numbered_list/numbered_list_element.py} +10 -6
- notionary/blocks/numbered_list/numbered_list_markdown_node.py +29 -0
- notionary/blocks/numbered_list/numbered_list_models.py +0 -0
- notionary/blocks/paragraph/__init__.py +7 -0
- notionary/blocks/{paragraph_element.py → paragraph/paragraph_element.py} +7 -3
- notionary/blocks/paragraph/paragraph_markdown_node.py +25 -0
- notionary/blocks/paragraph/paragraph_models.py +0 -0
- notionary/blocks/quote/__init__.py +7 -0
- notionary/blocks/quote/quote_element.py +92 -0
- notionary/blocks/quote/quote_markdown_node.py +23 -0
- notionary/blocks/quote/quote_models.py +0 -0
- notionary/blocks/registry/block_registry.py +17 -3
- notionary/blocks/registry/block_registry_builder.py +90 -178
- notionary/blocks/shared/__init__.py +0 -0
- notionary/blocks/shared/block_client.py +256 -0
- notionary/blocks/shared/models.py +710 -0
- notionary/blocks/{notion_block_element.py → shared/notion_block_element.py} +8 -5
- notionary/blocks/{text_inline_formatter.py → shared/text_inline_formatter.py} +14 -14
- notionary/blocks/shared/text_inline_formatter_new.py +139 -0
- notionary/blocks/table/__init__.py +7 -0
- notionary/blocks/{table_element.py → table/table_element.py} +23 -11
- notionary/blocks/table/table_markdown_node.py +40 -0
- notionary/blocks/table/table_models.py +0 -0
- notionary/blocks/todo/__init__.py +7 -0
- notionary/blocks/{todo_element.py → todo/todo_element.py} +8 -4
- notionary/blocks/todo/todo_markdown_node.py +31 -0
- notionary/blocks/todo/todo_models.py +0 -0
- notionary/blocks/toggle/__init__.py +4 -0
- notionary/blocks/{toggle_element.py → toggle/toggle_element.py} +7 -3
- notionary/blocks/toggle/toggle_markdown_node.py +35 -0
- notionary/blocks/toggle/toggle_models.py +0 -0
- notionary/blocks/toggleable_heading/__init__.py +9 -0
- notionary/blocks/{toggleable_heading_element.py → toggleable_heading/toggleable_heading_element.py} +8 -4
- notionary/blocks/toggleable_heading/toggleable_heading_markdown_node.py +43 -0
- notionary/blocks/toggleable_heading/toggleable_heading_models.py +0 -0
- notionary/blocks/video/__init__.py +7 -0
- notionary/blocks/{video_element.py → video/video_element.py} +82 -57
- notionary/blocks/video/video_markdown_node.py +30 -0
- notionary/database/__init__.py +4 -0
- notionary/database/database.py +481 -0
- notionary/database/{filter_builder.py → database_filter_builder.py} +27 -29
- notionary/database/{notion_database_provider.py → database_provider.py} +4 -4
- notionary/database/notion_database.py +45 -18
- notionary/file_upload/__init__.py +7 -0
- notionary/file_upload/client.py +254 -0
- notionary/file_upload/models.py +60 -0
- notionary/file_upload/notion_file_upload.py +387 -0
- notionary/page/content/markdown_whitespace_processor.py +80 -0
- notionary/page/content/notion_text_length_utils.py +87 -0
- notionary/page/content/page_content_retriever.py +2 -2
- notionary/page/content/page_content_writer.py +97 -148
- notionary/page/formatting/line_processor.py +153 -0
- notionary/page/formatting/markdown_to_notion_converter.py +103 -424
- notionary/page/notion_page.py +13 -14
- notionary/page/notion_to_markdown_converter.py +9 -13
- notionary/telemetry/views.py +15 -6
- notionary/user/__init__.py +11 -0
- notionary/user/base_notion_user.py +52 -0
- notionary/user/client.py +129 -0
- notionary/user/models.py +83 -0
- notionary/user/notion_bot_user.py +227 -0
- notionary/user/notion_user.py +256 -0
- notionary/user/notion_user_manager.py +173 -0
- notionary/user/notion_user_provider.py +1 -0
- notionary/util/__init__.py +3 -5
- notionary/util/factory_decorator.py +0 -33
- notionary/util/factory_only.py +37 -0
- notionary/util/fuzzy.py +74 -0
- notionary/util/logging_mixin.py +12 -12
- notionary/workspace.py +38 -3
- {notionary-0.2.16.dist-info → notionary-0.2.18.dist-info}/METADATA +2 -1
- notionary-0.2.18.dist-info/RECORD +149 -0
- notionary/blocks/audio_element.py +0 -144
- notionary/blocks/callout_element.py +0 -122
- notionary/blocks/notion_block_client.py +0 -26
- notionary/blocks/qoute_element.py +0 -169
- notionary/page/content/notion_page_content_chunker.py +0 -84
- notionary/page/formatting/spacer_rules.py +0 -483
- notionary/util/fuzzy_matcher.py +0 -82
- notionary-0.2.16.dist-info/RECORD +0 -71
- /notionary/{elements/__init__.py → blocks/bookmark/bookmark_models.py} +0 -0
- /notionary/database/{database_exceptions.py → exceptions.py} +0 -0
- /notionary/util/{singleton_decorator.py → singleton.py} +0 -0
- {notionary-0.2.16.dist-info → notionary-0.2.18.dist-info}/LICENSE +0 -0
- {notionary-0.2.16.dist-info → notionary-0.2.18.dist-info}/WHEEL +0 -0
|
@@ -1,169 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
from typing import Dict, Any, Optional, List, Tuple
|
|
3
|
-
|
|
4
|
-
from notionary.blocks import NotionBlockElement
|
|
5
|
-
from notionary.blocks import ElementPromptContent, ElementPromptBuilder
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class QuoteElement(NotionBlockElement):
    """Convert between Markdown blockquotes and Notion quote blocks.

    A Markdown blockquote is a run of lines that start with ``>``. Such a
    run is converted into a single Notion ``quote`` block whose text is the
    quoted lines joined by newlines, and a Notion ``quote`` block is
    rendered back by prefixing each line of its text with ``> ``.
    """

    # Matches one blockquote line: optional indentation, '>', at most one
    # separating space/tab, then the rest of the line (captured).
    # Only horizontal whitespace is accepted around the marker. `\s` would
    # also match newlines, which lets a match begin on preceding blank lines
    # (so the gap check in `is_consecutive_quote` never sees them) and lets an
    # empty '>' line absorb the following non-quote line into its span.
    quote_pattern = re.compile(r"^[ \t]*>[ \t]?(.*)", re.MULTILINE)

    @classmethod
    def find_matches(cls, text: str) -> List[Tuple[int, int, Dict[str, Any]]]:
        """Find every blockquote in ``text``.

        Consecutive quote lines (see ``is_consecutive_quote``) are merged
        into one quote.

        Args:
            text: Markdown text to scan.

        Returns:
            A list of ``(start, end, block)`` tuples, where ``start``/``end``
            are offsets into ``text`` and ``block`` is the Notion quote block
            built from ``text[start:end]``. Empty when there is no quote.
        """
        quote_matches = list(cls.quote_pattern.finditer(text))
        matches: List[Tuple[int, int, Dict[str, Any]]] = []

        index = 0
        while index < len(quote_matches):
            start_pos = quote_matches[index].start()

            # Extend the run while the following quote line still belongs to
            # the same blockquote.
            next_index = index + 1
            while next_index < len(quote_matches) and cls.is_consecutive_quote(
                text, quote_matches, next_index
            ):
                next_index += 1

            end_pos = quote_matches[next_index - 1].end()
            block = cls.markdown_to_notion(text[start_pos:end_pos])
            if block:
                matches.append((start_pos, end_pos, block))

            index = next_index

        return matches

    @classmethod
    def is_consecutive_quote(cls, text: str, quote_matches: List, index: int) -> bool:
        """Check whether the quote line at ``index`` continues the previous one.

        Args:
            text: The text the matches were found in.
            quote_matches: Regex match objects for the quote lines, in order.
            index: Position in ``quote_matches`` of the line to test; the
                line at ``index - 1`` is the one it is compared against.

        Returns:
            True when the text between the two matches contains exactly one
            newline, or is whitespace-only with at most two newlines (i.e. at
            most one blank line separates them); False otherwise.
        """
        prev_end = quote_matches[index - 1].end()
        curr_start = quote_matches[index].start()
        gap_text = text[prev_end:curr_start]
        newline_count = gap_text.count("\n")

        if newline_count == 1:
            return True
        return not gap_text.strip() and newline_count <= 2

    @classmethod
    def markdown_to_notion(cls, text: str) -> Optional[Dict[str, Any]]:
        """Convert a Markdown blockquote into a Notion quote block.

        Args:
            text: Markdown containing one or more ``>`` lines.

        Returns:
            A ``{"type": "quote", ...}`` block with a single rich-text item
            holding the quoted lines joined by newlines (outer whitespace
            stripped), or None when ``text`` is empty or has no quote line.
        """
        if not text:
            return None

        quote_lines: List[str] = []
        for line in text.split("\n"):
            quote_match = cls.quote_pattern.match(line)
            if quote_match:
                quote_lines.append(quote_match.group(1))
            elif quote_lines and not line.strip():
                # Keep blank lines that appear after the quote has started.
                quote_lines.append("")

        # No line matched the pattern, so this is not a blockquote.
        if not quote_lines:
            return None

        quote_content = "\n".join(quote_lines).strip()
        rich_text = [{"type": "text", "text": {"content": quote_content}}]

        return {"type": "quote", "quote": {"rich_text": rich_text, "color": "default"}}

    @classmethod
    def notion_to_markdown(cls, block: Dict[str, Any]) -> Optional[str]:
        """Convert a Notion quote block into a Markdown blockquote.

        Args:
            block: A Notion block dictionary.

        Returns:
            The block's text with every line prefixed by ``> ``, or None when
            the block's ``type`` is not ``"quote"``.
        """
        if block.get("type") != "quote":
            return None

        # `or` guards against an explicit None payload, not just a missing key.
        rich_text = (block.get("quote") or {}).get("rich_text") or []
        content = cls._extract_text_content(rich_text)

        return "\n".join(f"> {line}" for line in content.split("\n"))

    @classmethod
    def match_markdown(cls, text: str) -> bool:
        """Return True when ``text`` contains at least one blockquote line."""
        return bool(cls.quote_pattern.search(text))

    @classmethod
    def match_notion(cls, block: Dict[str, Any]) -> bool:
        """Return True when ``block`` is a Notion block of type ``"quote"``."""
        return block.get("type") == "quote"

    @classmethod
    def is_multiline(cls) -> bool:
        """Blockquotes can span multiple lines."""
        return True

    @classmethod
    def _extract_text_content(cls, rich_text: List[Dict[str, Any]]) -> str:
        """Concatenate the plain text of Notion rich_text items.

        Items of type ``"text"`` contribute ``text.content``; any other item
        contributes its ``plain_text`` value when that key is present.
        """
        parts: List[str] = []
        for text_obj in rich_text:
            if text_obj.get("type") == "text":
                parts.append((text_obj.get("text") or {}).get("content", ""))
            elif "plain_text" in text_obj:
                parts.append(text_obj.get("plain_text", ""))
        return "".join(parts)

    @classmethod
    def get_llm_prompt_content(cls) -> ElementPromptContent:
        """Return structured LLM prompt metadata for the quote element."""
        return (
            ElementPromptBuilder()
            .with_description(
                "Creates blockquotes that visually distinguish quoted text."
            )
            .with_usage_guidelines(
                "Use blockquotes for quoting external sources, highlighting important statements, "
                "or creating visual emphasis for key information."
            )
            .with_syntax("> Quoted text")
            .with_examples(
                [
                    "> This is a simple blockquote",
                    "> This is a multi-line quote\n> that continues on the next line",
                    "> Important note:\n> This quote spans\n> multiple lines.",
                ]
            )
            .build()
        )
|
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
from typing import Any, Dict, List
|
|
3
|
-
from notionary.util import LoggingMixin
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class NotionPageContentChunker(LoggingMixin):
    """
    Handles markdown text processing to comply with Notion API length limitations.

    This class specifically addresses the Notion API constraint that limits
    rich_text elements to a maximum of 2000 characters. This particularly affects
    paragraph blocks within toggle blocks or other nested structures.

    Resolves the following typical API error:
    "validation_error - body.children[79].toggle.children[2].paragraph.rich_text[0].text.content.length
    should be ≤ 2000, instead was 2162."

    The class provides methods for:
    1. Automatically truncating text that exceeds the limit
    2. Splitting markdown into smaller units for separate API requests
    """

    def __init__(self, max_text_length: int = 1900):
        """Store the maximum number of characters kept per rich_text item."""
        self.max_text_length = max_text_length

    def fix_blocks_content_length(
        self, blocks: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Return blocks whose text content does not exceed ``max_text_length``.

        The input blocks are not modified; see ``_fix_single_block_content``.
        """
        return [self._fix_single_block_content(block) for block in blocks]

    def _fix_single_block_content(self, block: Dict[str, Any]) -> Dict[str, Any]:
        """Fix content length in a single block and its children recursively.

        Args:
            block: A Notion block dictionary. Its payload is looked up under
                the key named by ``block["type"]``.

        Returns:
            A new block dictionary. Every dictionary or list that has to
            change is rebuilt rather than edited in place, so ``block`` and
            everything nested in it are left untouched; values that need no
            change are shared with the input.
        """
        block_copy = block.copy()

        block_type = block.get("type")
        if not block_type:
            return block_copy

        content = block.get(block_type)
        # A payload that is missing, empty, or not a mapping has nothing to fix.
        if not content or not isinstance(content, dict):
            return block_copy

        if "rich_text" in content:
            self._fix_rich_text_content(block_copy, block_type, content)

        if content.get("children"):
            # Build a fresh payload dict instead of assigning into the shared
            # one: `block.copy()` is shallow, so writing through it would
            # modify the caller's block.
            block_copy[block_type] = {
                **block_copy[block_type],
                "children": [
                    self._fix_single_block_content(child)
                    for child in content["children"]
                ],
            }

        return block_copy

    def _fix_rich_text_content(
        self, block_copy: Dict[str, Any], block_type: str, content: Dict[str, Any]
    ) -> None:
        """Truncate rich_text items of ``content`` that exceed the length limit.

        Args:
            block_copy: The block being produced; its ``block_type`` entry is
                replaced when at least one item had to be truncated.
            block_type: Key under which the block stores its payload.
            content: The original payload; it is only read, never modified.
        """
        limit = self.max_text_length
        fixed_items = []
        truncated = False

        for text_item in content["rich_text"]:
            if "text" in text_item and "content" in text_item["text"]:
                text_content = text_item["text"]["content"]
                if len(text_content) > limit:
                    self.logger.warning(
                        "Truncating text content from %d to %d chars",
                        len(text_content),
                        limit,
                    )
                    # Rebuild the item so the caller's dict keeps its full text.
                    text_item = {
                        **text_item,
                        "text": {**text_item["text"], "content": text_content[:limit]},
                    }
                    truncated = True
            fixed_items.append(text_item)

        if truncated:
            block_copy[block_type] = {
                **block_copy[block_type],
                "rich_text": fixed_items,
            }

    def split_to_paragraphs(self, markdown_text: str) -> List[str]:
        """Split markdown on blank lines and drop whitespace-only pieces."""
        paragraphs = re.split(r"\n\s*\n", markdown_text)
        return [p for p in paragraphs if p.strip()]

    def split_to_sentences(self, paragraph: str) -> List[str]:
        """Split a paragraph at whitespace that follows ``.``, ``!`` or ``?``."""
        sentences = re.split(r"(?<=[.!?])\s+", paragraph)
        return [s for s in sentences if s.strip()]
|