notionary 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. notionary/__init__.py +3 -2
  2. notionary/blocks/__init__.py +54 -25
  3. notionary/blocks/audio/__init__.py +7 -0
  4. notionary/blocks/audio/audio_element.py +152 -0
  5. notionary/blocks/audio/audio_markdown_node.py +29 -0
  6. notionary/blocks/audio/audio_models.py +59 -0
  7. notionary/blocks/bookmark/__init__.py +7 -0
  8. notionary/blocks/{bookmark_element.py → bookmark/bookmark_element.py} +20 -65
  9. notionary/blocks/bookmark/bookmark_markdown_node.py +43 -0
  10. notionary/blocks/bookmark/bookmark_models.py +0 -0
  11. notionary/blocks/bulleted_list/__init__.py +7 -0
  12. notionary/blocks/{bulleted_list_element.py → bulleted_list/bulleted_list_element.py} +7 -3
  13. notionary/blocks/bulleted_list/bulleted_list_markdown_node.py +33 -0
  14. notionary/blocks/bulleted_list/bulleted_list_models.py +0 -0
  15. notionary/blocks/callout/__init__.py +7 -0
  16. notionary/blocks/callout/callout_element.py +132 -0
  17. notionary/blocks/callout/callout_markdown_node.py +31 -0
  18. notionary/blocks/callout/callout_models.py +0 -0
  19. notionary/blocks/code/__init__.py +7 -0
  20. notionary/blocks/{code_block_element.py → code/code_element.py} +72 -40
  21. notionary/blocks/code/code_markdown_node.py +43 -0
  22. notionary/blocks/code/code_models.py +0 -0
  23. notionary/blocks/column/__init__.py +5 -0
  24. notionary/blocks/{column_element.py → column/column_element.py} +24 -55
  25. notionary/blocks/column/column_models.py +0 -0
  26. notionary/blocks/divider/__init__.py +7 -0
  27. notionary/blocks/{divider_element.py → divider/divider_element.py} +11 -3
  28. notionary/blocks/divider/divider_markdown_node.py +24 -0
  29. notionary/blocks/divider/divider_models.py +0 -0
  30. notionary/blocks/document/__init__.py +7 -0
  31. notionary/blocks/document/document_element.py +102 -0
  32. notionary/blocks/document/document_markdown_node.py +31 -0
  33. notionary/blocks/document/document_models.py +0 -0
  34. notionary/blocks/embed/__init__.py +7 -0
  35. notionary/blocks/{embed_element.py → embed/embed_element.py} +50 -32
  36. notionary/blocks/embed/embed_markdown_node.py +30 -0
  37. notionary/blocks/embed/embed_models.py +0 -0
  38. notionary/blocks/heading/__init__.py +7 -0
  39. notionary/blocks/{heading_element.py → heading/heading_element.py} +25 -17
  40. notionary/blocks/heading/heading_markdown_node.py +29 -0
  41. notionary/blocks/heading/heading_models.py +0 -0
  42. notionary/blocks/image/__init__.py +7 -0
  43. notionary/blocks/{image_element.py → image/image_element.py} +62 -42
  44. notionary/blocks/image/image_markdown_node.py +33 -0
  45. notionary/blocks/image/image_models.py +0 -0
  46. notionary/blocks/markdown_builder.py +356 -0
  47. notionary/blocks/markdown_node.py +29 -0
  48. notionary/blocks/mention/__init__.py +7 -0
  49. notionary/blocks/{mention_element.py → mention/mention_element.py} +6 -2
  50. notionary/blocks/mention/mention_markdown_node.py +38 -0
  51. notionary/blocks/mention/mention_models.py +0 -0
  52. notionary/blocks/numbered_list/__init__.py +7 -0
  53. notionary/blocks/{numbered_list_element.py → numbered_list/numbered_list_element.py} +10 -6
  54. notionary/blocks/numbered_list/numbered_list_markdown_node.py +29 -0
  55. notionary/blocks/numbered_list/numbered_list_models.py +0 -0
  56. notionary/blocks/paragraph/__init__.py +7 -0
  57. notionary/blocks/{paragraph_element.py → paragraph/paragraph_element.py} +7 -3
  58. notionary/blocks/paragraph/paragraph_markdown_node.py +25 -0
  59. notionary/blocks/paragraph/paragraph_models.py +0 -0
  60. notionary/blocks/quote/__init__.py +7 -0
  61. notionary/blocks/quote/quote_element.py +92 -0
  62. notionary/blocks/quote/quote_markdown_node.py +23 -0
  63. notionary/blocks/quote/quote_models.py +0 -0
  64. notionary/blocks/registry/block_registry.py +17 -3
  65. notionary/blocks/registry/block_registry_builder.py +90 -178
  66. notionary/blocks/shared/__init__.py +0 -0
  67. notionary/blocks/shared/block_client.py +256 -0
  68. notionary/blocks/shared/models.py +713 -0
  69. notionary/blocks/{notion_block_element.py → shared/notion_block_element.py} +8 -5
  70. notionary/blocks/{text_inline_formatter.py → shared/text_inline_formatter.py} +14 -14
  71. notionary/blocks/shared/text_inline_formatter_new.py +139 -0
  72. notionary/blocks/table/__init__.py +7 -0
  73. notionary/blocks/{table_element.py → table/table_element.py} +23 -11
  74. notionary/blocks/table/table_markdown_node.py +40 -0
  75. notionary/blocks/table/table_models.py +0 -0
  76. notionary/blocks/todo/__init__.py +7 -0
  77. notionary/blocks/{todo_element.py → todo/todo_element.py} +8 -4
  78. notionary/blocks/todo/todo_markdown_node.py +31 -0
  79. notionary/blocks/todo/todo_models.py +0 -0
  80. notionary/blocks/toggle/__init__.py +4 -0
  81. notionary/blocks/{toggle_element.py → toggle/toggle_element.py} +7 -3
  82. notionary/blocks/toggle/toggle_markdown_node.py +35 -0
  83. notionary/blocks/toggle/toggle_models.py +0 -0
  84. notionary/blocks/toggleable_heading/__init__.py +9 -0
  85. notionary/blocks/{toggleable_heading_element.py → toggleable_heading/toggleable_heading_element.py} +8 -4
  86. notionary/blocks/toggleable_heading/toggleable_heading_markdown_node.py +43 -0
  87. notionary/blocks/toggleable_heading/toggleable_heading_models.py +0 -0
  88. notionary/blocks/video/__init__.py +7 -0
  89. notionary/blocks/{video_element.py → video/video_element.py} +82 -57
  90. notionary/blocks/video/video_markdown_node.py +30 -0
  91. notionary/file_upload/notion_file_upload.py +1 -1
  92. notionary/page/content/markdown_whitespace_processor.py +80 -0
  93. notionary/page/content/notion_text_length_utils.py +87 -0
  94. notionary/page/content/page_content_retriever.py +18 -10
  95. notionary/page/content/page_content_writer.py +97 -148
  96. notionary/page/formatting/line_processor.py +153 -0
  97. notionary/page/formatting/markdown_to_notion_converter.py +104 -425
  98. notionary/page/notion_page.py +9 -11
  99. notionary/page/notion_to_markdown_converter.py +9 -13
  100. notionary/util/factory_decorator.py +0 -0
  101. notionary/workspace.py +0 -1
  102. {notionary-0.2.17.dist-info → notionary-0.2.19.dist-info}/METADATA +1 -1
  103. notionary-0.2.19.dist-info/RECORD +150 -0
  104. notionary/blocks/audio_element.py +0 -144
  105. notionary/blocks/callout_element.py +0 -122
  106. notionary/blocks/document_element.py +0 -194
  107. notionary/blocks/notion_block_client.py +0 -26
  108. notionary/blocks/qoute_element.py +0 -169
  109. notionary/page/content/notion_page_content_chunker.py +0 -84
  110. notionary/page/formatting/spacer_rules.py +0 -483
  111. notionary-0.2.17.dist-info/RECORD +0 -85
  112. {notionary-0.2.17.dist-info → notionary-0.2.19.dist-info}/LICENSE +0 -0
  113. {notionary-0.2.17.dist-info → notionary-0.2.19.dist-info}/WHEEL +0 -0
@@ -1,194 +0,0 @@
1
- import re
2
- from typing import Dict, Any, Optional, List
3
-
4
- from notionary.blocks import NotionBlockElement
5
- from notionary.blocks import ElementPromptContent, ElementPromptBuilder
6
-
7
-
8
class DocumentElement(NotionBlockElement):
    """
    Handles conversion between Markdown document embeds and Notion file blocks.

    Markdown document syntax (custom format):
    - %[Caption](https://example.com/document.pdf) - Basic document with caption
    - %[](https://example.com/document.pdf) - Document without caption
    - %[Meeting Notes](https://drive.google.com/file/d/123/view) - Google Drive document
    - %[Report](https://company.sharepoint.com/document.docx) - SharePoint document

    Supports various document URLs including PDFs, Word docs, Excel files, PowerPoint,
    Google Drive files, and other document formats that Notion can display.
    """

    PATTERN = re.compile(
        r"^%\[(.*?)\]"  # %[Caption] part
        + r'\((https?://[^\s"]+)'  # (URL part
        + r"\)$"  # closing parenthesis
    )

    DOCUMENT_EXTENSIONS = [
        ".pdf",
        ".doc",
        ".docx",
        ".xls",
        ".xlsx",
        ".ppt",
        ".pptx",
        ".txt",
        ".rtf",
        ".odt",
        ".ods",
        ".odp",
        ".pages",
        ".numbers",
        ".key",
        ".epub",
        ".mobi",
    ]

    # Hosts (and their subdomains) that are treated as document hosting services.
    _DOCUMENT_SERVICES = (
        "drive.google.com",
        "docs.google.com",
        "sheets.google.com",
        "slides.google.com",
        "sharepoint.com",
        "onedrive.com",
        "dropbox.com",
        "box.com",
        "scribd.com",
        "slideshare.net",
    )

    @classmethod
    def match_markdown(cls, text: str) -> bool:
        """Check if text is a markdown document embed."""
        text = text.strip()
        return text.startswith("%[") and bool(cls.PATTERN.match(text))

    @classmethod
    def match_notion(cls, block: Dict[str, Any]) -> bool:
        """Check if block is a Notion file (document)."""
        return block.get("type") == "file"

    @classmethod
    def is_document_url(cls, url: str) -> bool:
        """
        Check if URL points to a document file.

        The extension test is applied to the URL *path* only, so a query string
        or fragment (``.../report.pdf?dl=0``) does not hide the extension. The
        hosting-service test compares the parsed host name against the known
        services (exact match or subdomain), so a service name that merely
        appears inside another host name, the path or the query string does
        not count as a hit.

        Args:
            url: The URL to inspect. A missing scheme is tolerated.

        Returns:
            True if the URL looks like a document, False otherwise (including
            for empty or unparsable input).
        """
        # Local import: only this method needs it.
        from urllib.parse import urlparse

        if not url:
            return False

        candidate = url.strip().lower()
        try:
            parsed = urlparse(candidate)
            if not parsed.netloc:
                # Scheme-less input such as "drive.google.com/file/d/1": re-parse
                # it as a network location so the host can still be extracted.
                parsed = urlparse(f"//{candidate}")
            host = parsed.hostname or ""
        except ValueError:
            # urlparse rejects malformed authorities (e.g. a broken IPv6 literal).
            return False

        # Check for common document file extensions
        if parsed.path.endswith(tuple(cls.DOCUMENT_EXTENSIONS)):
            return True

        # Check for common document hosting services
        return any(
            host == service or host.endswith(f".{service}")
            for service in cls._DOCUMENT_SERVICES
        )

    @classmethod
    def markdown_to_notion(cls, text: str) -> Optional[Dict[str, Any]]:
        """
        Convert markdown document embed to Notion file block.

        Returns None when the text does not match the ``%[caption](url)`` syntax.
        The URL is deliberately not validated with ``is_document_url``: the user
        might know better than our detection.
        """
        doc_match = cls.PATTERN.match(text.strip())
        if not doc_match:
            return None

        # PATTERN requires an http(s) URL, so group(2) is never empty here.
        caption, url = doc_match.group(1), doc_match.group(2)

        # Prepare the file block
        file_block = {
            "type": "file",
            "file": {"type": "external", "external": {"url": url}},
        }

        # Add caption if provided
        if caption:
            file_block["file"]["caption"] = [
                {"type": "text", "text": {"content": caption}}
            ]

        return file_block

    @classmethod
    def notion_to_markdown(cls, block: Dict[str, Any]) -> Optional[str]:
        """
        Convert Notion file block to markdown document embed.

        Returns None for non-file blocks, unknown file source types, or when no
        URL / upload id can be determined.
        """
        if block.get("type") != "file":
            return None

        file_data = block.get("file", {})
        source_type = file_data.get("type")

        # Handle external, Notion-hosted ("file") and uploaded documents
        if source_type == "external":
            url = file_data.get("external", {}).get("url", "")
        elif source_type == "file":
            url = file_data.get("file", {}).get("url", "")
        elif source_type == "file_upload":
            # Handle file uploads with special notion:// syntax
            file_upload_id = file_data.get("file_upload", {}).get("id", "")
            url = f"notion://file_upload/{file_upload_id}" if file_upload_id else ""
        else:
            return None

        if not url:
            return None

        # Extract caption if available (empty caption yields an empty string)
        caption = cls._extract_text_content(file_data.get("caption") or [])

        return f"%[{caption}]({url})"

    @classmethod
    def is_multiline(cls) -> bool:
        """Document embeds are single-line elements."""
        return False

    @classmethod
    def _extract_text_content(cls, rich_text: List[Dict[str, Any]]) -> str:
        """Extract plain text content from Notion rich_text elements."""
        parts = []
        for text_obj in rich_text:
            if text_obj.get("type") == "text":
                parts.append(text_obj.get("text", {}).get("content", ""))
            elif "plain_text" in text_obj:
                parts.append(text_obj.get("plain_text", ""))
        return "".join(parts)

    @classmethod
    def get_llm_prompt_content(cls) -> ElementPromptContent:
        """Returns information for LLM prompts about this element."""
        return (
            ElementPromptBuilder()
            .with_description(
                "Embeds document files from external sources like PDFs, Word docs, Excel files, or cloud storage services."
            )
            .with_usage_guidelines(
                "Use document embeds when you want to include reference materials, reports, presentations, or any "
                "file-based content directly in your document. Documents can be viewed inline or downloaded by users. "
                "Perfect for sharing contracts, reports, manuals, or any important files."
            )
            .with_syntax("%[Caption](https://example.com/document.pdf)")
            .with_examples(
                [
                    "%[Project Proposal](https://drive.google.com/file/d/1a2b3c4d5e/view)",
                    "%[Q4 Financial Report](https://company.sharepoint.com/reports/q4-2024.xlsx)",
                    "%[User Manual](https://cdn.company.com/docs/manual-v2.1.pdf)",
                    "%[Meeting Minutes](https://docs.google.com/document/d/1x2y3z4/edit)",
                    "%[](https://example.com/contract.pdf)",
                ]
            )
            .with_avoidance_guidelines(
                "Only use for actual document files. For web pages or articles, use bookmark or embed elements instead. "
                "Ensure document URLs are accessible to your intended audience."
            )
            .build()
        )
@@ -1,26 +0,0 @@
1
- from typing import Dict, Any, List
2
- from notionary.base_notion_client import BaseNotionClient
3
- from notionary.util import singleton
4
-
5
-
6
- # TODO: Type the block API (fix registry as well)
7
@singleton
class NotionBlockClient(BaseNotionClient):
    """
    Client for Notion block operations.
    Inherits base HTTP functionality from BaseNotionClient.
    """

    async def get_page_blocks(self, page_id: str) -> List[Dict[str, Any]]:
        """
        Retrieves all blocks of a Notion page.

        A page is addressed through the same ``blocks/{id}/children`` endpoint
        as any other block, so this delegates to ``get_block_children``.
        """
        return await self.get_block_children(page_id)

    async def get_block_children(self, block_id: str) -> List[Dict[str, Any]]:
        """
        Retrieves all children blocks of a specific block.

        The Notion API returns children in pages; this follows ``next_cursor``
        while the response reports ``has_more`` so that the full list is
        returned rather than only the first page.

        Args:
            block_id: ID of the parent block (or page).

        Returns:
            The concatenated ``results`` of every response page.
        """
        endpoint = f"blocks/{block_id}/children"
        blocks: List[Dict[str, Any]] = []
        cursor = None

        while True:
            # The first request is identical to the un-paginated call.
            # NOTE(review): assumes BaseNotionClient.get passes a query string
            # in the endpoint through unchanged - confirm against the base client.
            path = endpoint if cursor is None else f"{endpoint}?start_cursor={cursor}"
            response = await self.get(path)
            blocks.extend(response.get("results", []))

            cursor = response.get("next_cursor")
            # Stop when the API reports no further pages; also stop on a missing
            # cursor so a malformed response can never cause an endless loop.
            if not response.get("has_more") or not cursor:
                return blocks
@@ -1,169 +0,0 @@
1
- import re
2
- from typing import Dict, Any, Optional, List, Tuple
3
-
4
- from notionary.blocks import NotionBlockElement
5
- from notionary.blocks import ElementPromptContent, ElementPromptBuilder
6
-
7
-
8
class QuoteElement(NotionBlockElement):
    """Class for converting between Markdown blockquotes and Notion quote blocks."""

    # Regular expression pattern to match Markdown blockquote lines.
    # Matches lines that start with optional horizontal whitespace, followed by
    # '>', then at most one whitespace character, and captures the rest of the line.
    # `[^\S\n]` means "whitespace except newline": with re.MULTILINE a plain `\s`
    # would let a match start on a preceding blank line, and would let a bare '>'
    # line consume the line break and capture the *following* line as its content.
    quote_pattern = re.compile(r"^[^\S\n]*>[^\S\n]?(.*)", re.MULTILINE)

    @classmethod
    def find_matches(cls, text: str) -> List[Tuple[int, int, Dict[str, Any]]]:
        """
        Find all blockquote matches in the text and return their positions and blocks.

        Consecutive quote lines (see ``is_consecutive_quote``) are grouped into a
        single quote block.

        Returns:
            A list of ``(start, end, block)`` tuples, where ``start``/``end`` are
            character offsets into ``text`` and ``block`` is the Notion quote block.
        """
        quote_matches = list(cls.quote_pattern.finditer(text))
        if not quote_matches:
            return []

        matches = []
        total = len(quote_matches)
        group_start = 0
        while group_start < total:
            # Extend the group while the following quote line continues it.
            group_end = group_start + 1
            while group_end < total and cls.is_consecutive_quote(
                text, quote_matches, group_end
            ):
                group_end += 1

            start_pos = quote_matches[group_start].start()
            end_pos = quote_matches[group_end - 1].end()

            block = cls.markdown_to_notion(text[start_pos:end_pos])
            if block:
                matches.append((start_pos, end_pos, block))

            group_start = group_end

        return matches

    @classmethod
    def is_consecutive_quote(cls, text: str, quote_matches: List, index: int) -> bool:
        """
        Checks if the current quote is part of the previous quote sequence.

        The quote at ``index`` continues the previous one when they are on
        adjacent lines, or separated only by whitespace containing at most two
        line breaks (i.e. a single blank line).
        """
        prev_end = quote_matches[index - 1].end()
        curr_start = quote_matches[index].start()
        gap_text = text[prev_end:curr_start]
        newline_count = gap_text.count("\n")

        if newline_count == 1:
            return True

        return gap_text.strip() == "" and newline_count <= 2

    @classmethod
    def markdown_to_notion(cls, text: str) -> Optional[Dict[str, Any]]:
        """
        Convert markdown blockquote to Notion block.

        Returns None for empty text or text without any blockquote line.
        Non-quote, non-blank lines inside ``text`` are ignored.
        """
        if not text:
            return None

        # Check if it's a blockquote
        if not cls.quote_pattern.search(text):
            return None

        # Extract content from each line
        quote_lines = []
        for line in text.split("\n"):
            quote_match = cls.quote_pattern.match(line)
            if quote_match:
                quote_lines.append(quote_match.group(1))
            elif not line.strip() and quote_lines:
                # Allow empty lines within the quote
                quote_lines.append("")

        if not quote_lines:
            return None

        quote_content = "\n".join(quote_lines).strip()

        rich_text = [{"type": "text", "text": {"content": quote_content}}]

        return {"type": "quote", "quote": {"rich_text": rich_text, "color": "default"}}

    @classmethod
    def notion_to_markdown(cls, block: Dict[str, Any]) -> Optional[str]:
        """Convert Notion quote block to markdown (one '> ' prefixed line per text line)."""
        if block.get("type") != "quote":
            return None

        rich_text = block.get("quote", {}).get("rich_text", [])

        # Extract the text content
        content = cls._extract_text_content(rich_text)

        # Add each line with blockquote prefix
        return "\n".join(f"> {line}" for line in content.split("\n"))

    @classmethod
    def match_markdown(cls, text: str) -> bool:
        """Check if this element can handle the given markdown text."""
        return bool(cls.quote_pattern.search(text))

    @classmethod
    def match_notion(cls, block: Dict[str, Any]) -> bool:
        """Check if this element can handle the given Notion block."""
        return block.get("type") == "quote"

    @classmethod
    def is_multiline(cls) -> bool:
        """Blockquotes can span multiple lines."""
        return True

    @classmethod
    def _extract_text_content(cls, rich_text: List[Dict[str, Any]]) -> str:
        """Extract plain text content from Notion rich_text elements."""
        parts = []
        for text_obj in rich_text:
            if text_obj.get("type") == "text":
                parts.append(text_obj.get("text", {}).get("content", ""))
            elif "plain_text" in text_obj:
                parts.append(text_obj.get("plain_text", ""))
        return "".join(parts)

    @classmethod
    def get_llm_prompt_content(cls) -> ElementPromptContent:
        """
        Returns structured LLM prompt metadata for the quote element.
        """
        return (
            ElementPromptBuilder()
            .with_description(
                "Creates blockquotes that visually distinguish quoted text."
            )
            .with_usage_guidelines(
                "Use blockquotes for quoting external sources, highlighting important statements, "
                "or creating visual emphasis for key information."
            )
            .with_syntax("> Quoted text")
            .with_examples(
                [
                    "> This is a simple blockquote",
                    "> This is a multi-line quote\n> that continues on the next line",
                    "> Important note:\n> This quote spans\n> multiple lines.",
                ]
            )
            .build()
        )
@@ -1,84 +0,0 @@
1
- import re
2
- from typing import Any, Dict, List
3
- from notionary.util import LoggingMixin
4
-
5
-
6
class NotionPageContentChunker(LoggingMixin):
    """
    Handles markdown text processing to comply with Notion API length limitations.

    This class specifically addresses the Notion API constraint that limits
    rich_text elements to a maximum of 2000 characters. This particularly affects
    paragraph blocks within toggle blocks or other nested structures.

    Resolves the following typical API error:
    "validation_error - body.children[79].toggle.children[2].paragraph.rich_text[0].text.content.length
    should be ≤ 2000, instead was 2162."

    The class provides methods for:
    1. Automatically truncating text that exceeds the limit
    2. Splitting markdown into smaller units for separate API requests
    """

    def __init__(self, max_text_length: int = 1900):
        # Maximum number of characters kept per rich_text item.
        self.max_text_length = max_text_length

    def fix_blocks_content_length(
        self, blocks: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Check each block and ensure text content doesn't exceed Notion's limit.

        Returns new block dicts; the blocks passed in are left unmodified.
        """
        return [self._fix_single_block_content(block) for block in blocks]

    def _fix_single_block_content(self, block: Dict[str, Any]) -> Dict[str, Any]:
        """
        Fix content length in a single block and its children recursively.

        Returns a copy of ``block``. Every container that gets modified (the
        type-specific payload, its ``rich_text`` list, truncated items and the
        ``children`` list) is copied first, so the caller's block is never
        mutated; untouched nested values are shared with the input.
        """
        block_copy = block.copy()

        block_type = block.get("type")
        if not block_type:
            return block_copy

        content = block.get(block_type)
        if not content or not isinstance(content, dict):
            return block_copy

        # block.copy() is shallow: without this, writes below would go straight
        # into the caller's payload dict.
        block_copy[block_type] = dict(content)

        if "rich_text" in content:
            self._fix_rich_text_content(block_copy, block_type, content)

        if content.get("children"):
            block_copy[block_type]["children"] = [
                self._fix_single_block_content(child) for child in content["children"]
            ]

        return block_copy

    def _fix_rich_text_content(
        self, block_copy: Dict[str, Any], block_type: str, content: Dict[str, Any]
    ) -> None:
        """
        Fix rich text content that exceeds the length limit.

        Stores a new ``rich_text`` list in ``block_copy[block_type]``. Items whose
        ``text.content`` is longer than ``max_text_length`` are replaced by
        truncated copies (a warning is logged for each); all other items are
        carried over unchanged.
        """
        fixed_items = []
        for text_item in content["rich_text"]:
            if "text" in text_item and "content" in text_item["text"]:
                text_content = text_item["text"]["content"]
                if len(text_content) > self.max_text_length:
                    self.logger.warning(
                        "Truncating text content from %d to %d chars",
                        len(text_content),
                        self.max_text_length,
                    )
                    # Build a truncated copy instead of editing the item in place.
                    text_item = {
                        **text_item,
                        "text": {
                            **text_item["text"],
                            "content": text_content[: self.max_text_length],
                        },
                    }
            fixed_items.append(text_item)

        block_copy[block_type]["rich_text"] = fixed_items

    def split_to_paragraphs(self, markdown_text: str) -> List[str]:
        """Split markdown into paragraphs (separated by blank lines), dropping empty ones."""
        paragraphs = re.split(r"\n\s*\n", markdown_text)
        return [p for p in paragraphs if p.strip()]

    def split_to_sentences(self, paragraph: str) -> List[str]:
        """Split a paragraph into sentences at whitespace following '.', '!' or '?'."""
        sentences = re.split(r"(?<=[.!?])\s+", paragraph)
        return [s for s in sentences if s.strip()]